We run a lot of services in production and the log volume grows day by day; cleaning it up by hand has become a real pain point. So the plan here is to automatically compress historical logs, packaged by month, to free up disk space.

Supported file name formats:

1. newWebSite.log.2019-05-14

2. info.2019-01-03.out

Run the script with the log directory as its argument:
python zipFile.py /home/webApp/logs
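
As an illustration (hypothetical file listing), suppose the directory holds daily files from May 2019 and the script runs in some later month; they get rolled into one archive per base name and month, named <base>.<YYYY-MM>.tar.gz:

newWebSite.log.2019-05-13
newWebSite.log.2019-05-14
    -> packed into newWebSite.log.2019-05.tar.gz
       (via: tar -zcvf ./newWebSite.log.2019-05.tar.gz newWebSite.log.2019-05*)

The full script: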
#!/usr/bin/python
# coding:utf-8
import datetime
import os
import sys

import commands  # Python 2 only; Python 3 offers subprocess.getstatusoutput instead

"""
author:jimw
date:2019-11-28
支持范围:
1、newWebSite.log.2019-05-14

2、catalina.2019-01-03.out
python zipFile.py /home/webApp/logs
根据月份打包

zip logging
"""


# Build a dict of {base file name: [months to package]}
def loggingFile(filepath):
    # List everything directly under filepath (subdirectories are not
    # traversed yet; that is planned for a later version).
    files = os.listdir(filepath)

    # Sorting is required so that entries with the same base name are grouped together.
    files.sort()
    now = datetime.datetime.now()
    # Current month as 'YYYY-MM'; files from this month are never compressed.
    today_month = now.strftime('%Y-%m')

    data_list_todays = []   # months collected for the current base name
    data_list_files = []    # base names seen so far
    dictName = dict()       # result: base name -> list of months
    flagName = ''
    # The tar commands later on use relative paths, so work from the log directory.
    os.chdir(filepath)
    for fi in files:

        fi_d = os.path.join(filepath, fi)
        if not os.path.isdir(fi_d):
            varName = fi
            parts = varName.split('.')
            # Skip names that do not contain a date component at all.
            if len(parts) < 2:
                continue
            # Work out the base name: 'newWebSite.log.2019-05-14' -> 'newWebSite.log',
            # 'catalina.2019-01-03.out' -> 'catalina'.
            if parts[1] == 'log':
                varFileName = parts[0] + '.' + parts[1]
            else:
                varFileName = parts[0]
            # A new base name starts a fresh list of months.
            if flagName != '' and flagName != varFileName:
                data_list_todays = []
            flagName = varFileName
            print('flagName:', flagName)
            data_list_files.append(varFileName)

            # Keep base names unique.
            if containsDuplicate(data_list_files):
                data_list_files.remove(varFileName)
            try:
                # Extract the 'YYYY-MM' part of the date in the file name.
                if parts[1] == 'log':
                    varNameindex = parts[2]
                else:
                    varNameindex = parts[1]
                varNameMonth = varNameindex.split('-')[0] + '-' + varNameindex.split('-')[1]
                if varNameMonth == today_month:
                    print('skipping files from the current month')
                    continue
            except Exception:
                # File name does not match the expected pattern; ignore it.
                continue
            data_list_todays.append(varNameMonth)
            # Keep months unique.
            if containsDuplicate(data_list_todays):
                data_list_todays.remove(varNameMonth)
            print(data_list_todays)
            dictName[varFileName] = data_list_todays
    return dictName


# Returns True if the list already contains a duplicate entry.
def containsDuplicate(nums):
    """
    :type nums: list
    :rtype: bool
    """
    if len(nums) <= 1:
        return False
    d = {}
    for i in nums:
        if i in d:
            return True
        d[i] = 0
    return False


# Run a shell command and print its exit status and output.
def execCmd(cmd):
    print(cmd)
    (status, output) = commands.getstatusoutput(cmd)
    print("result:", status)
    print("result output:", output)
    return 'ok'


# Walk the result dict and create one tar.gz per base name and month.
def zipFile(result):
    print(result)
    for name, months_list in result.items():

        for months in months_list:
            varZipFile = name + '.' + months + '.tar.gz'
            if os.path.isfile(varZipFile):
                print('archive already exists, skipping:', varZipFile)
                continue
            # Compress every file of this base name that belongs to this month.
            cmdval = 'tar -zcvf ./' + varZipFile + ' ' + name + '.' + months + '*'
            print(cmdval)
            execCmd(cmdval)
            # Deleting the original files is left for a later version.


if __name__ == '__main__':
    # Package the logs under the directory given on the command line.
    result = loggingFile(sys.argv[1])
    # result = loggingFile('D://logs')
    zipFile(result)
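
The script depends on the Python 2 commands module. For Python 3 environments, subprocess.getstatusoutput returns the same (status, output) pair, so a minimal sketch of an equivalent execCmd (assuming the rest of the script stays unchanged) could look like this:

import subprocess


# Same behaviour as execCmd above, using the Python 3 standard library.
def execCmd(cmd):
    print(cmd)
    # Runs the command through the shell; returns (exit status, combined output).
    (status, output) = subprocess.getstatusoutput(cmd)
    print("result:", status)
    print("result output:", output)
    return 'ok'

To make the compression truly automatic, the script can be scheduled with cron. For example, a crontab entry such as "0 3 1 * * python /home/webApp/zipFile.py /home/webApp/logs" (the script path here is an assumption, adjust it to wherever the script actually lives) would package the previous months' logs at 03:00 on the first day of every month.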