#!/usr/bin/env python3
"""Mirror a directory into a backup location and restore modified files.

Every pass walks the target directory.  Files that are not yet in the backup
directory are copied there together with a ``<name>.md5`` sidecar holding the
MD5 hex digest.  Files that are already backed up are hashed again and, when
the digest differs from the recorded one, overwritten with the backup copy.
"""
import os
import logging
import string
import hashlib
import time
import shutil


class Cloner:
    """Keep ``targetDir`` identical to the snapshot stored in ``backupDir``.

    Attributes:
        targetDir: directory that is scanned and, if needed, restored.
        backupDir: directory holding the clones and their ``.md5`` sidecars.
        interval: seconds to sleep between two scans in :meth:`start`.

    NOTE: only entries currently present in ``targetDir`` are visited, so a
    file deleted from the target is not re-created.  The sidecar name is the
    clone name plus ``.md5``, so a target file literally named ``x.md5``
    shares its backup path with the sidecar of ``x``.
    """

    # Number of bytes read per iteration while hashing, so large files are
    # never loaded into memory in one piece.
    _HASH_CHUNK_SIZE = 64 * 1024

    def __init__(self, targetDir: str, backupDir: str):
        self.targetDir = targetDir
        self.backupDir = backupDir
        self.interval = 1

    def _backupPathFor(self, path: str) -> str:
        """Return the location inside ``backupDir`` that mirrors ``path``.

        The mapping is done on path components (not by substring replacement)
        so trailing separators or a repeated directory name cannot corrupt
        the result.

        Raises:
            ValueError: if ``path`` is not located inside ``targetDir``.
        """
        relative = os.path.relpath(path, self.targetDir)
        if relative == os.curdir:
            return self.backupDir
        if relative == os.pardir or relative.startswith(os.pardir + os.sep):
            raise ValueError(
                '{path} is not inside {target}'.format(
                    path=path, target=self.targetDir))
        return os.path.join(self.backupDir, relative)

    @staticmethod
    def _md5OfFile(path: str) -> str:
        """Return the MD5 hex digest of the file at ``path``."""
        digest = hashlib.md5()
        with open(path, 'rb') as stream:
            for chunk in iter(
                    lambda: stream.read(Cloner._HASH_CHUNK_SIZE), b''):
                digest.update(chunk)
        return digest.hexdigest()

    def _recordHash(self, cloneFileName: str, cloneHashFileName: str) -> str:
        """Hash the backup copy, store the digest in its sidecar, return it."""
        digest = self._md5OfFile(cloneFileName)
        with open(cloneHashFileName, 'w') as hashFile:
            hashFile.write(digest)
        return digest

    def handleDir(self, dirPath: str):
        """Recursively process ``dirPath``, which must lie inside the target.

        The mirrored directory is created in the backup tree (including any
        missing parents), then every sub-directory is processed recursively
        and every regular file is passed to :meth:`handleFile`.
        """
        os.makedirs(self._backupPathFor(dirPath), exist_ok=True)

        with os.scandir(dirPath) as entries:
            for entry in entries:
                if entry.is_dir():
                    self.handleDir(entry.path)
                elif entry.is_file():
                    self.handleFile(entry.path)

    def handleFile(self, targetFileName: str):
        """Back up a new file, or verify and restore an already known one.

        Args:
            targetFileName: path of a file inside ``targetDir``.
        """
        cloneFileName = self._backupPathFor(targetFileName)
        cloneHashFileName = cloneFileName + '.md5'

        if not os.path.exists(cloneFileName):
            logging.warning(
                '%s is a new file, added to backup dir', targetFileName)
            cloneParent = os.path.dirname(cloneFileName)
            if cloneParent:
                # handleFile may be called directly for a file whose
                # directory has not been mirrored yet.
                os.makedirs(cloneParent, exist_ok=True)
            shutil.copyfile(targetFileName, cloneFileName)
            # Hash the copy, not the source: the sidecar must describe the
            # bytes that a later restore would actually write back.
            self._recordHash(cloneFileName, cloneHashFileName)
            return

        try:
            with open(cloneHashFileName, 'r') as hashFile:
                expectedHash = hashFile.read().strip()
        except FileNotFoundError:
            # The clone exists but its sidecar is gone; the backup copy is
            # the reference, so rebuild the digest from it instead of failing.
            logging.warning(
                '%s has no recorded hash, regenerating from backup',
                targetFileName)
            expectedHash = self._recordHash(cloneFileName, cloneHashFileName)

        if self._md5OfFile(targetFileName) != expectedHash:
            shutil.copyfile(cloneFileName, targetFileName)
            logging.warning('%s failed check, restoring...', targetFileName)
        else:
            logging.info('%s is fine', targetFileName)

    def setLog(self, logFileName: str):
        """Attach an appending file handler to the root logger.

        The directory of ``logFileName`` is created when it does not exist.
        The root logger level is set to ``WARNING``.
        """
        logDir = os.path.dirname(logFileName)
        if logDir:
            os.makedirs(logDir, exist_ok=True)

        logFormatter = logging.Formatter(
            fmt="[%(asctime)s][%(levelname)s] %(message)s",
            datefmt='%d-%b-%y %H:%M:%S')
        rootLogger = logging.getLogger()

        fileHandler = logging.FileHandler(logFileName, mode='a')
        fileHandler.setFormatter(logFormatter)
        rootLogger.addHandler(fileHandler)

        # # uncomment this to enable console logging
        # consoleHandler = logging.StreamHandler()
        # consoleHandler.setFormatter(logFormatter)
        # rootLogger.addHandler(consoleHandler)

        rootLogger.setLevel(logging.WARNING)

    def setInterval(self, interval: int):
        """Set the pause, in seconds, between two scans in :meth:`start`."""
        self.interval = interval

    def start(self):
        """Scan the target directory forever, sleeping ``interval`` seconds
        after each pass."""
        while True:
            self.handleDir(self.targetDir)
            time.sleep(self.interval)


def main():
    """Parse the command line and run the watcher until interrupted."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--target',
        metavar='PATH',
        type=str,
        required=True,
        help='directory to clone and monitor')
    parser.add_argument(
        '--backup',
        metavar='PATH',
        type=str,
        required=True,
        help='backup directory to store clone'
    )
    parser.add_argument(
        '--interval',
        metavar='SECOND',
        type=int,
        default=1,
        help='interval in second(s) which the scripts run, default is 1'
    )
    parser.add_argument(
        '--log',
        metavar='LOGFILE',
        type=str,
        default='/var/log/py-dir-watcher/log.txt',
        help='specify custom logfile target, default is \'/var/log/py-dir-watcher/log.txt\' '
    )
    args = parser.parse_args()

    app = Cloner(args.target, args.backup)
    app.setLog(args.log)
    app.setInterval(args.interval)
    app.start()


if __name__ == "__main__":
    main()