Python智能文件备份工具开发指南

B站影视 电影资讯 2025-05-21 16:37 1

摘要:标准库:os, shutil, hashlib, argparse, logging, datetime, json;第三方库:watchdog(文件监控)、python-crontab(定时任务)

以下是一个基于Python的智能文件备份工具开发指南,包含完整实现思路和代码示例:

一、项目需求分析

核心功能:自动识别文件变更(增量备份)、多备份策略选择(全量/增量)、智能文件分类存储、跨平台支持(Windows/Linux/macOS)、备份日志记录、排除特定文件类型;高级功能:自动压缩加密、云存储集成、版本控制、定时任务

二、技术选型

标准库:os, shutil, hashlib, argparse, logging, datetime, json;第三方库:watchdog(文件监控)、python-crontab(定时任务)

三、完整实现代码(基础版)

python

import os

import shutil

import hashlib

import json

import logging

from datetime import datetime

class SmartBackup:
    """Incremental/full file backup tool driven by a JSON config file.

    Scans a source directory, records per-file state (mtime, size, MD5),
    copies files into a timestamped backup directory, and prunes old
    backups beyond a configured limit.
    """

    def __init__(self, config_file='config.json'):
        # Fixed: parameter was misspelled `config_File` while the body used
        # `config_file`, and `setup_logging` was referenced without calling it.
        self.config = self.load_config(config_file)
        self.setup_logging()
        self.file_state_db = {}   # filepath -> {'mtime', 'size', 'hash'}
        self.backup_count = 0     # number of files copied during run()

    def load_config(self, config_file):
        """Load configuration from *config_file*, merged over built-in defaults.

        Returns the default config if the file does not exist. Merging (rather
        than returning the raw file contents) means a partial config file
        still yields every required key.
        """
        default_config = {
            "source_dir": "./source",
            "backup_dir": "./backup",
            "exclude_ext": [".tmp", ".log"],
            "exclude_dirs": [".git", "__pycache__"],
            "backup_mode": "incremental",
            "max_backups": 5,
            "compress": False
        }
        try:
            with open(config_file) as f:
                # User-provided keys override defaults; missing keys fall back.
                return {**default_config, **json.load(f)}
        except FileNotFoundError:
            return default_config

    def setup_logging(self):
        """Configure the logging system (backup.log in the CWD, INFO level)."""
        logging.basicConfig(
            filename='backup.log',
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )

    def get_file_hash(self, filepath):
        """Return the MD5 hex digest of *filepath*, read in 4 KiB chunks."""
        hasher = hashlib.md5()   # fixed: was `hashlib.md5` (missing call)
        with open(filepath, 'rb') as f:
            while chunk := f.read(4096):
                hasher.update(chunk)
        return hasher.hexdigest()   # fixed: was `hasher.hexdigest` (missing call)

    def scan_source(self):
        """Walk the source tree and record mtime/size/hash for every file."""
        for root, dirs, files in os.walk(self.config['source_dir']):
            # Prune excluded directories in place so os.walk skips them.
            dirs[:] = [d for d in dirs if d not in self.config['exclude_dirs']]
            for file in files:
                filepath = os.path.join(root, file)
                # Skip excluded extensions.
                if os.path.splitext(file)[1] in self.config['exclude_ext']:
                    continue
                self.file_state_db[filepath] = {
                    'mtime': os.path.getmtime(filepath),
                    'size': os.path.getsize(filepath),
                    'hash': self.get_file_hash(filepath),
                }

    def perform_backup(self):
        """Copy recorded files into a new timestamped backup directory."""
        # fixed: was `datetime.now.strftime` (missing call on now)
        backup_time = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_root = os.path.join(
            self.config['backup_dir'],
            f"backup_{backup_time}"
        )
        try:
            os.makedirs(backup_root, exist_ok=True)
        except OSError as e:
            logging.error(f"创建备份目录失败: {e}")
            return
        # fixed: was `self.file_state_db.items` (missing call)
        for filepath, state in self.file_state_db.items():
            rel_path = os.path.relpath(filepath, self.config['source_dir'])
            target_path = os.path.join(backup_root, rel_path)
            # Ensure the mirrored directory structure exists.
            os.makedirs(os.path.dirname(target_path), exist_ok=True)
            try:
                # Full mode copies everything; incremental copies files that
                # are new in this backup dir or modified since the scan.
                if self.config['backup_mode'] == 'full' or \
                        not os.path.exists(target_path) or \
                        self.is_file_modified(filepath, state):
                    shutil.copy2(filepath, target_path)
                    logging.info(f"备份文件: {rel_path}")
                    self.backup_count += 1
            except Exception as e:
                logging.error(f"备份失败 {filepath}: {e}")
        # fixed: was `self.clean_old_backups` (missing call)
        self.clean_old_backups()

    def is_file_modified(self, filepath, state):
        """Return True if *filepath* differs from its recorded *state*."""
        try:
            return (os.path.getmtime(filepath) != state['mtime'] or
                    os.path.getsize(filepath) != state['size'] or
                    self.get_file_hash(filepath) != state['hash'])
        except FileNotFoundError:
            # File vanished since the scan; nothing to re-copy.
            return False

    def clean_old_backups(self):
        """Delete the oldest backup directories beyond the max_backups limit."""
        backups = sorted([
            (os.path.getmtime(os.path.join(self.config['backup_dir'], d)), d)
            for d in os.listdir(self.config['backup_dir'])
            if d.startswith("backup_")
        ], reverse=True)   # newest first; pop() removes the oldest
        while len(backups) > self.config['max_backups']:
            _, oldest = backups.pop()   # fixed: was `backups.pop` (missing call)
            shutil.rmtree(os.path.join(self.config['backup_dir'], oldest))
            logging.info(f"删除旧备份: {oldest}")

    def run(self):
        """Execute one backup cycle: scan the source, then back it up."""
        logging.info("=== 开始备份任务 ===")
        # fixed: both calls below lacked parentheses and never executed
        self.scan_source()
        self.perform_backup()
        logging.info(f"备份完成,共备份 {self.backup_count} 个文件")

if __name__ == "__main__":
    # Run a single backup pass using config.json (or built-in defaults).
    # fixed: `SmartBackup` and `backup.run` were referenced without calling
    # them, so nothing ever executed.
    backup = SmartBackup()
    backup.run()

四、配置文件示例(config.json)

json

{

"source_dir": "/path/to/source",

"backup_dir": "/path/to/backups",

"exclude_ext": [".tmp", ".log", ".cache"],

"exclude_dirs": [".git", "node_modules"],

"backup_mode": "incremental",

"max_backups": 7,

"compress": false

}

五、功能扩展建议

增加压缩功能:

python

import zipfile

def compress_backup(self, backup_path):
    """Zip *backup_path* into <backup_path>.zip, then delete the directory.

    Entries are stored with paths relative to *backup_path*, so the archive
    unpacks cleanly anywhere. Intended as a SmartBackup method; `self` is
    unused here.
    """
    zip_name = f"{backup_path}.zip"
    with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(backup_path):
            for file in files:
                file_path = os.path.join(root, file)
                zipf.write(file_path,
                           os.path.relpath(file_path, backup_path))
    # Drop the uncompressed directory once the archive is fully written.
    shutil.rmtree(backup_path)

添加文件监控(使用watchdog):

python

from watchdog.observers import Observer

from watchdog.events import FileSystemEventHandler

class BackupHandler(FileSystemEventHandler):
    """watchdog event handler that re-runs the backup on file modifications."""

    def __init__(self, backup):
        # Keep a reference to the SmartBackup instance to trigger.
        self.backup = backup

    def on_modified(self, event):
        # Directory-change events are noise; only react to file edits.
        if not event.is_directory:
            # fixed: was `self.backup.run` (missing call) — the backup
            # never actually ran on modification events.
            self.backup.run()

def start_monitoring(self):
    """Watch source_dir with watchdog and back up on every modification.

    Blocks the calling thread until interrupted (Ctrl-C). Intended as a
    SmartBackup method.
    """
    # `time` was used but never imported in the original snippet.
    import time

    event_handler = BackupHandler(self)
    observer = Observer()   # fixed: was `Observer` (missing call)
    observer.schedule(event_handler,
                      self.config['source_dir'],
                      recursive=True)
    observer.start()   # fixed: start/stop/join all lacked parentheses
    try:
        # Idle loop: watchdog dispatches events on background threads.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()

六、使用说明

安装依赖:

bash

pip install watchdog python-crontab

运行方式:单次备份:python backup_tool.py;监控模式:在配置文件中添加"watch_mode": true后运行;定时任务配置:

python

from crontab import CronTab

def schedule_backup(self, schedule="0 2 * * *"):
    """Install a user cron job running this script (default: daily 02:00).

    *schedule* is a standard 5-field cron expression. Requires the
    third-party python-crontab package (CronTab).
    """
    cron = CronTab(user=True)
    job = cron.new(command=f'python {os.path.abspath(__file__)}')
    job.setall(schedule)
    # fixed: was `cron.write` (missing call) — the job was never persisted
    # to the crontab.
    cron.write()

使用rsync算法优化大文件传输;添加网络传输协议支持(SFTP/FTP);实现差分备份策略;添加AES文件加密功能;支持AWS S3/Google Drive等云存储;开发GUI界面(使用PyQt)

这个工具可以根据实际需求进行扩展,建议从基础版本开始逐步添加功能。注意处理文件权限和不同操作系统的路径差异问题,关键操作需要添加异常处理保证程序健壮性。

来源:老客数据一点号

相关推荐