摘要:本文介绍一个基于 Python 的智能文件备份工具的完整开发指南,涵盖需求分析、技术选型、基础版代码实现与功能扩展建议。
以下是一个基于Python的智能文件备份工具开发指南,包含完整实现思路和代码示例:
一、项目需求分析
核心功能:自动识别文件变更(增量备份)、多备份策略选择(全量/增量)、智能文件分类存储、跨平台支持(Windows/Linux/macOS)、备份日志记录、排除特定文件类型。高级功能:自动压缩加密、云存储集成、版本控制、定时任务。
二、技术选型
标准库:os, shutil, hashlib, argparse, logging, datetime, json;第三方库:watchdog(文件监控)、python-crontab(定时任务)。
三、完整实现代码(基础版)
python
import os
import shutil
import hashlib
import json
import logging
from datetime import datetime
class SmartBackup:
    """Incremental/full file backup tool driven by a JSON config file.

    Scans a source directory, records each file's mtime/size/MD5 hash,
    copies new or changed files into a timestamped backup directory,
    and prunes old backups beyond ``max_backups``.
    """

    def __init__(self, config_file='config.json'):
        """Load configuration, configure logging, and reset run state.

        Bug fixes vs. the original: parameter was misspelled
        ``config_File`` (the body referenced ``config_file``), and
        ``setup_logging`` was referenced but never called.
        """
        self.config = self.load_config(config_file)
        self.setup_logging()
        # filepath -> {'mtime': float, 'size': int, 'hash': str}
        self.file_state_db = {}
        # Number of files copied during the current run.
        self.backup_count = 0

    def load_config(self, config_file):
        """Load the JSON configuration, falling back to defaults.

        Keys present in the file override the defaults, so a partial
        config file is still usable. A missing file yields the defaults.
        """
        default_config = {
            "source_dir": "./source",
            "backup_dir": "./backup",
            "exclude_ext": [".tmp", ".log"],
            "exclude_dirs": [".git", "__pycache__"],
            "backup_mode": "incremental",
            "max_backups": 5,
            "compress": False
        }
        try:
            with open(config_file) as f:
                # Merge so that keys absent from the file keep defaults.
                return {**default_config, **json.load(f)}
        except FileNotFoundError:
            return default_config

    def setup_logging(self):
        """Route log records to ``backup.log`` with timestamps."""
        logging.basicConfig(
            filename='backup.log',
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )

    def get_file_hash(self, filepath):
        """Return the MD5 hex digest of *filepath*, read in 4 KiB chunks.

        Bug fix: ``hashlib.md5`` and ``hexdigest`` were referenced
        without being called.
        """
        hasher = hashlib.md5()
        with open(filepath, 'rb') as f:
            while chunk := f.read(4096):
                hasher.update(chunk)
        return hasher.hexdigest()

    def scan_source(self):
        """Walk the source directory and record each file's state."""
        for root, dirs, files in os.walk(self.config['source_dir']):
            # Prune excluded directories in place so os.walk skips them.
            dirs[:] = [d for d in dirs if d not in self.config['exclude_dirs']]
            for file in files:
                filepath = os.path.join(root, file)
                # Skip excluded extensions.
                if os.path.splitext(file)[1] in self.config['exclude_ext']:
                    continue
                self.file_state_db[filepath] = {
                    'mtime': os.path.getmtime(filepath),
                    'size': os.path.getsize(filepath),
                    'hash': self.get_file_hash(filepath)
                }

    def perform_backup(self):
        """Copy recorded files into a new timestamped backup directory.

        In 'full' mode every file is copied; otherwise only files that
        are new or modified since they were scanned. Bug fixes:
        ``datetime.now``, ``.items`` and ``clean_old_backups`` were
        referenced without being called.
        """
        backup_time = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_root = os.path.join(
            self.config['backup_dir'],
            f"backup_{backup_time}"
        )
        try:
            os.makedirs(backup_root, exist_ok=True)
        except OSError as e:
            logging.error(f"创建备份目录失败: {e}")
            return
        for filepath, state in self.file_state_db.items():
            rel_path = os.path.relpath(filepath, self.config['source_dir'])
            target_path = os.path.join(backup_root, rel_path)
            # Mirror the source's directory structure under the backup root.
            os.makedirs(os.path.dirname(target_path), exist_ok=True)
            try:
                if self.config['backup_mode'] == 'full' or \
                        not os.path.exists(target_path) or \
                        self.is_file_modified(filepath, state):
                    shutil.copy2(filepath, target_path)
                    logging.info(f"备份文件: {rel_path}")
                    self.backup_count += 1
            except Exception as e:
                logging.error(f"备份失败 {filepath}: {e}")
        # Prune backups beyond the configured retention count.
        self.clean_old_backups()

    def is_file_modified(self, filepath, state):
        """Return True if the file differs from its recorded *state*.

        A file deleted since the scan is reported as unmodified so the
        backup loop simply skips it.
        """
        try:
            current_mtime = os.path.getmtime(filepath)
            current_size = os.path.getsize(filepath)
            current_hash = self.get_file_hash(filepath)
            return (current_mtime != state['mtime'] or
                    current_size != state['size'] or
                    current_hash != state['hash'])
        except FileNotFoundError:
            return False

    def clean_old_backups(self):
        """Delete the oldest ``backup_*`` directories beyond max_backups.

        Bug fix: ``backups.pop`` was referenced without being called.
        """
        backups = sorted([
            (os.path.getmtime(os.path.join(self.config['backup_dir'], d)), d)
            for d in os.listdir(self.config['backup_dir'])
            if d.startswith("backup_")
        ], reverse=True)
        # reverse=True puts newest first, so pop() removes the oldest.
        while len(backups) > self.config['max_backups']:
            _, oldest = backups.pop()
            shutil.rmtree(os.path.join(self.config['backup_dir'], oldest))
            logging.info(f"删除旧备份: {oldest}")

    def run(self):
        """Execute one scan-then-backup pass and log a summary.

        Bug fix: ``scan_source`` and ``perform_backup`` were referenced
        without being called.
        """
        logging.info("=== 开始备份任务 ===")
        self.scan_source()
        self.perform_backup()
        logging.info(f"备份完成,共备份 {self.backup_count} 个文件")
if __name__ == "__main__":
    # Entry point: run a single backup pass with the default config path.
    # Bug fixes: `SmartBackup` was never instantiated (missing ()) and
    # `backup.run` was never called (missing ()).
    backup = SmartBackup()
    backup.run()
四、配置文件示例(config.json)
json
{
"source_dir": "/path/to/source",
"backup_dir": "/path/to/backups",
"exclude_ext": [".tmp", ".log", ".cache"],
"exclude_dirs": [".git", "node_modules"],
"backup_mode": "incremental",
"max_backups": 7,
"compress": false
}
五、功能扩展建议
增加压缩功能:python
import zipfile
def compress_backup(self, backup_path):
    """Zip the backup directory, then remove the uncompressed copy.

    The archive is written next to *backup_path* as ``<backup_path>.zip``
    with entries stored relative to the backup root.
    """
    archive_name = f"{backup_path}.zip"
    with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as archive:
        for current_dir, _subdirs, filenames in os.walk(backup_path):
            for filename in filenames:
                full_path = os.path.join(current_dir, filename)
                archive.write(full_path,
                              os.path.relpath(full_path, backup_path))
    # Everything is in the archive now; drop the raw directory tree.
    shutil.rmtree(backup_path)
添加文件监控(使用watchdog):python
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
class BackupHandler(FileSystemEventHandler):
    """watchdog event handler that triggers a backup run on changes."""

    def __init__(self, backup):
        super().__init__()  # initialize the watchdog base class
        # The SmartBackup instance whose run() we trigger.
        self.backup = backup

    def on_modified(self, event):
        # Directory events also fire for changes to contained files;
        # only react to file-level events to avoid duplicate runs.
        if not event.is_directory:
            # Bug fix: `self.backup.run` was referenced but never called.
            self.backup.run()
def start_monitoring(self):
    """Watch the source directory and re-run the backup on every change.

    Blocks until interrupted with Ctrl-C (KeyboardInterrupt), then shuts
    the observer down cleanly. Bug fixes: `Observer`, `observer.start`,
    `observer.stop` and `observer.join` were referenced but never called,
    and `time` was never imported.
    """
    import time  # local import: `time` is not imported at file level

    event_handler = BackupHandler(self)
    observer = Observer()
    observer.schedule(event_handler,
                      self.config['source_dir'],
                      recursive=True)
    observer.start()
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
六、使用说明
安装依赖:bash
pip install watchdog python-crontab
运行方式:单次备份:`python backup_tool.py`;监控模式:在配置文件中添加 "watch_mode": true 后运行;定时任务配置:python
from crontab import CronTab
def schedule_backup(self, schedule="0 2 * * *"):
    """Register a cron job that runs this script (default: 02:00 daily).

    *schedule* is a standard five-field cron expression. Bug fix:
    `cron.write` was referenced but never called, so the new job was
    never persisted to the user's crontab.
    """
    cron = CronTab(user=True)
    job = cron.new(command=f'python {os.path.abspath(__file__)}')
    job.setall(schedule)
    cron.write()
进一步扩展方向:使用 rsync 算法优化大文件传输;添加网络传输协议支持(SFTP/FTP);实现差分备份策略;添加 AES 文件加密功能;支持 AWS S3/Google Drive 等云存储;开发 GUI 界面(使用 PyQt)。这个工具可以根据实际需求进行扩展,建议从基础版本开始逐步添加功能。注意处理文件权限和不同操作系统的路径差异问题,关键操作需要添加异常处理保证程序健壮性。
来源:老客数据一点号