Files
Py-Backup/core/data_processing.py
Désiré Werner Menrath 988b0e8d1d fix(app): Fix the UI hanging and the incorrect size calculation
Several fundamental problems in the application logic have been fixed:

- **UI processing loop:** The handling of messages from background threads was completely reworked. Previously, two competing loops caused a race condition in which messages were lost. There is now a single, central processing loop that handles messages in batches (see the first sketch after this list). This fixes the issue of the user interface remaining stuck in the "in progress" state after deleting or completing a backup.

- **Backup size calculation:** Determining the size of incremental backups was made more robust (see the second sketch after this list).
    - The rsync output is now reliably forced to English to avoid parsing errors under other system locales.
    - The size is now read from rsync's `sent... received...` summary line, which yields a non-zero value even for backups without data changes.
    - Full backups (which show the total size) and incremental backups (which show the transfer size) are now correctly distinguished.

- **Other fixes:**
    - A missing translation for the manual exclusion list was added.
    - A redundant call that started the processing loop was removed.
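
For illustration, here is a minimal sketch of the single batched processing loop described above. It assumes a Tkinter app fed by worker threads through a `queue.Queue`; the names `MainApp`, `_process_queue`, and `handle_message` are illustrative and not taken from this repository:

```python
import queue
import tkinter as tk

class MainApp(tk.Tk):
    """Illustrative app: one central loop drains the thread queue in batches."""

    def __init__(self):
        super().__init__()
        self.queue = queue.Queue()
        # Exactly one scheduled loop; a second competing loop would race
        # for messages and could swallow completion events.
        self.after(100, self._process_queue)

    def _process_queue(self):
        # Drain everything currently queued in one batch instead of
        # handling a single message per tick.
        while True:
            try:
                message = self.queue.get_nowait()
            except queue.Empty:
                break
            self.handle_message(message)
        # Re-schedule: the single loop keeps itself alive.
        self.after(100, self._process_queue)

    def handle_message(self, message):
        # E.g. the (button_text, size, mode) tuples enqueued by the
        # worker methods in the file below.
        button_text, size, mode = message
        print(f"{mode}: {button_text} -> {size} bytes")
```

Because the loop drains the queue completely and re-schedules itself exactly once per pass, there is no second consumer that can steal the final completion message and leave the UI stuck in the "in progress" state.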
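Likewise, a hedged sketch of the size estimation described above: forcing rsync into the C locale so its output is in English, then reading the transferred byte count from the `sent ... received ...` summary line. The function name and the exact regex are assumptions, not this project's code; note that the file below still parses the `Total transferred file size` stats line, while this sketch shows the summary-line variant the commit message describes:

```python
import os
import re
import subprocess

def estimate_transfer_size(source: str, dest: str) -> int:
    """Dry-run rsync and parse the 'sent ... received ...' summary line."""
    # Force the C locale so the summary is emitted in English regardless
    # of the system locale.
    env = {**os.environ, "LC_ALL": "C", "LANG": "C"}
    result = subprocess.run(
        ["rsync", "-an", "--stats", source, dest],
        capture_output=True, text=True, env=env, check=False,
    )
    # Example line: "sent 1,234 bytes  received 56 bytes  2,580.00 bytes/sec"
    match = re.search(r"sent ([\d.,]+) bytes\s+received ([\d.,]+) bytes",
                      result.stdout)
    if not match:
        return 0
    # Strip comma (and dot) group separators before converting.
    return int(re.sub(r"[.,]", "", match.group(1)))
```

Unlike the `Total transferred file size` figure, the `sent` count stays non-zero even when no file data changed, because rsync always exchanges some protocol bytes.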
2025-09-02 13:59:06 +02:00

207 lines
8.8 KiB
Python

# Py-Backup/core/data_processing.py
import os
import fnmatch
import shutil
import re
import subprocess
from queue import Empty

from pbp_app_config import AppConfig, Msg
from shared_libs.logger import app_logger


class DataProcessing:
    def __init__(self, app):
        self.app = app
    def load_exclude_patterns(self):
        """Merges the generated, user-defined, and manual exclusion lists
        into one sorted, de-duplicated pattern list."""
        all_patterns = set()
        try:
            if AppConfig.GENERATED_EXCLUDE_LIST_PATH.exists():
                with open(AppConfig.GENERATED_EXCLUDE_LIST_PATH, 'r') as f:
                    generated_patterns = [
                        line.strip() for line in f if line.strip() and not line.startswith('#')]
                all_patterns.update(generated_patterns)
                app_logger.log(
                    f"Loaded generated exclusion patterns: {generated_patterns}")
        except FileNotFoundError:
            app_logger.log(
                f"Generated exclusion list not found: {AppConfig.GENERATED_EXCLUDE_LIST_PATH}")
        except IOError as e:
            app_logger.log(f"Error loading generated exclusion list: {e}")
        try:
            if AppConfig.USER_EXCLUDE_LIST_PATH.exists():
                with open(AppConfig.USER_EXCLUDE_LIST_PATH, 'r') as f:
                    user_patterns = [
                        line.strip() for line in f if line.strip() and not line.startswith('#')]
                all_patterns.update(user_patterns)
                app_logger.log(
                    f"Loaded user-defined exclusion patterns: {user_patterns}")
        except FileNotFoundError:
            app_logger.log(
                f"User-defined exclusion list not found: {AppConfig.USER_EXCLUDE_LIST_PATH}")
        except IOError as e:
            app_logger.log(f"Error loading user-defined exclusion list: {e}")
        try:
            if AppConfig.MANUAL_EXCLUDE_LIST_PATH.exists():
                with open(AppConfig.MANUAL_EXCLUDE_LIST_PATH, 'r') as f:
                    manual_patterns = [
                        line.strip() for line in f if line.strip() and not line.startswith('#')]
                all_patterns.update(manual_patterns)
                app_logger.log(
                    f"Loaded manual exclusion patterns: {manual_patterns}")
        except FileNotFoundError:
            app_logger.log(
                f"Manual exclusion list not found: {AppConfig.MANUAL_EXCLUDE_LIST_PATH}")
        except IOError as e:
            app_logger.log(f"Error loading manual exclusion list: {e}")
        final_patterns = sorted(all_patterns)
        app_logger.log(f"Combined exclusion patterns: {final_patterns}")
        return final_patterns
    def get_folder_size_threaded(self, path, button_text, stop_event, exclude_patterns=None, mode='backup'):
        """Walks `path`, sums file sizes while honoring the exclusion
        patterns, and posts the result to the app queue."""
        total_size = 0
        if exclude_patterns is None:
            exclude_patterns = []
        # Compile exclude patterns into a single regex for performance.
        if exclude_patterns:
            exclude_regex = re.compile(
                '|'.join(fnmatch.translate(p) for p in exclude_patterns))
        else:
            exclude_regex = None
        for dirpath, dirnames, filenames in os.walk(path, topdown=True):
            if stop_event.is_set():
                return  # Stop the calculation
            if exclude_regex:
                # Prune excluded directories in place so os.walk skips them.
                dirnames[:] = [d for d in dirnames if not exclude_regex.match(
                    os.path.join(dirpath, d))]
            for f in filenames:
                if stop_event.is_set():
                    return  # Stop the calculation
                fp = os.path.join(dirpath, f)
                if not exclude_regex or not exclude_regex.match(fp):
                    if not os.path.islink(fp):
                        try:
                            total_size += os.path.getsize(fp)
                        except OSError:
                            pass
        if not stop_event.is_set():
            self.app.queue.put((button_text, total_size, mode))
    def get_user_folder_size_threaded(self, path, button_text, stop_event, mode='backup'):
        """Calculates folder size without applying any exclusion lists."""
        total_size = 0
        for dirpath, dirnames, filenames in os.walk(path, topdown=True):
            if stop_event.is_set():
                return  # Stop the calculation
            for f in filenames:
                if stop_event.is_set():
                    return  # Stop the calculation
                fp = os.path.join(dirpath, f)
                if not os.path.islink(fp):
                    try:
                        total_size += os.path.getsize(fp)
                    except OSError:
                        pass
        if not stop_event.is_set():
            self.app.queue.put((button_text, total_size, mode))
    def get_incremental_backup_size(self, source_path: str, dest_path: str, is_system: bool, exclude_files: list = None) -> int:
        """
        Calculates the approximate size of an incremental backup using rsync's dry-run feature.
        This is much faster than walking the entire file tree.
        """
        app_logger.log(f"Calculating incremental backup size for source: {source_path}")
        parent_dest = os.path.dirname(dest_path)
        if not os.path.exists(parent_dest):
            # If the parent destination doesn't exist, there are no previous
            # backups to link to, so the incremental size would equal the full
            # size of the source. We could reuse the existing full-size
            # calculation here; this simplified estimation returns 0 instead.
            # A more accurate approach would run rsync without --link-dest.
            app_logger.log("Destination parent does not exist, cannot calculate incremental size. Returning 0.")
            return 0
        # Find the latest backup to link against.
        try:
            backups = sorted([d for d in os.listdir(parent_dest) if os.path.isdir(os.path.join(parent_dest, d))], reverse=True)
            if not backups:
                app_logger.log("No previous backups found. Incremental size is full size.")
                return 0  # Or trigger a full size calculation
            latest_backup_path = os.path.join(parent_dest, backups[0])
        except FileNotFoundError:
            app_logger.log("Could not list backups, assuming no prior backups exist.")
            return 0
        command = []
        if is_system:
            command.extend(['pkexec', 'rsync', '-aAXHvn', '--stats'])
        else:
            command.extend(['rsync', '-avn', '--stats'])
        command.append(f"--link-dest={latest_backup_path}")
        if exclude_files:
            for exclude_file in exclude_files:
                command.append(f"--exclude-from={exclude_file}")
        if AppConfig.MANUAL_EXCLUDE_LIST_PATH.exists():
            command.append(f"--exclude-from={AppConfig.MANUAL_EXCLUDE_LIST_PATH}")
        # The destination for a dry run can be a dummy path, but it must
        # exist, so create a temporary directory next to the real backups.
        dummy_dest = os.path.join(parent_dest, "dry_run_dest")
        os.makedirs(dummy_dest, exist_ok=True)
        command.extend([source_path, dummy_dest])
        app_logger.log(f"Executing rsync dry-run command: {' '.join(command)}")
        try:
            result = subprocess.run(command, capture_output=True, text=True, check=False)
            if result.returncode != 0:
                app_logger.log(f"Rsync dry-run failed with code {result.returncode}: {result.stderr}")
                return 0
            output = result.stdout + "\n" + result.stderr
            # The regex accepts dots as well as commas as thousands separators (e.g., 1.234.567).
            match = re.search(r"Total transferred file size: ([\d,.]+) bytes", output)
            if match:
                # Remove both dots and commas before converting to an integer.
                size_str = match.group(1).replace(',', '').replace('.', '')
                size_bytes = int(size_str)
                app_logger.log(f"Estimated incremental backup size: {size_bytes} bytes")
                return size_bytes
            else:
                app_logger.log("Could not find 'Total transferred file size' in rsync output.")
                # Log the output in case the rsync format changes in the future.
                app_logger.log(f"Full rsync output for debugging:\n{output}")
                return 0
        except FileNotFoundError:
            app_logger.log("Error: 'rsync' or 'pkexec' command not found.")
            return 0
        except Exception as e:
            app_logger.log(f"An unexpected error occurred during incremental size calculation: {e}")
            return 0
        finally:
            # Clean up the dummy directory even if the subprocess call raised.
            shutil.rmtree(dummy_dest, ignore_errors=True)

# The queue processing logic has been moved to main_app.py
# to fix a race condition and ensure all queue messages are handled correctly.