From 87e972bb43c0cf94aa3d5a9c0a8d74e8419a32ec Mon Sep 17 00:00:00 2001 From: Marcel Peterkau Date: Sat, 27 Jun 2026 10:35:35 +0200 Subject: [PATCH] Add JSON integrity hash checks --- src/ccma/domain/models.py | 4 +- src/ccma/services/housekeeper.py | 91 +++++++++++++++++++++++++++++++- src/ccma/storage/atomic.py | 41 +++++++++++++- src/ccma/storage/repository.py | 44 ++++++++++++++- src/ccma/ui/dialogs.py | 43 +++++++++++++++ src/ccma/ui/main_window.py | 14 ++++- src/ccma/ui/work_tabs.py | 10 ++-- tests/test_repository.py | 47 +++++++++++++++++ tests/test_ui_imports.py | 19 ++++++- 9 files changed, 302 insertions(+), 11 deletions(-) diff --git a/src/ccma/domain/models.py b/src/ccma/domain/models.py index 04682f6..ea4b926 100644 --- a/src/ccma/domain/models.py +++ b/src/ccma/domain/models.py @@ -295,10 +295,12 @@ class ContributionData: @dataclass(frozen=True, slots=True) class HousekeeperFinding: severity: str - member_id: str code: str title: str detail: str + member_id: str = "" + asset_id: str = "" + target_type: str = "member" due_date: date | None = None key: str = "" diff --git a/src/ccma/services/housekeeper.py b/src/ccma/services/housekeeper.py index 32346fb..d51b370 100644 --- a/src/ccma/services/housekeeper.py +++ b/src/ccma/services/housekeeper.py @@ -79,7 +79,9 @@ class Housekeeper: items = _items_by_key(working) successful_scopes: set[tuple[str, str]] = set() member_ids = set(self.repository.list_member_ids()) + asset_ids = set(self.repository.list_asset_ids()) _remove_orphaned_member_items(items, member_ids) + _remove_orphaned_asset_items(items, asset_ids) rules = load_rules(self.repository.root) repository_config = self.repository.get_configuration() @@ -93,6 +95,15 @@ class Housekeeper: successful_scopes.add(("member-record-check", member_id)) continue successful_scopes.add(("member-record-check", member_id)) + self._refresh_hash_integrity_tasks( + items, + target_type="member", + target_id=member_id, + warnings=self.repository.member_hash_warnings(member_id), + run_id=run_id, + now=now, + ) + successful_scopes.add(("member-hash-check", member_id)) for rule in rules: scope = (rule.rule_id, member.member_id) try: @@ -130,6 +141,17 @@ class Housekeeper: else: successful_scopes.add(scope) + for asset_id in sorted(asset_ids): + self._refresh_hash_integrity_tasks( + items, + target_type="asset", + target_id=asset_id, + warnings=self.repository.asset_hash_warnings(asset_id), + run_id=run_id, + now=now, + ) + successful_scopes.add(("asset-hash-check", asset_id)) + self._resolve_stale_tasks(items, successful_scopes, run_id, now) working.update( { @@ -198,6 +220,58 @@ class Housekeeper: ) items[key] = item + @staticmethod + def _refresh_hash_integrity_tasks( + items: dict[str, dict[str, Any]], + *, + target_type: str, + target_id: str, + warnings: list[str], + run_id: str, + now: str, + ) -> None: + key = f"{target_type}-hash-check:{target_id}:json-hash-mismatch" + if not warnings: + item = items.get(key) + if item and item.get("status") == "open": + item["status"] = "resolved" + item["resolved_run"] = run_id + item["resolved_at"] = now + return + item = items.get(key, {}) + was_resolved = item.get("status") == "resolved" + item.update( + { + "key": key, + "rule_id": f"{target_type}-hash-check", + "rule_file": "", + "rule_source": "housekeeper", + "member_id": target_id if target_type == "member" else "", + "asset_id": target_id if target_type == "asset" else "", + "target_type": target_type, + "action": "task", + "status": "open", + "severity": "warning", + "code": "json_hash_mismatch", + "title": ( + f"Mitgliederakte {target_id}: JSON extern geändert" + if target_type == "member" + else f"Assetakte {target_id}: JSON extern geändert" + ), + "detail": " ; ".join(warnings), + "due_date": None, + "first_seen_run": item.get("first_seen_run", run_id), + "first_seen_at": item.get("first_seen_at", now), + "last_seen_run": run_id, + "last_seen_at": now, + "seen_count": int(item.get("seen_count", 0)) + 1, + "reopened_count": int(item.get("reopened_count", 0)) + (1 if was_resolved else 0), + "resolved_run": None, + "resolved_at": None, + } + ) + items[key] = item + def _apply_action( self, items: dict[str, dict[str, Any]], @@ -343,7 +417,8 @@ class Housekeeper: for item in items.values(): if item.get("action") != "task" or item.get("status") != "open": continue - scope = (str(item.get("rule_id", "")), str(item.get("member_id", ""))) + target_id = str(item.get("member_id", "") or item.get("asset_id", "")) + scope = (str(item.get("rule_id", "")), target_id) if scope not in successful_scopes or item.get("last_seen_run") == run_id: continue item["status"] = "resolved" @@ -396,10 +471,12 @@ def _open_findings(items: list[dict[str, Any]]) -> list[HousekeeperFinding]: findings.append( HousekeeperFinding( severity=str(item.get("severity", "info")), - member_id=str(item.get("member_id", "")), code=str(item.get("code", item.get("rule_id", "housekeeper"))), title=str(item.get("title", item.get("key", "Hausmeister"))), detail=str(item.get("detail", "")), + member_id=str(item.get("member_id", "")), + asset_id=str(item.get("asset_id", "")), + target_type=str(item.get("target_type", "member")), due_date=due_date, key=str(item.get("key", "")), ) @@ -421,6 +498,16 @@ def _remove_orphaned_member_items(items: dict[str, dict[str, Any]], member_ids: del items[key] +def _remove_orphaned_asset_items(items: dict[str, dict[str, Any]], asset_ids: set[str]) -> None: + orphaned_keys = [ + key + for key, item in items.items() + if item.get("asset_id") and str(item["asset_id"]) not in asset_ids + ] + for key in orphaned_keys: + del items[key] + + def _non_negative_delay(value: float) -> float: try: delay = float(value) diff --git a/src/ccma/storage/atomic.py b/src/ccma/storage/atomic.py index 2cfd925..196b69f 100644 --- a/src/ccma/storage/atomic.py +++ b/src/ccma/storage/atomic.py @@ -1,17 +1,56 @@ +import hashlib import json import os import tempfile +from copy import deepcopy from pathlib import Path from typing import Any +CONTENT_HASH_FIELD = "content_hash" + + +def _hashable_copy(data: Any, *, hash_field: str = CONTENT_HASH_FIELD) -> Any: + if isinstance(data, dict): + return {key: _hashable_copy(value, hash_field=hash_field) for key, value in data.items() if key != hash_field} + if isinstance(data, list): + return [_hashable_copy(item, hash_field=hash_field) for item in data] + return data + + +def compute_json_content_hash(data: Any, *, hash_field: str = CONTENT_HASH_FIELD) -> str: + payload = json.dumps( + _hashable_copy(data, hash_field=hash_field), + ensure_ascii=False, + sort_keys=True, + separators=(",", ":"), + ).encode("utf-8") + return hashlib.sha256(payload).hexdigest() + + +def attach_json_content_hash(data: Any, *, hash_field: str = CONTENT_HASH_FIELD) -> Any: + cloned = deepcopy(data) + if isinstance(cloned, dict): + cloned[hash_field] = compute_json_content_hash(cloned, hash_field=hash_field) + return cloned + + +def json_content_hash_matches(data: Any, *, hash_field: str = CONTENT_HASH_FIELD) -> bool: + if not isinstance(data, dict): + return True + stored = str(data.get(hash_field, "")).strip() + if not stored: + return False + return stored == compute_json_content_hash(data, hash_field=hash_field) + def write_json_atomic(path: Path, data: Any) -> None: path.parent.mkdir(parents=True, exist_ok=True) descriptor, temporary_name = tempfile.mkstemp(prefix=f".{path.name}.", suffix=".tmp", dir=path.parent) temporary = Path(temporary_name) + payload = attach_json_content_hash(data) try: with os.fdopen(descriptor, "w", encoding="utf-8", newline="\n") as handle: - json.dump(data, handle, ensure_ascii=False, indent=2) + json.dump(payload, handle, ensure_ascii=False, indent=2) handle.write("\n") handle.flush() os.fsync(handle.fileno()) diff --git a/src/ccma/storage/repository.py b/src/ccma/storage/repository.py index 4c6bfdf..c976197 100644 --- a/src/ccma/storage/repository.py +++ b/src/ccma/storage/repository.py @@ -21,7 +21,7 @@ from ccma.domain.contributions import ( ) from ccma.domain.dates import DateValidationError, normalize_date_input, validate_member_dates from ccma.domain.models import ASSET_STATUS_LABELS, MEMBERSHIP_STATUS_LABELS, Asset, ContributionData, Event, Member -from ccma.storage.atomic import read_json, write_json_atomic +from ccma.storage.atomic import json_content_hash_matches, read_json, write_json_atomic class RepositoryError(RuntimeError): @@ -142,6 +142,8 @@ class MemberRepository: errors: list[str] = [] try: config = read_json(self.root / "repository.json") + if not json_content_hash_matches(config): + errors.append("repository.json: Hash fehlt oder stimmt nicht; Datei wurde vermutlich extern geändert.") if int(config.get("schema_version", 0)) != 1: errors.append("repository.json: nicht unterstützte schema_version") policy = config.get("member_number_policy") or {} @@ -155,6 +157,7 @@ class MemberRepository: for member_dir in self._member_directories(): try: member, _contributions = self.preflight_member_record(member_dir.name) + errors.extend(f"{member_dir.name}/{warning}" for warning in self.member_hash_warnings(member_dir.name)) validate_member_dates( birth_date=member.birth_date, accepted_at=member.accepted_at, @@ -183,6 +186,7 @@ class MemberRepository: for asset_dir in self._asset_directories(): try: asset = self.get_asset(asset_dir.name) + errors.extend(f"{asset_dir.name}/{warning}" for warning in self.asset_hash_warnings(asset_dir.name)) if asset.asset_id != asset_dir.name: errors.append(f"{asset_dir.name}/asset.json: asset_id stimmt nicht mit Ordner überein") if asset.schema_version != 1: @@ -1211,6 +1215,44 @@ class MemberRepository: raise RepositoryError("repository.json enthält keine gültige Konfiguration.") return configuration + def member_hash_warnings(self, member_id: str) -> list[str]: + warnings: list[str] = [] + try: + member_raw = read_json(self._member_path(member_id) / "member.json") + if not json_content_hash_matches(member_raw): + warnings.append("member.json: Hash fehlt oder stimmt nicht; Datei wurde vermutlich extern geändert.") + except (OSError, ValueError, TypeError, json.JSONDecodeError): + pass + try: + contributions_raw = read_json(self._member_path(member_id) / "contributions.json") + if not json_content_hash_matches(contributions_raw): + warnings.append( + "contributions.json: Hash fehlt oder stimmt nicht; Datei wurde vermutlich extern geändert." + ) + except (OSError, ValueError, TypeError, json.JSONDecodeError): + pass + return warnings + + def asset_hash_warnings(self, asset_id: str) -> list[str]: + warnings: list[str] = [] + try: + asset_raw = read_json(self._asset_path(asset_id) / "asset.json") + if not json_content_hash_matches(asset_raw): + warnings.append("asset.json: Hash fehlt oder stimmt nicht; Datei wurde vermutlich extern geändert.") + except (OSError, ValueError, TypeError, json.JSONDecodeError): + pass + return warnings + + def refresh_member_record_hashes(self, member_id: str) -> None: + member = self.get_member(member_id) + contributions = self.get_contributions(member_id) + write_json_atomic(self._member_path(member_id) / "member.json", member.to_dict()) + write_json_atomic(self._member_path(member_id) / "contributions.json", contributions.to_dict()) + + def refresh_asset_record_hashes(self, asset_id: str) -> None: + asset = self.get_asset(asset_id) + write_json_atomic(self._asset_path(asset_id) / "asset.json", asset.to_dict()) + def get_member_number_policy(self) -> dict[str, str]: try: config = read_json(self.root / "repository.json") diff --git a/src/ccma/ui/dialogs.py b/src/ccma/ui/dialogs.py index f49b936..1146736 100644 --- a/src/ccma/ui/dialogs.py +++ b/src/ccma/ui/dialogs.py @@ -510,3 +510,46 @@ class AssetClaimDialog(tk.Toplevel): return self.destroy() self.on_created(str(result["claim"]["claim_id"])) + + +class IntegrityWarningDialog(tk.Toplevel): + def __init__( + self, + master: tk.Misc, + *, + title: str, + warnings: list[str], + on_confirm: Callable[[], None], + ): + super().__init__(master) + self.on_confirm = on_confirm + self.title(title) + self.transient(master.winfo_toplevel()) + self.resizable(False, False) + self.warnings = warnings + self._build_ui() + self.bind("", lambda _event: self.destroy()) + self.after_idle(self._activate_modal) + + def _build_ui(self) -> None: + frame = ttk.Frame(self, padding=18) + frame.pack(fill="both", expand=True) + message = ( + "ACHTUNG: Die zugehörigen JSON-Dateien wurden vermutlich extern geändert.\n\n" + "Haben Sie alle Daten geprüft und soll der Hash jetzt aktualisiert werden?" + ) + ttk.Label(frame, text=message, justify="left").grid(row=0, column=0, sticky="w") + ttk.Label(frame, text="\n".join(f"• {item}" for item in self.warnings), style="Warning.TLabel", justify="left").grid( + row=1, column=0, sticky="w", pady=(12, 0) + ) + buttons = ttk.Frame(frame) + buttons.grid(row=2, column=0, sticky="e", pady=(18, 0)) + ttk.Button(buttons, text="Nein", command=self.destroy).pack(side="left", padx=(0, 8)) + ttk.Button(buttons, text="Ja, bestätigen", style="Accent.TButton", command=self._confirm).pack(side="left") + + def _activate_modal(self) -> None: + _activate_modal_window(self) + + def _confirm(self) -> None: + self.destroy() + self.on_confirm() diff --git a/src/ccma/ui/main_window.py b/src/ccma/ui/main_window.py index 17f4553..9f827c3 100644 --- a/src/ccma/ui/main_window.py +++ b/src/ccma/ui/main_window.py @@ -471,7 +471,7 @@ class MainWindow(ttk.Frame): tab = HousekeeperTab( self.notebook, self.findings, - self.open_member, + self._open_housekeeper_target, self.run_housekeeper, self.delete_housekeeper_task, lambda: self.tabs.close(key), @@ -545,12 +545,22 @@ class MainWindow(ttk.Frame): self.tabs.refresh_icons(self.icons) def _show_validation_warning(self) -> None: + display_errors = [item for item in self.validation_errors if "Hash fehlt oder stimmt nicht" not in item] + if not display_errors: + return messagebox.showwarning( "Datenprüfung", - "Der Store enthält ungültige Akten:\n\n" + "\n".join(self.validation_errors[:12]), + "Der Store enthält ungültige Akten:\n\n" + "\n".join(display_errors[:12]), parent=self, ) + def _open_housekeeper_target(self, finding: HousekeeperFinding) -> None: + if finding.target_type == "asset" and finding.asset_id: + self.open_asset(finding.asset_id) + return + if finding.member_id: + self.open_member(finding.member_id) + def _member_reference_label(self, member_id: str) -> str: if not member_id: return "—" diff --git a/src/ccma/ui/work_tabs.py b/src/ccma/ui/work_tabs.py index af7c845..c89fdad 100644 --- a/src/ccma/ui/work_tabs.py +++ b/src/ccma/ui/work_tabs.py @@ -485,14 +485,14 @@ class HousekeeperTab(ttk.Frame): self, master: tk.Misc, findings: list[HousekeeperFinding], - on_open_member: Callable[[str], None], + on_open_target: Callable[[HousekeeperFinding], None], on_refresh: Callable[[], list[HousekeeperFinding]], on_delete: Callable[[str], list[HousekeeperFinding]], on_close: Callable[[], None], ): super().__init__(master, padding=12) self.findings = findings - self.on_open_member = on_open_member + self.on_open_target = on_open_target self.on_refresh = on_refresh self.on_delete = on_delete self.on_close = on_close @@ -602,11 +602,15 @@ class HousekeeperTab(ttk.Frame): def _open_selected(self) -> None: selected = self.tree.selection() if selected: - self.on_open_member(self.findings[int(selected[0])].member_id) + self.on_open_target(self.findings[int(selected[0])]) def _finding_details(finding: HousekeeperFinding) -> str: lines = [f"{finding.severity.upper()} · {finding.code}", finding.title] + if finding.target_type == "asset" and finding.asset_id: + lines.append(f"Asset: {finding.asset_id}") + elif finding.member_id: + lines.append(f"Mitglied: {finding.member_id}") if finding.key: lines.append(f"Key: {finding.key}") if finding.due_date: diff --git a/tests/test_repository.py b/tests/test_repository.py index fbbbe87..7fa593e 100644 --- a/tests/test_repository.py +++ b/tests/test_repository.py @@ -4,6 +4,7 @@ from decimal import Decimal import pytest from ccma.domain.contributions import claim_balance +from ccma.services.housekeeper import Housekeeper from ccma.storage.repository import ( MemberRepository, RepositoryError, @@ -37,6 +38,7 @@ def test_repository_creates_transparent_member_record(tmp_path) -> None: assert raw["person"]["first_name"] == "Ada" assert raw["person"]["nickname"] == "Enchantress" assert raw["schema_version"] == 1 + assert raw["content_hash"] def test_search_matches_name_email_number_and_german_birth_date(tmp_path) -> None: @@ -376,3 +378,48 @@ def test_negative_claim_can_be_settled_with_credit(tmp_path) -> None: data, loaded_claim = repository.get_claim(member.member_id, str(claim["claim_id"])) assert claim_balance(data, loaded_claim) == Decimal("0.00") assert data.credits[0]["amount"] == "25.00" + + +def test_member_hash_warning_does_not_block_reading(tmp_path) -> None: + repository = MemberRepository(tmp_path) + repository.initialize() + member = repository.create_member(first_name="Ada", last_name="Lovelace") + path = repository.members_root / member.member_id / "member.json" + raw = json.loads(path.read_text(encoding="utf-8")) + raw["person"]["first_name"] = "Eve" + path.write_text(json.dumps(raw, ensure_ascii=False, indent=2), encoding="utf-8") + + loaded = repository.get_member(member.member_id) + warnings = repository.member_hash_warnings(member.member_id) + + assert loaded.first_name == "Eve" + assert warnings + assert "Hash fehlt oder stimmt nicht" in warnings[0] + + +def test_refresh_member_record_hashes_clears_hash_warning(tmp_path) -> None: + repository = MemberRepository(tmp_path) + repository.initialize() + member = repository.create_member(first_name="Ada", last_name="Lovelace") + path = repository.members_root / member.member_id / "member.json" + raw = json.loads(path.read_text(encoding="utf-8")) + raw["person"]["first_name"] = "Eve" + path.write_text(json.dumps(raw, ensure_ascii=False, indent=2), encoding="utf-8") + + assert repository.member_hash_warnings(member.member_id) + repository.refresh_member_record_hashes(member.member_id) + assert repository.member_hash_warnings(member.member_id) == [] + + +def test_housekeeper_reports_json_hash_mismatch(tmp_path) -> None: + repository = MemberRepository(tmp_path) + repository.initialize() + member = repository.create_member(first_name="Ada", last_name="Lovelace") + path = repository.members_root / member.member_id / "member.json" + raw = json.loads(path.read_text(encoding="utf-8")) + raw["person"]["last_name"] = "Example" + path.write_text(json.dumps(raw, ensure_ascii=False, indent=2), encoding="utf-8") + + findings = Housekeeper(repository).run() + + assert any(finding.code == "json_hash_mismatch" and finding.member_id == member.member_id for finding in findings) diff --git a/tests/test_ui_imports.py b/tests/test_ui_imports.py index 6843203..cd9d398 100644 --- a/tests/test_ui_imports.py +++ b/tests/test_ui_imports.py @@ -69,10 +69,10 @@ def test_housekeeper_details_are_multiline() -> None: finding = HousekeeperFinding( severity="error", - member_id="member-1", code="invalid_member_record", title="Mitgliederakte beschädigt", detail="Die JSON-Datei ist leer und wird nicht automatisch überschrieben.", + member_id="member-1", due_date=date(2026, 7, 31), ) @@ -164,3 +164,20 @@ def test_negative_claims_are_labeled_as_credit() -> None: data = ContributionData() claim = {"claim_id": "claim-1", "title": "Rueckzahlung", "amount": "-25.00"} assert claim_status(data, claim) == "credit" + + +def test_housekeeper_details_include_asset_target() -> None: + from ccma.domain.models import HousekeeperFinding + from ccma.ui.work_tabs import _finding_details + + finding = HousekeeperFinding( + severity="warning", + code="json_hash_mismatch", + title="Assetakte extern geändert", + detail="asset.json: Hash fehlt oder stimmt nicht.", + asset_id="asset-1", + target_type="asset", + ) + + rendered = _finding_details(finding) + assert "Asset: asset-1" in rendered