Add JSON integrity hash checks

This commit is contained in:
Marcel Peterkau
2026-06-27 10:35:35 +02:00
parent d1dab793a6
commit 87e972bb43
9 changed files with 302 additions and 11 deletions
+3 -1
View File
@@ -295,10 +295,12 @@ class ContributionData:
@dataclass(frozen=True, slots=True) @dataclass(frozen=True, slots=True)
class HousekeeperFinding: class HousekeeperFinding:
severity: str severity: str
member_id: str
code: str code: str
title: str title: str
detail: str detail: str
member_id: str = ""
asset_id: str = ""
target_type: str = "member"
due_date: date | None = None due_date: date | None = None
key: str = "" key: str = ""
+89 -2
View File
@@ -79,7 +79,9 @@ class Housekeeper:
items = _items_by_key(working) items = _items_by_key(working)
successful_scopes: set[tuple[str, str]] = set() successful_scopes: set[tuple[str, str]] = set()
member_ids = set(self.repository.list_member_ids()) member_ids = set(self.repository.list_member_ids())
asset_ids = set(self.repository.list_asset_ids())
_remove_orphaned_member_items(items, member_ids) _remove_orphaned_member_items(items, member_ids)
_remove_orphaned_asset_items(items, asset_ids)
rules = load_rules(self.repository.root) rules = load_rules(self.repository.root)
repository_config = self.repository.get_configuration() repository_config = self.repository.get_configuration()
@@ -93,6 +95,15 @@ class Housekeeper:
successful_scopes.add(("member-record-check", member_id)) successful_scopes.add(("member-record-check", member_id))
continue continue
successful_scopes.add(("member-record-check", member_id)) successful_scopes.add(("member-record-check", member_id))
self._refresh_hash_integrity_tasks(
items,
target_type="member",
target_id=member_id,
warnings=self.repository.member_hash_warnings(member_id),
run_id=run_id,
now=now,
)
successful_scopes.add(("member-hash-check", member_id))
for rule in rules: for rule in rules:
scope = (rule.rule_id, member.member_id) scope = (rule.rule_id, member.member_id)
try: try:
@@ -130,6 +141,17 @@ class Housekeeper:
else: else:
successful_scopes.add(scope) successful_scopes.add(scope)
for asset_id in sorted(asset_ids):
self._refresh_hash_integrity_tasks(
items,
target_type="asset",
target_id=asset_id,
warnings=self.repository.asset_hash_warnings(asset_id),
run_id=run_id,
now=now,
)
successful_scopes.add(("asset-hash-check", asset_id))
self._resolve_stale_tasks(items, successful_scopes, run_id, now) self._resolve_stale_tasks(items, successful_scopes, run_id, now)
working.update( working.update(
{ {
@@ -198,6 +220,58 @@ class Housekeeper:
) )
items[key] = item items[key] = item
@staticmethod
def _refresh_hash_integrity_tasks(
items: dict[str, dict[str, Any]],
*,
target_type: str,
target_id: str,
warnings: list[str],
run_id: str,
now: str,
) -> None:
key = f"{target_type}-hash-check:{target_id}:json-hash-mismatch"
if not warnings:
item = items.get(key)
if item and item.get("status") == "open":
item["status"] = "resolved"
item["resolved_run"] = run_id
item["resolved_at"] = now
return
item = items.get(key, {})
was_resolved = item.get("status") == "resolved"
item.update(
{
"key": key,
"rule_id": f"{target_type}-hash-check",
"rule_file": "<hash-check>",
"rule_source": "housekeeper",
"member_id": target_id if target_type == "member" else "",
"asset_id": target_id if target_type == "asset" else "",
"target_type": target_type,
"action": "task",
"status": "open",
"severity": "warning",
"code": "json_hash_mismatch",
"title": (
f"Mitgliederakte {target_id}: JSON extern geändert"
if target_type == "member"
else f"Assetakte {target_id}: JSON extern geändert"
),
"detail": " ; ".join(warnings),
"due_date": None,
"first_seen_run": item.get("first_seen_run", run_id),
"first_seen_at": item.get("first_seen_at", now),
"last_seen_run": run_id,
"last_seen_at": now,
"seen_count": int(item.get("seen_count", 0)) + 1,
"reopened_count": int(item.get("reopened_count", 0)) + (1 if was_resolved else 0),
"resolved_run": None,
"resolved_at": None,
}
)
items[key] = item
def _apply_action( def _apply_action(
self, self,
items: dict[str, dict[str, Any]], items: dict[str, dict[str, Any]],
@@ -343,7 +417,8 @@ class Housekeeper:
for item in items.values(): for item in items.values():
if item.get("action") != "task" or item.get("status") != "open": if item.get("action") != "task" or item.get("status") != "open":
continue continue
scope = (str(item.get("rule_id", "")), str(item.get("member_id", ""))) target_id = str(item.get("member_id", "") or item.get("asset_id", ""))
scope = (str(item.get("rule_id", "")), target_id)
if scope not in successful_scopes or item.get("last_seen_run") == run_id: if scope not in successful_scopes or item.get("last_seen_run") == run_id:
continue continue
item["status"] = "resolved" item["status"] = "resolved"
@@ -396,10 +471,12 @@ def _open_findings(items: list[dict[str, Any]]) -> list[HousekeeperFinding]:
findings.append( findings.append(
HousekeeperFinding( HousekeeperFinding(
severity=str(item.get("severity", "info")), severity=str(item.get("severity", "info")),
member_id=str(item.get("member_id", "")),
code=str(item.get("code", item.get("rule_id", "housekeeper"))), code=str(item.get("code", item.get("rule_id", "housekeeper"))),
title=str(item.get("title", item.get("key", "Hausmeister"))), title=str(item.get("title", item.get("key", "Hausmeister"))),
detail=str(item.get("detail", "")), detail=str(item.get("detail", "")),
member_id=str(item.get("member_id", "")),
asset_id=str(item.get("asset_id", "")),
target_type=str(item.get("target_type", "member")),
due_date=due_date, due_date=due_date,
key=str(item.get("key", "")), key=str(item.get("key", "")),
) )
@@ -421,6 +498,16 @@ def _remove_orphaned_member_items(items: dict[str, dict[str, Any]], member_ids:
del items[key] del items[key]
def _remove_orphaned_asset_items(items: dict[str, dict[str, Any]], asset_ids: set[str]) -> None:
orphaned_keys = [
key
for key, item in items.items()
if item.get("asset_id") and str(item["asset_id"]) not in asset_ids
]
for key in orphaned_keys:
del items[key]
def _non_negative_delay(value: float) -> float: def _non_negative_delay(value: float) -> float:
try: try:
delay = float(value) delay = float(value)
+40 -1
View File
@@ -1,17 +1,56 @@
import hashlib
import json import json
import os import os
import tempfile import tempfile
from copy import deepcopy
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
CONTENT_HASH_FIELD = "content_hash"
def _hashable_copy(data: Any, *, hash_field: str = CONTENT_HASH_FIELD) -> Any:
if isinstance(data, dict):
return {key: _hashable_copy(value, hash_field=hash_field) for key, value in data.items() if key != hash_field}
if isinstance(data, list):
return [_hashable_copy(item, hash_field=hash_field) for item in data]
return data
def compute_json_content_hash(data: Any, *, hash_field: str = CONTENT_HASH_FIELD) -> str:
payload = json.dumps(
_hashable_copy(data, hash_field=hash_field),
ensure_ascii=False,
sort_keys=True,
separators=(",", ":"),
).encode("utf-8")
return hashlib.sha256(payload).hexdigest()
def attach_json_content_hash(data: Any, *, hash_field: str = CONTENT_HASH_FIELD) -> Any:
cloned = deepcopy(data)
if isinstance(cloned, dict):
cloned[hash_field] = compute_json_content_hash(cloned, hash_field=hash_field)
return cloned
def json_content_hash_matches(data: Any, *, hash_field: str = CONTENT_HASH_FIELD) -> bool:
if not isinstance(data, dict):
return True
stored = str(data.get(hash_field, "")).strip()
if not stored:
return False
return stored == compute_json_content_hash(data, hash_field=hash_field)
def write_json_atomic(path: Path, data: Any) -> None: def write_json_atomic(path: Path, data: Any) -> None:
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
descriptor, temporary_name = tempfile.mkstemp(prefix=f".{path.name}.", suffix=".tmp", dir=path.parent) descriptor, temporary_name = tempfile.mkstemp(prefix=f".{path.name}.", suffix=".tmp", dir=path.parent)
temporary = Path(temporary_name) temporary = Path(temporary_name)
payload = attach_json_content_hash(data)
try: try:
with os.fdopen(descriptor, "w", encoding="utf-8", newline="\n") as handle: with os.fdopen(descriptor, "w", encoding="utf-8", newline="\n") as handle:
json.dump(data, handle, ensure_ascii=False, indent=2) json.dump(payload, handle, ensure_ascii=False, indent=2)
handle.write("\n") handle.write("\n")
handle.flush() handle.flush()
os.fsync(handle.fileno()) os.fsync(handle.fileno())
+43 -1
View File
@@ -21,7 +21,7 @@ from ccma.domain.contributions import (
) )
from ccma.domain.dates import DateValidationError, normalize_date_input, validate_member_dates from ccma.domain.dates import DateValidationError, normalize_date_input, validate_member_dates
from ccma.domain.models import ASSET_STATUS_LABELS, MEMBERSHIP_STATUS_LABELS, Asset, ContributionData, Event, Member from ccma.domain.models import ASSET_STATUS_LABELS, MEMBERSHIP_STATUS_LABELS, Asset, ContributionData, Event, Member
from ccma.storage.atomic import read_json, write_json_atomic from ccma.storage.atomic import json_content_hash_matches, read_json, write_json_atomic
class RepositoryError(RuntimeError): class RepositoryError(RuntimeError):
@@ -142,6 +142,8 @@ class MemberRepository:
errors: list[str] = [] errors: list[str] = []
try: try:
config = read_json(self.root / "repository.json") config = read_json(self.root / "repository.json")
if not json_content_hash_matches(config):
errors.append("repository.json: Hash fehlt oder stimmt nicht; Datei wurde vermutlich extern geändert.")
if int(config.get("schema_version", 0)) != 1: if int(config.get("schema_version", 0)) != 1:
errors.append("repository.json: nicht unterstützte schema_version") errors.append("repository.json: nicht unterstützte schema_version")
policy = config.get("member_number_policy") or {} policy = config.get("member_number_policy") or {}
@@ -155,6 +157,7 @@ class MemberRepository:
for member_dir in self._member_directories(): for member_dir in self._member_directories():
try: try:
member, _contributions = self.preflight_member_record(member_dir.name) member, _contributions = self.preflight_member_record(member_dir.name)
errors.extend(f"{member_dir.name}/{warning}" for warning in self.member_hash_warnings(member_dir.name))
validate_member_dates( validate_member_dates(
birth_date=member.birth_date, birth_date=member.birth_date,
accepted_at=member.accepted_at, accepted_at=member.accepted_at,
@@ -183,6 +186,7 @@ class MemberRepository:
for asset_dir in self._asset_directories(): for asset_dir in self._asset_directories():
try: try:
asset = self.get_asset(asset_dir.name) asset = self.get_asset(asset_dir.name)
errors.extend(f"{asset_dir.name}/{warning}" for warning in self.asset_hash_warnings(asset_dir.name))
if asset.asset_id != asset_dir.name: if asset.asset_id != asset_dir.name:
errors.append(f"{asset_dir.name}/asset.json: asset_id stimmt nicht mit Ordner überein") errors.append(f"{asset_dir.name}/asset.json: asset_id stimmt nicht mit Ordner überein")
if asset.schema_version != 1: if asset.schema_version != 1:
@@ -1211,6 +1215,44 @@ class MemberRepository:
raise RepositoryError("repository.json enthält keine gültige Konfiguration.") raise RepositoryError("repository.json enthält keine gültige Konfiguration.")
return configuration return configuration
def member_hash_warnings(self, member_id: str) -> list[str]:
warnings: list[str] = []
try:
member_raw = read_json(self._member_path(member_id) / "member.json")
if not json_content_hash_matches(member_raw):
warnings.append("member.json: Hash fehlt oder stimmt nicht; Datei wurde vermutlich extern geändert.")
except (OSError, ValueError, TypeError, json.JSONDecodeError):
pass
try:
contributions_raw = read_json(self._member_path(member_id) / "contributions.json")
if not json_content_hash_matches(contributions_raw):
warnings.append(
"contributions.json: Hash fehlt oder stimmt nicht; Datei wurde vermutlich extern geändert."
)
except (OSError, ValueError, TypeError, json.JSONDecodeError):
pass
return warnings
def asset_hash_warnings(self, asset_id: str) -> list[str]:
warnings: list[str] = []
try:
asset_raw = read_json(self._asset_path(asset_id) / "asset.json")
if not json_content_hash_matches(asset_raw):
warnings.append("asset.json: Hash fehlt oder stimmt nicht; Datei wurde vermutlich extern geändert.")
except (OSError, ValueError, TypeError, json.JSONDecodeError):
pass
return warnings
def refresh_member_record_hashes(self, member_id: str) -> None:
member = self.get_member(member_id)
contributions = self.get_contributions(member_id)
write_json_atomic(self._member_path(member_id) / "member.json", member.to_dict())
write_json_atomic(self._member_path(member_id) / "contributions.json", contributions.to_dict())
def refresh_asset_record_hashes(self, asset_id: str) -> None:
asset = self.get_asset(asset_id)
write_json_atomic(self._asset_path(asset_id) / "asset.json", asset.to_dict())
def get_member_number_policy(self) -> dict[str, str]: def get_member_number_policy(self) -> dict[str, str]:
try: try:
config = read_json(self.root / "repository.json") config = read_json(self.root / "repository.json")
+43
View File
@@ -510,3 +510,46 @@ class AssetClaimDialog(tk.Toplevel):
return return
self.destroy() self.destroy()
self.on_created(str(result["claim"]["claim_id"])) self.on_created(str(result["claim"]["claim_id"]))
class IntegrityWarningDialog(tk.Toplevel):
def __init__(
self,
master: tk.Misc,
*,
title: str,
warnings: list[str],
on_confirm: Callable[[], None],
):
super().__init__(master)
self.on_confirm = on_confirm
self.title(title)
self.transient(master.winfo_toplevel())
self.resizable(False, False)
self.warnings = warnings
self._build_ui()
self.bind("<Escape>", lambda _event: self.destroy())
self.after_idle(self._activate_modal)
def _build_ui(self) -> None:
frame = ttk.Frame(self, padding=18)
frame.pack(fill="both", expand=True)
message = (
"ACHTUNG: Die zugehörigen JSON-Dateien wurden vermutlich extern geändert.\n\n"
"Haben Sie alle Daten geprüft und soll der Hash jetzt aktualisiert werden?"
)
ttk.Label(frame, text=message, justify="left").grid(row=0, column=0, sticky="w")
ttk.Label(frame, text="\n".join(f"{item}" for item in self.warnings), style="Warning.TLabel", justify="left").grid(
row=1, column=0, sticky="w", pady=(12, 0)
)
buttons = ttk.Frame(frame)
buttons.grid(row=2, column=0, sticky="e", pady=(18, 0))
ttk.Button(buttons, text="Nein", command=self.destroy).pack(side="left", padx=(0, 8))
ttk.Button(buttons, text="Ja, bestätigen", style="Accent.TButton", command=self._confirm).pack(side="left")
def _activate_modal(self) -> None:
_activate_modal_window(self)
def _confirm(self) -> None:
self.destroy()
self.on_confirm()
+12 -2
View File
@@ -471,7 +471,7 @@ class MainWindow(ttk.Frame):
tab = HousekeeperTab( tab = HousekeeperTab(
self.notebook, self.notebook,
self.findings, self.findings,
self.open_member, self._open_housekeeper_target,
self.run_housekeeper, self.run_housekeeper,
self.delete_housekeeper_task, self.delete_housekeeper_task,
lambda: self.tabs.close(key), lambda: self.tabs.close(key),
@@ -545,12 +545,22 @@ class MainWindow(ttk.Frame):
self.tabs.refresh_icons(self.icons) self.tabs.refresh_icons(self.icons)
def _show_validation_warning(self) -> None: def _show_validation_warning(self) -> None:
display_errors = [item for item in self.validation_errors if "Hash fehlt oder stimmt nicht" not in item]
if not display_errors:
return
messagebox.showwarning( messagebox.showwarning(
"Datenprüfung", "Datenprüfung",
"Der Store enthält ungültige Akten:\n\n" + "\n".join(self.validation_errors[:12]), "Der Store enthält ungültige Akten:\n\n" + "\n".join(display_errors[:12]),
parent=self, parent=self,
) )
def _open_housekeeper_target(self, finding: HousekeeperFinding) -> None:
if finding.target_type == "asset" and finding.asset_id:
self.open_asset(finding.asset_id)
return
if finding.member_id:
self.open_member(finding.member_id)
def _member_reference_label(self, member_id: str) -> str: def _member_reference_label(self, member_id: str) -> str:
if not member_id: if not member_id:
return "" return ""
+7 -3
View File
@@ -485,14 +485,14 @@ class HousekeeperTab(ttk.Frame):
self, self,
master: tk.Misc, master: tk.Misc,
findings: list[HousekeeperFinding], findings: list[HousekeeperFinding],
on_open_member: Callable[[str], None], on_open_target: Callable[[HousekeeperFinding], None],
on_refresh: Callable[[], list[HousekeeperFinding]], on_refresh: Callable[[], list[HousekeeperFinding]],
on_delete: Callable[[str], list[HousekeeperFinding]], on_delete: Callable[[str], list[HousekeeperFinding]],
on_close: Callable[[], None], on_close: Callable[[], None],
): ):
super().__init__(master, padding=12) super().__init__(master, padding=12)
self.findings = findings self.findings = findings
self.on_open_member = on_open_member self.on_open_target = on_open_target
self.on_refresh = on_refresh self.on_refresh = on_refresh
self.on_delete = on_delete self.on_delete = on_delete
self.on_close = on_close self.on_close = on_close
@@ -602,11 +602,15 @@ class HousekeeperTab(ttk.Frame):
def _open_selected(self) -> None: def _open_selected(self) -> None:
selected = self.tree.selection() selected = self.tree.selection()
if selected: if selected:
self.on_open_member(self.findings[int(selected[0])].member_id) self.on_open_target(self.findings[int(selected[0])])
def _finding_details(finding: HousekeeperFinding) -> str: def _finding_details(finding: HousekeeperFinding) -> str:
lines = [f"{finding.severity.upper()} · {finding.code}", finding.title] lines = [f"{finding.severity.upper()} · {finding.code}", finding.title]
if finding.target_type == "asset" and finding.asset_id:
lines.append(f"Asset: {finding.asset_id}")
elif finding.member_id:
lines.append(f"Mitglied: {finding.member_id}")
if finding.key: if finding.key:
lines.append(f"Key: {finding.key}") lines.append(f"Key: {finding.key}")
if finding.due_date: if finding.due_date:
+47
View File
@@ -4,6 +4,7 @@ from decimal import Decimal
import pytest import pytest
from ccma.domain.contributions import claim_balance from ccma.domain.contributions import claim_balance
from ccma.services.housekeeper import Housekeeper
from ccma.storage.repository import ( from ccma.storage.repository import (
MemberRepository, MemberRepository,
RepositoryError, RepositoryError,
@@ -37,6 +38,7 @@ def test_repository_creates_transparent_member_record(tmp_path) -> None:
assert raw["person"]["first_name"] == "Ada" assert raw["person"]["first_name"] == "Ada"
assert raw["person"]["nickname"] == "Enchantress" assert raw["person"]["nickname"] == "Enchantress"
assert raw["schema_version"] == 1 assert raw["schema_version"] == 1
assert raw["content_hash"]
def test_search_matches_name_email_number_and_german_birth_date(tmp_path) -> None: def test_search_matches_name_email_number_and_german_birth_date(tmp_path) -> None:
@@ -376,3 +378,48 @@ def test_negative_claim_can_be_settled_with_credit(tmp_path) -> None:
data, loaded_claim = repository.get_claim(member.member_id, str(claim["claim_id"])) data, loaded_claim = repository.get_claim(member.member_id, str(claim["claim_id"]))
assert claim_balance(data, loaded_claim) == Decimal("0.00") assert claim_balance(data, loaded_claim) == Decimal("0.00")
assert data.credits[0]["amount"] == "25.00" assert data.credits[0]["amount"] == "25.00"
def test_member_hash_warning_does_not_block_reading(tmp_path) -> None:
repository = MemberRepository(tmp_path)
repository.initialize()
member = repository.create_member(first_name="Ada", last_name="Lovelace")
path = repository.members_root / member.member_id / "member.json"
raw = json.loads(path.read_text(encoding="utf-8"))
raw["person"]["first_name"] = "Eve"
path.write_text(json.dumps(raw, ensure_ascii=False, indent=2), encoding="utf-8")
loaded = repository.get_member(member.member_id)
warnings = repository.member_hash_warnings(member.member_id)
assert loaded.first_name == "Eve"
assert warnings
assert "Hash fehlt oder stimmt nicht" in warnings[0]
def test_refresh_member_record_hashes_clears_hash_warning(tmp_path) -> None:
repository = MemberRepository(tmp_path)
repository.initialize()
member = repository.create_member(first_name="Ada", last_name="Lovelace")
path = repository.members_root / member.member_id / "member.json"
raw = json.loads(path.read_text(encoding="utf-8"))
raw["person"]["first_name"] = "Eve"
path.write_text(json.dumps(raw, ensure_ascii=False, indent=2), encoding="utf-8")
assert repository.member_hash_warnings(member.member_id)
repository.refresh_member_record_hashes(member.member_id)
assert repository.member_hash_warnings(member.member_id) == []
def test_housekeeper_reports_json_hash_mismatch(tmp_path) -> None:
repository = MemberRepository(tmp_path)
repository.initialize()
member = repository.create_member(first_name="Ada", last_name="Lovelace")
path = repository.members_root / member.member_id / "member.json"
raw = json.loads(path.read_text(encoding="utf-8"))
raw["person"]["last_name"] = "Example"
path.write_text(json.dumps(raw, ensure_ascii=False, indent=2), encoding="utf-8")
findings = Housekeeper(repository).run()
assert any(finding.code == "json_hash_mismatch" and finding.member_id == member.member_id for finding in findings)
+18 -1
View File
@@ -69,10 +69,10 @@ def test_housekeeper_details_are_multiline() -> None:
finding = HousekeeperFinding( finding = HousekeeperFinding(
severity="error", severity="error",
member_id="member-1",
code="invalid_member_record", code="invalid_member_record",
title="Mitgliederakte beschädigt", title="Mitgliederakte beschädigt",
detail="Die JSON-Datei ist leer und wird nicht automatisch überschrieben.", detail="Die JSON-Datei ist leer und wird nicht automatisch überschrieben.",
member_id="member-1",
due_date=date(2026, 7, 31), due_date=date(2026, 7, 31),
) )
@@ -164,3 +164,20 @@ def test_negative_claims_are_labeled_as_credit() -> None:
data = ContributionData() data = ContributionData()
claim = {"claim_id": "claim-1", "title": "Rueckzahlung", "amount": "-25.00"} claim = {"claim_id": "claim-1", "title": "Rueckzahlung", "amount": "-25.00"}
assert claim_status(data, claim) == "credit" assert claim_status(data, claim) == "credit"
def test_housekeeper_details_include_asset_target() -> None:
from ccma.domain.models import HousekeeperFinding
from ccma.ui.work_tabs import _finding_details
finding = HousekeeperFinding(
severity="warning",
code="json_hash_mismatch",
title="Assetakte extern geändert",
detail="asset.json: Hash fehlt oder stimmt nicht.",
asset_id="asset-1",
target_type="asset",
)
rendered = _finding_details(finding)
assert "Asset: asset-1" in rendered