Add JSON integrity hash checks

This commit is contained in:
Marcel Peterkau
2026-06-27 10:35:35 +02:00
parent d1dab793a6
commit 87e972bb43
9 changed files with 302 additions and 11 deletions
+3 -1
View File
@@ -295,10 +295,12 @@ class ContributionData:
@dataclass(frozen=True, slots=True)
class HousekeeperFinding:
severity: str
member_id: str
code: str
title: str
detail: str
member_id: str = ""
asset_id: str = ""
target_type: str = "member"
due_date: date | None = None
key: str = ""
+89 -2
View File
@@ -79,7 +79,9 @@ class Housekeeper:
items = _items_by_key(working)
successful_scopes: set[tuple[str, str]] = set()
member_ids = set(self.repository.list_member_ids())
asset_ids = set(self.repository.list_asset_ids())
_remove_orphaned_member_items(items, member_ids)
_remove_orphaned_asset_items(items, asset_ids)
rules = load_rules(self.repository.root)
repository_config = self.repository.get_configuration()
@@ -93,6 +95,15 @@ class Housekeeper:
successful_scopes.add(("member-record-check", member_id))
continue
successful_scopes.add(("member-record-check", member_id))
self._refresh_hash_integrity_tasks(
items,
target_type="member",
target_id=member_id,
warnings=self.repository.member_hash_warnings(member_id),
run_id=run_id,
now=now,
)
successful_scopes.add(("member-hash-check", member_id))
for rule in rules:
scope = (rule.rule_id, member.member_id)
try:
@@ -130,6 +141,17 @@ class Housekeeper:
else:
successful_scopes.add(scope)
for asset_id in sorted(asset_ids):
self._refresh_hash_integrity_tasks(
items,
target_type="asset",
target_id=asset_id,
warnings=self.repository.asset_hash_warnings(asset_id),
run_id=run_id,
now=now,
)
successful_scopes.add(("asset-hash-check", asset_id))
self._resolve_stale_tasks(items, successful_scopes, run_id, now)
working.update(
{
@@ -198,6 +220,58 @@ class Housekeeper:
)
items[key] = item
@staticmethod
def _refresh_hash_integrity_tasks(
items: dict[str, dict[str, Any]],
*,
target_type: str,
target_id: str,
warnings: list[str],
run_id: str,
now: str,
) -> None:
key = f"{target_type}-hash-check:{target_id}:json-hash-mismatch"
if not warnings:
item = items.get(key)
if item and item.get("status") == "open":
item["status"] = "resolved"
item["resolved_run"] = run_id
item["resolved_at"] = now
return
item = items.get(key, {})
was_resolved = item.get("status") == "resolved"
item.update(
{
"key": key,
"rule_id": f"{target_type}-hash-check",
"rule_file": "<hash-check>",
"rule_source": "housekeeper",
"member_id": target_id if target_type == "member" else "",
"asset_id": target_id if target_type == "asset" else "",
"target_type": target_type,
"action": "task",
"status": "open",
"severity": "warning",
"code": "json_hash_mismatch",
"title": (
f"Mitgliederakte {target_id}: JSON extern geändert"
if target_type == "member"
else f"Assetakte {target_id}: JSON extern geändert"
),
"detail": " ; ".join(warnings),
"due_date": None,
"first_seen_run": item.get("first_seen_run", run_id),
"first_seen_at": item.get("first_seen_at", now),
"last_seen_run": run_id,
"last_seen_at": now,
"seen_count": int(item.get("seen_count", 0)) + 1,
"reopened_count": int(item.get("reopened_count", 0)) + (1 if was_resolved else 0),
"resolved_run": None,
"resolved_at": None,
}
)
items[key] = item
def _apply_action(
self,
items: dict[str, dict[str, Any]],
@@ -343,7 +417,8 @@ class Housekeeper:
for item in items.values():
if item.get("action") != "task" or item.get("status") != "open":
continue
scope = (str(item.get("rule_id", "")), str(item.get("member_id", "")))
target_id = str(item.get("member_id", "") or item.get("asset_id", ""))
scope = (str(item.get("rule_id", "")), target_id)
if scope not in successful_scopes or item.get("last_seen_run") == run_id:
continue
item["status"] = "resolved"
@@ -396,10 +471,12 @@ def _open_findings(items: list[dict[str, Any]]) -> list[HousekeeperFinding]:
findings.append(
HousekeeperFinding(
severity=str(item.get("severity", "info")),
member_id=str(item.get("member_id", "")),
code=str(item.get("code", item.get("rule_id", "housekeeper"))),
title=str(item.get("title", item.get("key", "Hausmeister"))),
detail=str(item.get("detail", "")),
member_id=str(item.get("member_id", "")),
asset_id=str(item.get("asset_id", "")),
target_type=str(item.get("target_type", "member")),
due_date=due_date,
key=str(item.get("key", "")),
)
@@ -421,6 +498,16 @@ def _remove_orphaned_member_items(items: dict[str, dict[str, Any]], member_ids:
del items[key]
def _remove_orphaned_asset_items(items: dict[str, dict[str, Any]], asset_ids: set[str]) -> None:
orphaned_keys = [
key
for key, item in items.items()
if item.get("asset_id") and str(item["asset_id"]) not in asset_ids
]
for key in orphaned_keys:
del items[key]
def _non_negative_delay(value: float) -> float:
try:
delay = float(value)
+40 -1
View File
@@ -1,17 +1,56 @@
import hashlib
import json
import os
import tempfile
from copy import deepcopy
from pathlib import Path
from typing import Any
CONTENT_HASH_FIELD = "content_hash"
def _hashable_copy(data: Any, *, hash_field: str = CONTENT_HASH_FIELD) -> Any:
if isinstance(data, dict):
return {key: _hashable_copy(value, hash_field=hash_field) for key, value in data.items() if key != hash_field}
if isinstance(data, list):
return [_hashable_copy(item, hash_field=hash_field) for item in data]
return data
def compute_json_content_hash(data: Any, *, hash_field: str = CONTENT_HASH_FIELD) -> str:
payload = json.dumps(
_hashable_copy(data, hash_field=hash_field),
ensure_ascii=False,
sort_keys=True,
separators=(",", ":"),
).encode("utf-8")
return hashlib.sha256(payload).hexdigest()
def attach_json_content_hash(data: Any, *, hash_field: str = CONTENT_HASH_FIELD) -> Any:
cloned = deepcopy(data)
if isinstance(cloned, dict):
cloned[hash_field] = compute_json_content_hash(cloned, hash_field=hash_field)
return cloned
def json_content_hash_matches(data: Any, *, hash_field: str = CONTENT_HASH_FIELD) -> bool:
if not isinstance(data, dict):
return True
stored = str(data.get(hash_field, "")).strip()
if not stored:
return False
return stored == compute_json_content_hash(data, hash_field=hash_field)
def write_json_atomic(path: Path, data: Any) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
descriptor, temporary_name = tempfile.mkstemp(prefix=f".{path.name}.", suffix=".tmp", dir=path.parent)
temporary = Path(temporary_name)
payload = attach_json_content_hash(data)
try:
with os.fdopen(descriptor, "w", encoding="utf-8", newline="\n") as handle:
json.dump(data, handle, ensure_ascii=False, indent=2)
json.dump(payload, handle, ensure_ascii=False, indent=2)
handle.write("\n")
handle.flush()
os.fsync(handle.fileno())
+43 -1
View File
@@ -21,7 +21,7 @@ from ccma.domain.contributions import (
)
from ccma.domain.dates import DateValidationError, normalize_date_input, validate_member_dates
from ccma.domain.models import ASSET_STATUS_LABELS, MEMBERSHIP_STATUS_LABELS, Asset, ContributionData, Event, Member
from ccma.storage.atomic import read_json, write_json_atomic
from ccma.storage.atomic import json_content_hash_matches, read_json, write_json_atomic
class RepositoryError(RuntimeError):
@@ -142,6 +142,8 @@ class MemberRepository:
errors: list[str] = []
try:
config = read_json(self.root / "repository.json")
if not json_content_hash_matches(config):
errors.append("repository.json: Hash fehlt oder stimmt nicht; Datei wurde vermutlich extern geändert.")
if int(config.get("schema_version", 0)) != 1:
errors.append("repository.json: nicht unterstützte schema_version")
policy = config.get("member_number_policy") or {}
@@ -155,6 +157,7 @@ class MemberRepository:
for member_dir in self._member_directories():
try:
member, _contributions = self.preflight_member_record(member_dir.name)
errors.extend(f"{member_dir.name}/{warning}" for warning in self.member_hash_warnings(member_dir.name))
validate_member_dates(
birth_date=member.birth_date,
accepted_at=member.accepted_at,
@@ -183,6 +186,7 @@ class MemberRepository:
for asset_dir in self._asset_directories():
try:
asset = self.get_asset(asset_dir.name)
errors.extend(f"{asset_dir.name}/{warning}" for warning in self.asset_hash_warnings(asset_dir.name))
if asset.asset_id != asset_dir.name:
errors.append(f"{asset_dir.name}/asset.json: asset_id stimmt nicht mit Ordner überein")
if asset.schema_version != 1:
@@ -1211,6 +1215,44 @@ class MemberRepository:
raise RepositoryError("repository.json enthält keine gültige Konfiguration.")
return configuration
def member_hash_warnings(self, member_id: str) -> list[str]:
warnings: list[str] = []
try:
member_raw = read_json(self._member_path(member_id) / "member.json")
if not json_content_hash_matches(member_raw):
warnings.append("member.json: Hash fehlt oder stimmt nicht; Datei wurde vermutlich extern geändert.")
except (OSError, ValueError, TypeError, json.JSONDecodeError):
pass
try:
contributions_raw = read_json(self._member_path(member_id) / "contributions.json")
if not json_content_hash_matches(contributions_raw):
warnings.append(
"contributions.json: Hash fehlt oder stimmt nicht; Datei wurde vermutlich extern geändert."
)
except (OSError, ValueError, TypeError, json.JSONDecodeError):
pass
return warnings
def asset_hash_warnings(self, asset_id: str) -> list[str]:
warnings: list[str] = []
try:
asset_raw = read_json(self._asset_path(asset_id) / "asset.json")
if not json_content_hash_matches(asset_raw):
warnings.append("asset.json: Hash fehlt oder stimmt nicht; Datei wurde vermutlich extern geändert.")
except (OSError, ValueError, TypeError, json.JSONDecodeError):
pass
return warnings
def refresh_member_record_hashes(self, member_id: str) -> None:
member = self.get_member(member_id)
contributions = self.get_contributions(member_id)
write_json_atomic(self._member_path(member_id) / "member.json", member.to_dict())
write_json_atomic(self._member_path(member_id) / "contributions.json", contributions.to_dict())
def refresh_asset_record_hashes(self, asset_id: str) -> None:
asset = self.get_asset(asset_id)
write_json_atomic(self._asset_path(asset_id) / "asset.json", asset.to_dict())
def get_member_number_policy(self) -> dict[str, str]:
try:
config = read_json(self.root / "repository.json")
+43
View File
@@ -510,3 +510,46 @@ class AssetClaimDialog(tk.Toplevel):
return
self.destroy()
self.on_created(str(result["claim"]["claim_id"]))
class IntegrityWarningDialog(tk.Toplevel):
def __init__(
self,
master: tk.Misc,
*,
title: str,
warnings: list[str],
on_confirm: Callable[[], None],
):
super().__init__(master)
self.on_confirm = on_confirm
self.title(title)
self.transient(master.winfo_toplevel())
self.resizable(False, False)
self.warnings = warnings
self._build_ui()
self.bind("<Escape>", lambda _event: self.destroy())
self.after_idle(self._activate_modal)
def _build_ui(self) -> None:
frame = ttk.Frame(self, padding=18)
frame.pack(fill="both", expand=True)
message = (
"ACHTUNG: Die zugehörigen JSON-Dateien wurden vermutlich extern geändert.\n\n"
"Haben Sie alle Daten geprüft und soll der Hash jetzt aktualisiert werden?"
)
ttk.Label(frame, text=message, justify="left").grid(row=0, column=0, sticky="w")
ttk.Label(frame, text="\n".join(f"{item}" for item in self.warnings), style="Warning.TLabel", justify="left").grid(
row=1, column=0, sticky="w", pady=(12, 0)
)
buttons = ttk.Frame(frame)
buttons.grid(row=2, column=0, sticky="e", pady=(18, 0))
ttk.Button(buttons, text="Nein", command=self.destroy).pack(side="left", padx=(0, 8))
ttk.Button(buttons, text="Ja, bestätigen", style="Accent.TButton", command=self._confirm).pack(side="left")
def _activate_modal(self) -> None:
_activate_modal_window(self)
def _confirm(self) -> None:
self.destroy()
self.on_confirm()
+12 -2
View File
@@ -471,7 +471,7 @@ class MainWindow(ttk.Frame):
tab = HousekeeperTab(
self.notebook,
self.findings,
self.open_member,
self._open_housekeeper_target,
self.run_housekeeper,
self.delete_housekeeper_task,
lambda: self.tabs.close(key),
@@ -545,12 +545,22 @@ class MainWindow(ttk.Frame):
self.tabs.refresh_icons(self.icons)
def _show_validation_warning(self) -> None:
display_errors = [item for item in self.validation_errors if "Hash fehlt oder stimmt nicht" not in item]
if not display_errors:
return
messagebox.showwarning(
"Datenprüfung",
"Der Store enthält ungültige Akten:\n\n" + "\n".join(self.validation_errors[:12]),
"Der Store enthält ungültige Akten:\n\n" + "\n".join(display_errors[:12]),
parent=self,
)
def _open_housekeeper_target(self, finding: HousekeeperFinding) -> None:
if finding.target_type == "asset" and finding.asset_id:
self.open_asset(finding.asset_id)
return
if finding.member_id:
self.open_member(finding.member_id)
def _member_reference_label(self, member_id: str) -> str:
if not member_id:
return ""
+7 -3
View File
@@ -485,14 +485,14 @@ class HousekeeperTab(ttk.Frame):
self,
master: tk.Misc,
findings: list[HousekeeperFinding],
on_open_member: Callable[[str], None],
on_open_target: Callable[[HousekeeperFinding], None],
on_refresh: Callable[[], list[HousekeeperFinding]],
on_delete: Callable[[str], list[HousekeeperFinding]],
on_close: Callable[[], None],
):
super().__init__(master, padding=12)
self.findings = findings
self.on_open_member = on_open_member
self.on_open_target = on_open_target
self.on_refresh = on_refresh
self.on_delete = on_delete
self.on_close = on_close
@@ -602,11 +602,15 @@ class HousekeeperTab(ttk.Frame):
def _open_selected(self) -> None:
selected = self.tree.selection()
if selected:
self.on_open_member(self.findings[int(selected[0])].member_id)
self.on_open_target(self.findings[int(selected[0])])
def _finding_details(finding: HousekeeperFinding) -> str:
lines = [f"{finding.severity.upper()} · {finding.code}", finding.title]
if finding.target_type == "asset" and finding.asset_id:
lines.append(f"Asset: {finding.asset_id}")
elif finding.member_id:
lines.append(f"Mitglied: {finding.member_id}")
if finding.key:
lines.append(f"Key: {finding.key}")
if finding.due_date: