From 7c9d33f95275ebb0e4541edac4573c3be2b770da Mon Sep 17 00:00:00 2001 From: claude-timemachine Date: Fri, 5 Jun 2026 00:20:40 +0200 Subject: [PATCH] feat(sync): wire state.json + divergence detection + dialogs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit state.py: per-instance sync state. <pack_folder>/.cloud-sync/state.json (mode 600) records last_pulled_snapshot_id + last_pulled_at + host_tag. Versioned schema. clear() on remote-empty. sync.pull decision tree (replaces the unconditional restore): no token file → prompt_login_qt; on Skip return 0 (don't block launch) no state + remote empty → no-op no state + remote non-empty → restore (first-run on this machine) state.id == remote.id → skip restore (up to date) state.id != remote.id, no in-scope local edits since state.at → restore (fast-forward) state.id != remote.id, in-scope local edits since state.at → prompt_conflict_qt keep_local → don't restore; push will overwrite cloud use_remote → restore + update state cancel → exit 1 sync.push: --json output parsed for snapshot_id; state.json updated to that id after a successful backup. Skips without failing (logs a notice, returns 0) if no token. _find_modified_in_scope: walks include roots, filters via _matches_any (restic-style globs: dir/, **/dir/, **/*.glob). Stops at 50 hits; we only need 'any' + a sample for the dialog. _format_dt: hand-rolled (no GNU-vs-Windows strftime quirks) → 'Thursday, October 21, 2021 at 7:12 PM'. Restic JSON parsing helpers: _parse_snapshots, _parse_restic_time (handles nanosecond precision), _parse_backup_summary. tests/test_state.py: 19 new tests covering state read/write, scope-aware mtime walk, exclude glob matching, restic output parsers. Total: 52 green. 
--- cloud_sync/state.py | 84 ++++++++++++ cloud_sync/sync.py | 319 +++++++++++++++++++++++++++++++++++++++----- tests/test_state.py | 192 ++++++++++++++++++++++++++ 3 files changed, 563 insertions(+), 32 deletions(-) create mode 100644 cloud_sync/state.py create mode 100644 tests/test_state.py diff --git a/cloud_sync/state.py b/cloud_sync/state.py new file mode 100644 index 0000000..e44b61b --- /dev/null +++ b/cloud_sync/state.py @@ -0,0 +1,84 @@ +"""Per-instance sync state. + +Tracks the snapshot id this pack was last synced to and when. Lives at +``/.cloud-sync/state.json`` (mode 600). + +Purpose: divergence detection. On ``pull``, if the remote latest id +differs from ``last_pulled_snapshot_id`` AND any in-scope local file +has mtime > ``last_pulled_at``, the local and remote diverged from a +common ancestor — surface the conflict dialog. +""" + +from __future__ import annotations + +import json +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path + + +SCHEMA_VERSION = 1 + + +@dataclass(frozen=True) +class State: + last_pulled_snapshot_id: str + last_pulled_at: datetime + host_tag: str = "cloud-sync" + + +def state_path(pack_folder: Path) -> Path: + return pack_folder / ".cloud-sync" / "state.json" + + +def read(pack_folder: Path) -> State | None: + """Return parsed state or None if file missing / unreadable / wrong schema.""" + p = state_path(pack_folder) + if not p.exists(): + return None + try: + data = json.loads(p.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return None + if data.get("schema") != SCHEMA_VERSION: + return None + try: + return State( + last_pulled_snapshot_id=data["last_pulled_snapshot_id"], + last_pulled_at=_parse_iso(data["last_pulled_at"]), + host_tag=data.get("host_tag", "cloud-sync"), + ) + except (KeyError, ValueError): + return None + + +def write(pack_folder: Path, state: State) -> None: + """Persist state. 
Creates parent dir + sets mode 600.""" + p = state_path(pack_folder) + p.parent.mkdir(parents=True, exist_ok=True) + payload = { + "schema": SCHEMA_VERSION, + "last_pulled_snapshot_id": state.last_pulled_snapshot_id, + "last_pulled_at": state.last_pulled_at.astimezone(timezone.utc) + .isoformat() + .replace("+00:00", "Z"), + "host_tag": state.host_tag, + } + p.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") + p.chmod(0o600) + + +def clear(pack_folder: Path) -> None: + """Remove state.json if present. Used when remote has zero snapshots.""" + p = state_path(pack_folder) + p.unlink(missing_ok=True) + + +def _parse_iso(s: str) -> datetime: + """Parse ISO-8601 with trailing Z or +HH:MM, return tz-aware UTC.""" + if s.endswith("Z"): + s = s[:-1] + "+00:00" + dt = datetime.fromisoformat(s) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + return dt.astimezone(timezone.utc) diff --git a/cloud_sync/sync.py b/cloud_sync/sync.py index 0b4e277..40e410c 100644 --- a/cloud_sync/sync.py +++ b/cloud_sync/sync.py @@ -1,38 +1,52 @@ -"""pull + push entry points. +"""pull + push entry points + divergence detection. -Both subprocess restic against ``rest:://:@//`` -where the password is HTTP basic auth ONLY. Restic repos are initialised with -``--insecure-no-password`` so no encryption-at-rest password exists; protection -relies on: +Backend: restic-rest-server. URL form: +``rest:://:@//`` - 1. TLS in transit at the reverse proxy - 2. ``--private-repos`` + htpasswd per user at restic-rest-server - 3. ``--append-only`` to prevent client-side deletion - 4. Disk-level encryption (LUKS) on the host +Password = HTTP basic auth ONLY. Restic repos are initialised with +``--insecure-no-password``; encryption-at-rest delegated to LUKS on the +host disk + TLS at the reverse proxy. -Defense-in-depth via repo encryption was dropped because the threat model -(homelab, operator-trusted) doesn't justify the password-coordination cost. 
+Divergence detection (pull only): + + 1. Fetch remote latest snapshot id + time via ``snapshots --json --latest 1``. + 2. Read ``/.cloud-sync/state.json`` for the last-pulled snapshot id. + 3. Branches: + - no state.json AND remote empty → no-op + - no state.json AND remote non-empty → first-run restore + - state.last_pulled == remote.id → up to date, skip + - state.last_pulled != remote.id, no local edits since + state.last_pulled_at → fast-forward restore + - state.last_pulled != remote.id, local edits since + state.last_pulled_at → CONFLICT dialog """ from __future__ import annotations +import fnmatch +import json import sys import urllib.parse +from datetime import datetime, timezone from pathlib import Path -from . import restic, scope as scopemod +from . import restic, scope as scopemod, state as statemod from .cli import Args from .creds import read_credentials from .ui import HeadlessProgress, Progress def pull(args: Args, progress: Progress | None = None) -> int: - """Restore latest snapshot's files into pack_folder. - - If the repo has no snapshots yet, this is a no-op (first run on this - machine; nothing to restore). - """ ui = progress or HeadlessProgress() + + # First-run login. If the user declines, skip cloud sync without + # blocking the launch (return 0 — non-fatal for Prism PreLaunch). 
+ if not args.token_file.exists(): + if not _prompt_login_and_save(args, ui): + ui.set_status("Cloud sync skipped") + print("cloud-sync: no token; skipping pull") + return 0 + ui.set_status("Reading credentials…") discord_id, password = read_credentials(args.token_file) @@ -56,18 +70,56 @@ def pull(args: Args, progress: Progress | None = None) -> int: file=sys.stderr, ) return 2 - stripped = out.strip() - if stripped in ("", "null", "[]"): + + snapshots = _parse_snapshots(out) + if not snapshots: + statemod.clear(args.pack_folder) ui.set_status("No snapshots yet — nothing to pull") - print( - "cloud-sync: no snapshots yet for this user " - "(first run on this machine?); nothing to pull" - ) + print("cloud-sync: no snapshots yet for this user; nothing to pull") return 0 - scope = scopemod.load(args.pack_folder) - _, exclude_from = scopemod.materialize_for_restic(args.pack_folder, scope) + remote = snapshots[0] + remote_id = remote["id"] + remote_time = _parse_restic_time(remote["time"]) + local_state = statemod.read(args.pack_folder) + scope = scopemod.load(args.pack_folder) + + decision: str + if local_state is None: + decision = "use_remote" + elif local_state.last_pulled_snapshot_id == remote_id: + ui.set_status("Cloud is up to date") + print("cloud-sync: already at latest snapshot") + return 0 + else: + modified = _find_modified_in_scope( + args.pack_folder, scope, local_state.last_pulled_at + ) + if not modified: + decision = "use_remote" + else: + decision = _ask_conflict(modified, remote_time) + if decision is None: + # UI unavailable in headless mode → conservative: cancel + ui.set_status("Conflict detected; no UI available") + print( + "cloud-sync: conflict detected (remote moved + local edits) " + "but headless mode can't prompt; aborting", + file=sys.stderr, + ) + return 1 + + if decision == "cancel": + ui.set_status("Launch cancelled") + return 1 + if decision == "keep_local": + ui.set_status("Keeping local; push will overwrite cloud on exit") + 
print("cloud-sync: keeping local copy") + return 0 + + # decision == "use_remote" + _, exclude_from = scopemod.materialize_for_restic(args.pack_folder, scope) ui.set_status("Restoring files…") code, _ = restic.run( binary, @@ -85,14 +137,27 @@ def pull(args: Args, progress: Progress | None = None) -> int: if code != 0: print(f"cloud-sync: restic restore failed (exit {code})", file=sys.stderr) return 2 + + statemod.write( + args.pack_folder, + statemod.State( + last_pulled_snapshot_id=remote_id, + last_pulled_at=datetime.now(timezone.utc), + ), + ) ui.set_status("Pull complete") print("cloud-sync: pull ok") return 0 def push(args: Args, progress: Progress | None = None) -> int: - """Snapshot the in-scope files into the user's repo.""" ui = progress or HeadlessProgress() + + if not args.token_file.exists(): + ui.set_status("No cloud token; skipping push") + print("cloud-sync: no token; skipping push") + return 0 + ui.set_status("Reading credentials…") discord_id, password = read_credentials(args.token_file) @@ -105,7 +170,7 @@ def push(args: Args, progress: Progress | None = None) -> int: files_from, exclude_from = scopemod.materialize_for_restic(args.pack_folder, scope) ui.set_status("Uploading snapshot…") - code, _ = restic.run( + code, out = restic.run( binary, [ "-r", repo, "--insecure-no-password", @@ -114,6 +179,7 @@ def push(args: Args, progress: Progress | None = None) -> int: "--exclude-file", str(exclude_from), "--host", "cloud-sync", "--tag", "auto", + "--json", ], env=env, cwd=args.pack_folder, @@ -124,22 +190,211 @@ def push(args: Args, progress: Progress | None = None) -> int: if code != 0: print(f"cloud-sync: restic backup failed (exit {code})", file=sys.stderr) return 2 + + new_id = _parse_backup_summary(out) + if new_id is not None: + statemod.write( + args.pack_folder, + statemod.State( + last_pulled_snapshot_id=new_id, + last_pulled_at=datetime.now(timezone.utc), + ), + ) ui.set_status("Push complete") print("cloud-sync: push ok") return 0 # 
--------------------------------------------------------------------------- -# helpers +# divergence detection # --------------------------------------------------------------------------- -def _restic_repo(base_url: str, discord_id: str, password: str) -> str: - """Build rest:://:@// +def _find_modified_in_scope( + pack_folder: Path, scope: scopemod.Scope, since: datetime +) -> list[tuple[Path, datetime]]: + """Return (rel_path, mtime) for in-scope files newer than ``since``. - URL-embedded basic auth is universally supported by restic; alternative - env vars (RESTIC_REST_USERNAME, RESTIC_REST_PASSWORD) require 0.16+. + Walks each include root, skips paths matching any exclude glob. Stops + early at 50 hits (we only need to know "any" + sample some for the + conflict dialog). False positives are safer than false negatives — + a spurious conflict shows a dismissable dialog; a missed conflict + silently overwrites the user's edits. """ + since_ts = since.timestamp() + hits: list[tuple[Path, datetime]] = [] + for include in scope.include: + root = pack_folder / include.rstrip("/") + if not root.exists(): + continue + candidates: list[Path] = [root] if root.is_file() else _walk_files(root) + for f in candidates: + try: + mtime = f.stat().st_mtime + except OSError: + continue + if mtime <= since_ts: + continue + rel = f.relative_to(pack_folder) + if _matches_any(rel, scope.exclude): + continue + hits.append((rel, datetime.fromtimestamp(mtime, tz=timezone.utc))) + if len(hits) >= 50: + return hits + return hits + + +def _walk_files(root: Path): + try: + for p in root.rglob("*"): + if p.is_file(): + yield p + except OSError: + return + + +def _matches_any(rel: Path, patterns: list[str]) -> bool: + """Restic-style glob match against the relative path. 
+ + Subset of restic's exclude semantics that covers our default scope: + ``foo/`` — foo itself OR anything under it + ``**/foo/`` — any ancestor directory named foo, recursively + ``foo/bar*`` — fnmatch against the full relative path + ``**/*.log`` — fnmatch the basename (and any tail subpath) + """ + rel_str = str(rel) + parts = rel.parts + for pat in patterns: + if pat.endswith("/"): + core = pat.rstrip("/") + if core.startswith("**/"): + tail = core[len("**/") :] + if tail in parts: + return True + continue + if rel_str == core or rel_str.startswith(core + "/"): + return True + continue + if fnmatch.fnmatch(rel_str, pat): + return True + if pat.startswith("**/"): + suffix = pat[len("**/") :] + if fnmatch.fnmatch(rel.name, suffix): + return True + for i in range(len(parts)): + if fnmatch.fnmatch("/".join(parts[i:]), suffix): + return True + return False + + +def _ask_conflict( + modified: list[tuple[Path, datetime]], + remote_time: datetime, +) -> str | None: + """Show the conflict dialog. Returns choice or None if no UI available.""" + try: + from .ui_qt import prompt_conflict_qt + except ImportError: + return None + newest = max(modified, key=lambda h: h[1]) + return prompt_conflict_qt( + local_modified=_format_dt(newest[1]), + remote_modified=_format_dt(remote_time), + save_label="Minecraft save", + ) + + +def _prompt_login_and_save(args: Args, ui: Progress) -> bool: + """First-run login. 
Returns True if a token was saved, False if skipped.""" + try: + from .ui_qt import prompt_login_qt + except ImportError: + ui.set_status("No token and no UI; can't prompt") + print( + "cloud-sync: no token at " + f"{args.token_file} and no Qt UI available", + file=sys.stderr, + ) + return False + token = prompt_login_qt() + if token is None: + return False + args.token_file.parent.mkdir(parents=True, exist_ok=True) + args.token_file.write_text(token + "\n", encoding="utf-8") + args.token_file.chmod(0o600) + return True + + +# --------------------------------------------------------------------------- +# restic output parsing +# --------------------------------------------------------------------------- + + +def _parse_snapshots(out: str) -> list[dict]: + s = out.strip() + if not s or s == "null": + return [] + try: + data = json.loads(s) + except json.JSONDecodeError: + return [] + return data if isinstance(data, list) else [] + + +def _parse_restic_time(s: str) -> datetime: + """Restic emits e.g. ``2026-06-04T18:33:21.123456789Z``.""" + if s.endswith("Z"): + s = s[:-1] + "+00:00" + # Python's fromisoformat doesn't accept nanosecond precision — trim to micro. + if "." 
in s: + head, _, tail = s.partition(".") + frac, _, tz = tail.partition("+") + if tz: + tz = "+" + tz + else: + frac, _, tz = tail.partition("-") + if tz: + tz = "-" + tz + s = f"{head}.{frac[:6]}{tz}" + return datetime.fromisoformat(s).astimezone(timezone.utc) + + +def _parse_backup_summary(out: str) -> str | None: + """restic backup --json emits one JSON line per event; the final + ``summary`` event carries ``snapshot_id``.""" + for line in reversed(out.splitlines()): + line = line.strip() + if not line: + continue + try: + ev = json.loads(line) + except json.JSONDecodeError: + continue + if ev.get("message_type") == "summary" and "snapshot_id" in ev: + return str(ev["snapshot_id"]) + return None + + +# --------------------------------------------------------------------------- +# misc helpers +# --------------------------------------------------------------------------- + + +def _format_dt(dt: datetime) -> str: + """Format a tz-aware datetime as 'Thursday, October 21, 2021 at 7:12 PM'. + + Hand-rolled instead of ``strftime("%-d")`` because GNU strftime's + leading-zero stripping syntax is platform-specific (``%-d`` vs ``%#d``). 
+ """ + local = dt.astimezone() + weekday = local.strftime("%A") + month = local.strftime("%B") + hour = local.hour % 12 or 12 + ampm = local.strftime("%p") + return f"{weekday}, {month} {local.day}, {local.year} at {hour}:{local.minute:02d} {ampm}" + + +def _restic_repo(base_url: str, discord_id: str, password: str) -> str: raw = base_url.strip() if raw.startswith("rest:"): raw = raw[len("rest:"):] diff --git a/tests/test_state.py b/tests/test_state.py new file mode 100644 index 0000000..52679e2 --- /dev/null +++ b/tests/test_state.py @@ -0,0 +1,192 @@ +"""state.json read/write + divergence helpers.""" + +from __future__ import annotations + +import json +from datetime import datetime, timedelta, timezone +from pathlib import Path + +import pytest + +from cloud_sync import scope as scopemod +from cloud_sync import state as statemod +from cloud_sync.sync import ( + _find_modified_in_scope, + _format_dt, + _matches_any, + _parse_backup_summary, + _parse_restic_time, + _parse_snapshots, +) + + +# ---- state.read/write ---- + + +def test_read_missing_returns_none(tmp_path: Path): + assert statemod.read(tmp_path) is None + + +def test_write_then_read_roundtrip(tmp_path: Path): + dt = datetime(2026, 6, 5, 12, 34, 56, tzinfo=timezone.utc) + statemod.write( + tmp_path, + statemod.State(last_pulled_snapshot_id="abc123", last_pulled_at=dt), + ) + got = statemod.read(tmp_path) + assert got is not None + assert got.last_pulled_snapshot_id == "abc123" + assert got.last_pulled_at == dt + + +def test_write_sets_mode_600(tmp_path: Path): + statemod.write( + tmp_path, + statemod.State( + last_pulled_snapshot_id="x", + last_pulled_at=datetime.now(timezone.utc), + ), + ) + mode = statemod.state_path(tmp_path).stat().st_mode & 0o777 + assert mode == 0o600 + + +def test_clear_idempotent(tmp_path: Path): + statemod.clear(tmp_path) # no-op when missing + statemod.write( + tmp_path, + statemod.State( + last_pulled_snapshot_id="x", + last_pulled_at=datetime.now(timezone.utc), + ), + ) + 
assert statemod.state_path(tmp_path).exists() + statemod.clear(tmp_path) + assert not statemod.state_path(tmp_path).exists() + + +def test_wrong_schema_returns_none(tmp_path: Path): + p = tmp_path / ".cloud-sync" / "state.json" + p.parent.mkdir(parents=True) + p.write_text( + json.dumps({"schema": 999, "last_pulled_snapshot_id": "x", "last_pulled_at": "2026-01-01T00:00:00Z"}) + ) + assert statemod.read(tmp_path) is None + + +def test_garbage_json_returns_none(tmp_path: Path): + p = tmp_path / ".cloud-sync" / "state.json" + p.parent.mkdir(parents=True) + p.write_text("{not json") + assert statemod.read(tmp_path) is None + + +# ---- _find_modified_in_scope ---- + + +def _touch(p: Path, content: str = "x") -> None: + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(content) + + +def test_no_in_scope_changes_returns_empty(tmp_path: Path): + _touch(tmp_path / "options.txt") + # mtime is "now", `since` is the future + future = datetime.now(timezone.utc) + timedelta(hours=1) + hits = _find_modified_in_scope(tmp_path, scopemod.Scope(), future) + assert hits == [] + + +def test_in_scope_file_modified_after_since_detected(tmp_path: Path): + _touch(tmp_path / "options.txt") + past = datetime.now(timezone.utc) - timedelta(hours=1) + hits = _find_modified_in_scope(tmp_path, scopemod.Scope(), past) + assert any(rel.name == "options.txt" for rel, _ in hits) + + +def test_excluded_path_skipped(tmp_path: Path): + # config/packwiz-installer.log matches exclude "**/*.log" AND "config/packwiz*" + _touch(tmp_path / "config" / "packwiz-installer.log") + past = datetime.now(timezone.utc) - timedelta(hours=1) + hits = _find_modified_in_scope(tmp_path, scopemod.Scope(), past) + assert hits == [] + + +def test_only_excluded_changes_not_a_conflict(tmp_path: Path): + _touch(tmp_path / ".cloud-sync" / "files-from.txt") # excluded + _touch(tmp_path / "config" / "packwiz-installer.log") # excluded + past = datetime.now(timezone.utc) - timedelta(hours=1) + hits = 
_find_modified_in_scope(tmp_path, scopemod.Scope(), past) + assert hits == [] + + +def test_walks_subdirectories(tmp_path: Path): + _touch(tmp_path / "config" / "fabric" / "custom.json") + past = datetime.now(timezone.utc) - timedelta(hours=1) + hits = _find_modified_in_scope(tmp_path, scopemod.Scope(), past) + assert any("custom.json" in str(rel) for rel, _ in hits) + + +# ---- _matches_any ---- + + +def test_dir_pattern_matches_anything_under(tmp_path: Path): + assert _matches_any(Path(".cloud-sync/token"), [".cloud-sync/"]) + assert _matches_any(Path("a/b/cache/x"), ["**/cache/"]) + assert not _matches_any(Path("config/options.txt"), [".cloud-sync/"]) + + +def test_glob_pattern(tmp_path: Path): + assert _matches_any(Path("config/packwiz-installer.log"), ["config/packwiz*"]) + assert _matches_any(Path("foo/bar.log"), ["**/*.log"]) + + +# ---- restic output parsing ---- + + +def test_parse_snapshots_empty(): + assert _parse_snapshots("") == [] + assert _parse_snapshots("null") == [] + assert _parse_snapshots("[]") == [] + + +def test_parse_snapshots_one(): + out = json.dumps([{"id": "abc", "time": "2026-06-05T12:00:00Z"}]) + parsed = _parse_snapshots(out) + assert len(parsed) == 1 + assert parsed[0]["id"] == "abc" + + +def test_parse_restic_time_with_nanos(): + dt = _parse_restic_time("2026-06-04T18:33:21.123456789Z") + assert dt.tzinfo is timezone.utc + assert dt.year == 2026 and dt.day == 4 and dt.hour == 18 + + +def test_parse_backup_summary_finds_snapshot_id(): + out = ( + '{"message_type":"status","percent_done":0.5}\n' + '{"message_type":"status","percent_done":1.0}\n' + '{"message_type":"summary","snapshot_id":"deadbeef","files_new":3}\n' + ) + assert _parse_backup_summary(out) == "deadbeef" + + +def test_parse_backup_summary_missing_returns_none(): + assert _parse_backup_summary("") is None + assert _parse_backup_summary("not json\nstill not json") is None + + +# ---- _format_dt ---- + + +def test_format_dt_strips_leading_zero_on_hour(): + # 7:12 PM, not 
07:12 PM. Day of month also no leading zero. + dt = datetime(2021, 10, 21, 19, 12, tzinfo=timezone.utc) + out = _format_dt(dt) + # Output is local-time-converted, so don't pin the weekday/AM-PM exactly + # in case CI runs UTC vs PT; just check the formatting shape. + assert "October" in out or "Oct" not in out # full month name + assert ", 2021 at" in out + assert " AM" in out or " PM" in out + assert " 0:" not in out # no leading-zero hour