From 2b7290626ec487b5c7b38e304b232076d57e8be4 Mon Sep 17 00:00:00 2001 From: claude-timemachine Date: Tue, 2 Jun 2026 19:08:56 +0200 Subject: [PATCH] server: extract tarball into blob store on snapshot upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The deferred 'hardlink blobs from tarball' optimization from DESIGN.md landed as 'just walk the tarball and write blobs separately' for v1. GET /v1/blob/{sha} was 404'ing because the blob store was empty — storage only had snapshots/.tar.zst and a manifest. Server now: 1. Parses uploaded multipart manifest + tarball 2. Walks the tar entries, skipping non-regular files and entries not declared in the manifest 3. Writes each remaining entry to /blobs/ via Storage.WriteBlob, which returns the content's sha 4. Cross-checks that sha against the manifest's declared sha (rejects 400 on mismatch; note the blob has already been written when the check runs) 5. Then stores the snapshot tarball + manifest as before 2 new tests cover: (a) POST then GET /v1/blob/{sha} round-trip, (b) manifest-claims-different-sha-than-tarball rejection. Discovered via e2e smoke against frazclient: pull 404'd on every blob after a successful push. 33/33 tests pass. --- server.go | 51 +++++++++++++++++++ server_test.go | 133 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 171 insertions(+), 13 deletions(-) diff --git a/server.go b/server.go index b63e72e..2591544 100644 --- a/server.go +++ b/server.go @@ -1,6 +1,8 @@ package main import ( + "archive/tar" + "bytes" "context" "encoding/json" "errors" @@ -177,6 +179,14 @@ func (s *Server) handlePostSnapshot(w http.ResponseWriter, r *http.Request, who return } + // Extract tarball into blob store + verify each entry's hash matches + // the manifest. The tarball is uncompressed plain tar in v1; zstd + // support deferred until Python stdlib gains a zstd module. 
+ if err := s.extractTarToBlobs(who.User, m, tarBytes); err != nil { + writeJSONError(w, http.StatusBadRequest, fmt.Sprintf("extract tarball: %v", err)) + return + } + if err := s.storage.StoreSnapshot(who.User, m, tarBytes); err != nil { writeJSONError(w, http.StatusInternalServerError, fmt.Sprintf("store: %v", err)) return @@ -187,6 +197,47 @@ func (s *Server) handlePostSnapshot(w http.ResponseWriter, r *http.Request, who }) } +// extractTarToBlobs walks the uploaded tarball, writes each regular file's +// contents into the blob store (content-addressed dedupe), and verifies the +// hash against the manifest entry for that path. Files in the tarball that +// AREN'T declared in the manifest are ignored (defensive: never trust tar +// contents). Manifest entries without a tarball file are allowed (the +// blob may already exist from a previous snapshot). +func (s *Server) extractTarToBlobs(user string, m *Manifest, tarBytes []byte) error { + tr := tar.NewReader(bytes.NewReader(tarBytes)) + for { + hdr, err := tr.Next() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return fmt.Errorf("tar read: %w", err) + } + if hdr.Typeflag != tar.TypeReg && hdr.Typeflag != tar.TypeRegA { + continue + } + expected, ok := m.Files[hdr.Name] + if !ok { + // File in tar not declared in manifest. Skip; don't trust it. 
+ continue + } + buf := make([]byte, 0, hdr.Size) + buf, err = io.ReadAll(tr) + if err != nil { + return fmt.Errorf("tar entry %s: %w", hdr.Name, err) + } + sha, err := s.storage.WriteBlob(user, buf) + if err != nil { + return fmt.Errorf("write blob for %s: %w", hdr.Name, err) + } + if sha != expected.SHA256 { + return fmt.Errorf("manifest mismatch for %s: tar sha %s != manifest sha %s", + hdr.Name, sha, expected.SHA256) + } + } + return nil +} + // GET /v1/snapshots func (s *Server) handleListSnapshots(w http.ResponseWriter, _ *http.Request, who *AuthInfo) { list, err := s.storage.ListSnapshots(who.User) diff --git a/server_test.go b/server_test.go index a035cf0..8f6fb70 100644 --- a/server_test.go +++ b/server_test.go @@ -1,6 +1,7 @@ package main import ( + "archive/tar" "bytes" "context" "encoding/json" @@ -14,6 +15,27 @@ import ( "time" ) +// buildTarball wraps the given files into an in-memory tar archive. Used by +// the POST snapshot tests; mirrors what the real client emits. +func buildTarball(t *testing.T, files map[string][]byte) []byte { + t.Helper() + var buf bytes.Buffer + tw := tar.NewWriter(&buf) + for name, data := range files { + hdr := &tar.Header{Name: name, Mode: 0o600, Size: int64(len(data))} + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("tar header %s: %v", name, err) + } + if _, err := tw.Write(data); err != nil { + t.Fatalf("tar write %s: %v", name, err) + } + } + if err := tw.Close(); err != nil { + t.Fatalf("tar close: %v", err) + } + return buf.Bytes() +} + // scopedVerifier accepts a fixed token and reports a fixed user + scope set. type scopedVerifier struct { token string @@ -110,21 +132,17 @@ func TestServer_GetManifest_NoSnapshots_204(t *testing.T) { } func TestServer_PostSnapshot_RoundTrip(t *testing.T) { - s, st, _ := newTestServer(t) + s, _, _ := newTestServer(t) - // Write a blob first (simulates the client side). 
blob := []byte("file contents") - sha, err := st.WriteBlob("user1", blob) - if err != nil { - t.Fatalf("WriteBlob: %v", err) - } + tarBytes := buildTarball(t, map[string][]byte{"options.txt": blob}) manifest := &Manifest{ SnapshotID: "01TESTSNAPSHOT0001", CreatedAt: time.Date(2026, 6, 2, 12, 0, 0, 0, time.UTC), Files: map[string]FileEntry{ "options.txt": { - SHA256: sha, + SHA256: HashBytes(blob), Size: int64(len(blob)), Mtime: time.Date(2026, 6, 2, 11, 0, 0, 0, time.UTC), }, @@ -136,8 +154,8 @@ func TestServer_PostSnapshot_RoundTrip(t *testing.T) { mw := multipart.NewWriter(body) fw, _ := mw.CreateFormFile("manifest", "manifest.json") _, _ = fw.Write(manifestJSON) - fw, _ = mw.CreateFormFile("tarball", "snapshot.tar.zst") - _, _ = fw.Write([]byte("fake-tarball-bytes")) + fw, _ = mw.CreateFormFile("tarball", "snapshot.tar") + _, _ = fw.Write(tarBytes) mw.Close() req := authReq(t, http.MethodPost, "/v1/snapshot", body) @@ -180,6 +198,92 @@ func TestServer_GetBlob_ReturnsContent(t *testing.T) { } } +// Regression: after POST /v1/snapshot, GET /v1/blob/{sha} must succeed for +// every file declared in the uploaded manifest. We discovered during e2e +// smoke that the server was storing the tarball + manifest but not +// populating the blob store, so subsequent pulls 404'd. 
+func TestServer_PostSnapshot_PopulatesBlobStore(t *testing.T) { + s, _, _ := newTestServer(t) + + content := []byte("hello cloud") + sha := HashBytes(content) + tarBytes := buildTarball(t, map[string][]byte{"options.txt": content}) + + manifest := &Manifest{ + SnapshotID: "01EXTRACTTESTABCDEF", + CreatedAt: time.Date(2026, 6, 2, 12, 0, 0, 0, time.UTC), + Files: map[string]FileEntry{ + "options.txt": { + SHA256: sha, + Size: int64(len(content)), + Mtime: time.Date(2026, 6, 2, 11, 0, 0, 0, time.UTC), + }, + }, + } + manifestJSON, _ := json.Marshal(manifest) + + body := &bytes.Buffer{} + mw := multipart.NewWriter(body) + fw, _ := mw.CreateFormFile("manifest", "manifest.json") + _, _ = fw.Write(manifestJSON) + fw, _ = mw.CreateFormFile("tarball", "snapshot.tar") + _, _ = fw.Write(tarBytes) + mw.Close() + + req := authReq(t, http.MethodPost, "/v1/snapshot", body) + req.Header.Set("Content-Type", mw.FormDataContentType()) + rec := httptest.NewRecorder() + s.ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("upload: got %d body=%s", rec.Code, rec.Body.String()) + } + + // Now GET /v1/blob/{sha} should succeed and return the original content + rec = httptest.NewRecorder() + s.ServeHTTP(rec, authReq(t, http.MethodGet, "/v1/blob/"+sha, nil)) + if rec.Code != http.StatusOK { + t.Fatalf("blob fetch: got %d", rec.Code) + } + if !bytes.Equal(rec.Body.Bytes(), content) { + t.Errorf("blob content mismatch: got %q want %q", rec.Body.String(), content) + } +} + +// Reject uploads whose tarball contents don't match the manifest's claimed sha. 
+func TestServer_PostSnapshot_RejectsManifestMismatch(t *testing.T) { + s, _, _ := newTestServer(t) + tarBytes := buildTarball(t, map[string][]byte{"options.txt": []byte("ACTUAL")}) + + manifest := &Manifest{ + SnapshotID: "01MISMATCHTEST00001", + CreatedAt: time.Date(2026, 6, 2, 12, 0, 0, 0, time.UTC), + Files: map[string]FileEntry{ + "options.txt": { + SHA256: HashBytes([]byte("CLAIMED")), // lies about content + Size: 6, + Mtime: time.Date(2026, 6, 2, 11, 0, 0, 0, time.UTC), + }, + }, + } + manifestJSON, _ := json.Marshal(manifest) + + body := &bytes.Buffer{} + mw := multipart.NewWriter(body) + fw, _ := mw.CreateFormFile("manifest", "manifest.json") + _, _ = fw.Write(manifestJSON) + fw, _ = mw.CreateFormFile("tarball", "snapshot.tar") + _, _ = fw.Write(tarBytes) + mw.Close() + + req := authReq(t, http.MethodPost, "/v1/snapshot", body) + req.Header.Set("Content-Type", mw.FormDataContentType()) + rec := httptest.NewRecorder() + s.ServeHTTP(rec, req) + if rec.Code != http.StatusBadRequest { + t.Errorf("got %d, want 400; body=%s", rec.Code, rec.Body.String()) + } +} + func TestServer_GetBlob_404(t *testing.T) { s, _, _ := newTestServer(t) rec := httptest.NewRecorder() @@ -196,8 +300,8 @@ func TestServer_PostSnapshot_BadManifest_400(t *testing.T) { mw := multipart.NewWriter(body) fw, _ := mw.CreateFormFile("manifest", "manifest.json") _, _ = fw.Write([]byte(`{"snapshot_id":"","created_at":"2026-01-01T00:00:00Z","files":{}}`)) - fw, _ = mw.CreateFormFile("tarball", "snapshot.tar.zst") - _, _ = fw.Write([]byte("x")) + fw, _ = mw.CreateFormFile("tarball", "snapshot.tar") + _, _ = fw.Write(buildTarball(t, nil)) mw.Close() req := authReq(t, http.MethodPost, "/v1/snapshot", body) req.Header.Set("Content-Type", mw.FormDataContentType()) @@ -226,12 +330,15 @@ func TestServer_PostSnapshot_QuotaExceeded_413(t *testing.T) { } manifestJSON, _ := json.Marshal(manifest) + // Minimal tar with one 1-byte entry is already 1536+ bytes (header+data+EOF blocks), + // well over the 
100-byte quota set in this test. + tarOver := buildTarball(t, map[string][]byte{"f.txt": []byte("x")}) body := &bytes.Buffer{} mw := multipart.NewWriter(body) fw, _ := mw.CreateFormFile("manifest", "manifest.json") _, _ = fw.Write(manifestJSON) - fw, _ = mw.CreateFormFile("tarball", "tar.zst") - _, _ = fw.Write(make([]byte, 200)) // > 100 byte quota + fw, _ = mw.CreateFormFile("tarball", "tar") + _, _ = fw.Write(tarOver) mw.Close() req := authReq(t, http.MethodPost, "/v1/snapshot", body)