server: extract tarball into blob store on snapshot upload
CI / validate (push) Successful in 13s
CI / docker (push) Successful in 14s

The deferred 'hardlink blobs from tarball' optimization from DESIGN.md
landed as 'just walk the tarball and write blobs separately' for v1.
GET /v1/blob/{sha} was 404'ing because the blob store was empty —
storage only had snapshots/<id>.tar.zst and a manifest.

Server now:
1. Parses uploaded multipart manifest + tarball
2. Walks the tar entries, computes each entry's sha256
3. Cross-checks each computed sha against the sha declared in the manifest (the upload is rejected with HTTP 400 on mismatch)
4. Writes each blob to <user>/blobs/ via Storage.WriteBlob
5. Then stores the snapshot tarball + manifest as before

2 new tests cover: (a) POST then GET /v1/blob/{sha} round-trip,
(b) manifest-claims-different-sha-than-tarball rejection.

Discovered via e2e smoke against frazclient: pull 404'd on every blob
after a successful push. 33/33 tests pass.
This commit is contained in:
2026-06-02 19:08:56 +02:00
parent a73e254862
commit 2b7290626e
2 changed files with 171 additions and 13 deletions
+51
View File
@@ -1,6 +1,8 @@
package main
import (
"archive/tar"
"bytes"
"context"
"encoding/json"
"errors"
@@ -177,6 +179,14 @@ func (s *Server) handlePostSnapshot(w http.ResponseWriter, r *http.Request, who
return
}
// Extract tarball into blob store + verify each entry's hash matches
// the manifest. The tarball is uncompressed plain tar in v1; zstd
// support deferred until Python stdlib gains a zstd module.
if err := s.extractTarToBlobs(who.User, m, tarBytes); err != nil {
writeJSONError(w, http.StatusBadRequest, fmt.Sprintf("extract tarball: %v", err))
return
}
if err := s.storage.StoreSnapshot(who.User, m, tarBytes); err != nil {
writeJSONError(w, http.StatusInternalServerError, fmt.Sprintf("store: %v", err))
return
@@ -187,6 +197,47 @@ func (s *Server) handlePostSnapshot(w http.ResponseWriter, r *http.Request, who
})
}
// extractTarToBlobs walks the uploaded tarball and stores the contents of
// every regular file that the manifest declares as a blob for user, using
// Storage.WriteBlob.
//
// For each stored entry the sha returned by WriteBlob is compared with the
// manifest's SHA256 for that path; the first mismatch aborts the walk with an
// error. Note that the comparison happens after the write, so the mismatching
// blob has already been handed to the blob store when the error is returned.
//
// Tar entries that are not regular files, or whose name does not appear in
// m.Files, are skipped (defensive: never trust tar contents). Manifest
// entries that have no corresponding tar entry are not treated as an error.
//
// It returns nil once the end of the archive is reached, or an error wrapping
// the tar read failure, the blob write failure, or describing the sha
// mismatch.
func (s *Server) extractTarToBlobs(user string, m *Manifest, tarBytes []byte) error {
	tr := tar.NewReader(bytes.NewReader(tarBytes))
	for {
		hdr, err := tr.Next()
		if errors.Is(err, io.EOF) {
			return nil
		}
		if err != nil {
			return fmt.Errorf("tar read: %w", err)
		}
		if hdr.Typeflag != tar.TypeReg && hdr.Typeflag != tar.TypeRegA {
			continue
		}
		expected, ok := m.Files[hdr.Name]
		if !ok {
			// File in tar not declared in manifest. Skip; don't trust it.
			continue
		}
		// Deliberately no pre-allocation from hdr.Size: the header is
		// attacker-controlled, and a forged size would force a huge (or
		// panicking) allocation. io.ReadAll grows its own buffer and can
		// never read more than the bytes actually present in tarBytes.
		buf, err := io.ReadAll(tr)
		if err != nil {
			return fmt.Errorf("tar entry %s: %w", hdr.Name, err)
		}
		sha, err := s.storage.WriteBlob(user, buf)
		if err != nil {
			return fmt.Errorf("write blob for %s: %w", hdr.Name, err)
		}
		if sha != expected.SHA256 {
			return fmt.Errorf("manifest mismatch for %s: tar sha %s != manifest sha %s",
				hdr.Name, sha, expected.SHA256)
		}
	}
}
// GET /v1/snapshots
func (s *Server) handleListSnapshots(w http.ResponseWriter, _ *http.Request, who *AuthInfo) {
list, err := s.storage.ListSnapshots(who.User)
+120 -13
View File
@@ -1,6 +1,7 @@
package main
import (
"archive/tar"
"bytes"
"context"
"encoding/json"
@@ -14,6 +15,27 @@ import (
"time"
)
// buildTarball packs files into an in-memory, uncompressed tar archive and
// returns the archive bytes. Every entry is written with mode 0600 and a size
// equal to the length of its data. Any tar writer failure aborts the calling
// test via t.Fatalf. Entries are emitted in map iteration order, so the order
// is unspecified when more than one file is supplied.
func buildTarball(t *testing.T, files map[string][]byte) []byte {
	t.Helper()

	archive := new(bytes.Buffer)
	w := tar.NewWriter(archive)
	for entryName, payload := range files {
		header := tar.Header{
			Name: entryName,
			Mode: 0o600,
			Size: int64(len(payload)),
		}
		if err := w.WriteHeader(&header); err != nil {
			t.Fatalf("tar header %s: %v", entryName, err)
		}
		_, err := w.Write(payload)
		if err != nil {
			t.Fatalf("tar write %s: %v", entryName, err)
		}
	}
	// Close flushes padding and the end-of-archive marker into the buffer.
	if err := w.Close(); err != nil {
		t.Fatalf("tar close: %v", err)
	}
	return archive.Bytes()
}
// scopedVerifier accepts a fixed token and reports a fixed user + scope set.
type scopedVerifier struct {
token string
@@ -110,21 +132,17 @@ func TestServer_GetManifest_NoSnapshots_204(t *testing.T) {
}
func TestServer_PostSnapshot_RoundTrip(t *testing.T) {
s, st, _ := newTestServer(t)
s, _, _ := newTestServer(t)
// Write a blob first (simulates the client side).
blob := []byte("file contents")
sha, err := st.WriteBlob("user1", blob)
if err != nil {
t.Fatalf("WriteBlob: %v", err)
}
tarBytes := buildTarball(t, map[string][]byte{"options.txt": blob})
manifest := &Manifest{
SnapshotID: "01TESTSNAPSHOT0001",
CreatedAt: time.Date(2026, 6, 2, 12, 0, 0, 0, time.UTC),
Files: map[string]FileEntry{
"options.txt": {
SHA256: sha,
SHA256: HashBytes(blob),
Size: int64(len(blob)),
Mtime: time.Date(2026, 6, 2, 11, 0, 0, 0, time.UTC),
},
@@ -136,8 +154,8 @@ func TestServer_PostSnapshot_RoundTrip(t *testing.T) {
mw := multipart.NewWriter(body)
fw, _ := mw.CreateFormFile("manifest", "manifest.json")
_, _ = fw.Write(manifestJSON)
fw, _ = mw.CreateFormFile("tarball", "snapshot.tar.zst")
_, _ = fw.Write([]byte("fake-tarball-bytes"))
fw, _ = mw.CreateFormFile("tarball", "snapshot.tar")
_, _ = fw.Write(tarBytes)
mw.Close()
req := authReq(t, http.MethodPost, "/v1/snapshot", body)
@@ -180,6 +198,92 @@ func TestServer_GetBlob_ReturnsContent(t *testing.T) {
}
}
// Regression: after POST /v1/snapshot, GET /v1/blob/{sha} must succeed for
// every file declared in the uploaded manifest. We discovered during e2e
// smoke that the server was storing the tarball + manifest but not
// populating the blob store, so subsequent pulls 404'd.
//
// The test uploads a one-file snapshot and then fetches that file's blob by
// sha, expecting the original bytes back. Fixture-construction errors are
// checked explicitly so a broken request body fails here rather than showing
// up as a confusing HTTP status mismatch.
func TestServer_PostSnapshot_PopulatesBlobStore(t *testing.T) {
	s, _, _ := newTestServer(t)

	content := []byte("hello cloud")
	sha := HashBytes(content)
	tarBytes := buildTarball(t, map[string][]byte{"options.txt": content})

	manifest := &Manifest{
		SnapshotID: "01EXTRACTTESTABCDEF",
		CreatedAt:  time.Date(2026, 6, 2, 12, 0, 0, 0, time.UTC),
		Files: map[string]FileEntry{
			"options.txt": {
				SHA256: sha,
				Size:   int64(len(content)),
				Mtime:  time.Date(2026, 6, 2, 11, 0, 0, 0, time.UTC),
			},
		},
	}
	manifestJSON, err := json.Marshal(manifest)
	if err != nil {
		t.Fatalf("marshal manifest: %v", err)
	}

	// Assemble the multipart body: a "manifest" part followed by a "tarball" part.
	body := &bytes.Buffer{}
	mw := multipart.NewWriter(body)
	fw, err := mw.CreateFormFile("manifest", "manifest.json")
	if err != nil {
		t.Fatalf("create manifest part: %v", err)
	}
	if _, err := fw.Write(manifestJSON); err != nil {
		t.Fatalf("write manifest part: %v", err)
	}
	fw, err = mw.CreateFormFile("tarball", "snapshot.tar")
	if err != nil {
		t.Fatalf("create tarball part: %v", err)
	}
	if _, err := fw.Write(tarBytes); err != nil {
		t.Fatalf("write tarball part: %v", err)
	}
	// Close writes the terminating boundary; without it the body is malformed.
	if err := mw.Close(); err != nil {
		t.Fatalf("close multipart writer: %v", err)
	}

	req := authReq(t, http.MethodPost, "/v1/snapshot", body)
	req.Header.Set("Content-Type", mw.FormDataContentType())
	rec := httptest.NewRecorder()
	s.ServeHTTP(rec, req)
	if rec.Code != http.StatusOK {
		t.Fatalf("upload: got %d body=%s", rec.Code, rec.Body.String())
	}

	// Now GET /v1/blob/{sha} should succeed and return the original content
	rec = httptest.NewRecorder()
	s.ServeHTTP(rec, authReq(t, http.MethodGet, "/v1/blob/"+sha, nil))
	if rec.Code != http.StatusOK {
		t.Fatalf("blob fetch: got %d", rec.Code)
	}
	if !bytes.Equal(rec.Body.Bytes(), content) {
		t.Errorf("blob content mismatch: got %q want %q", rec.Body.String(), content)
	}
}
// Reject uploads whose tarball contents don't match the manifest's claimed sha.
//
// The tarball carries "ACTUAL" under options.txt while the manifest declares
// the sha of "CLAIMED" for the same path; the upload is expected to come back
// as 400 Bad Request. Fixture-construction errors are checked explicitly so a
// malformed request body cannot be mistaken for the rejection under test.
func TestServer_PostSnapshot_RejectsManifestMismatch(t *testing.T) {
	s, _, _ := newTestServer(t)

	actual := []byte("ACTUAL")
	tarBytes := buildTarball(t, map[string][]byte{"options.txt": actual})

	manifest := &Manifest{
		SnapshotID: "01MISMATCHTEST00001",
		CreatedAt:  time.Date(2026, 6, 2, 12, 0, 0, 0, time.UTC),
		Files: map[string]FileEntry{
			"options.txt": {
				SHA256: HashBytes([]byte("CLAIMED")), // lies about content
				Size:   int64(len(actual)),            // size is truthful; only the sha is wrong
				Mtime:  time.Date(2026, 6, 2, 11, 0, 0, 0, time.UTC),
			},
		},
	}
	manifestJSON, err := json.Marshal(manifest)
	if err != nil {
		t.Fatalf("marshal manifest: %v", err)
	}

	// Assemble the multipart body: a "manifest" part followed by a "tarball" part.
	body := &bytes.Buffer{}
	mw := multipart.NewWriter(body)
	fw, err := mw.CreateFormFile("manifest", "manifest.json")
	if err != nil {
		t.Fatalf("create manifest part: %v", err)
	}
	if _, err := fw.Write(manifestJSON); err != nil {
		t.Fatalf("write manifest part: %v", err)
	}
	fw, err = mw.CreateFormFile("tarball", "snapshot.tar")
	if err != nil {
		t.Fatalf("create tarball part: %v", err)
	}
	if _, err := fw.Write(tarBytes); err != nil {
		t.Fatalf("write tarball part: %v", err)
	}
	// Close writes the terminating boundary; without it the body is malformed.
	if err := mw.Close(); err != nil {
		t.Fatalf("close multipart writer: %v", err)
	}

	req := authReq(t, http.MethodPost, "/v1/snapshot", body)
	req.Header.Set("Content-Type", mw.FormDataContentType())
	rec := httptest.NewRecorder()
	s.ServeHTTP(rec, req)
	if rec.Code != http.StatusBadRequest {
		t.Errorf("got %d, want 400; body=%s", rec.Code, rec.Body.String())
	}
}
func TestServer_GetBlob_404(t *testing.T) {
s, _, _ := newTestServer(t)
rec := httptest.NewRecorder()
@@ -196,8 +300,8 @@ func TestServer_PostSnapshot_BadManifest_400(t *testing.T) {
mw := multipart.NewWriter(body)
fw, _ := mw.CreateFormFile("manifest", "manifest.json")
_, _ = fw.Write([]byte(`{"snapshot_id":"","created_at":"2026-01-01T00:00:00Z","files":{}}`))
fw, _ = mw.CreateFormFile("tarball", "snapshot.tar.zst")
_, _ = fw.Write([]byte("x"))
fw, _ = mw.CreateFormFile("tarball", "snapshot.tar")
_, _ = fw.Write(buildTarball(t, nil))
mw.Close()
req := authReq(t, http.MethodPost, "/v1/snapshot", body)
req.Header.Set("Content-Type", mw.FormDataContentType())
@@ -226,12 +330,15 @@ func TestServer_PostSnapshot_QuotaExceeded_413(t *testing.T) {
}
manifestJSON, _ := json.Marshal(manifest)
// Minimal tar with one 1-byte entry is already 1536+ bytes (header+data+EOF blocks),
// well over the 100-byte quota set in this test.
tarOver := buildTarball(t, map[string][]byte{"f.txt": []byte("x")})
body := &bytes.Buffer{}
mw := multipart.NewWriter(body)
fw, _ := mw.CreateFormFile("manifest", "manifest.json")
_, _ = fw.Write(manifestJSON)
fw, _ = mw.CreateFormFile("tarball", "tar.zst")
_, _ = fw.Write(make([]byte, 200)) // > 100 byte quota
fw, _ = mw.CreateFormFile("tarball", "tar")
_, _ = fw.Write(tarOver)
mw.Close()
req := authReq(t, http.MethodPost, "/v1/snapshot", body)