server: extract tarball into blob store on snapshot upload
The deferred 'hardlink blobs from tarball' optimization from DESIGN.md
landed as 'just walk the tarball and write blobs separately' for v1.
GET /v1/blob/{sha} was 404'ing because the blob store was empty —
storage only had snapshots/<id>.tar.zst and a manifest.
Server now:
1. Parses uploaded multipart manifest + tarball
2. Walks the tar entries, skipping any entry not declared in the manifest
3. Writes each declared file to <user>/blobs/ via Storage.WriteBlob, which returns the blob's sha
4. Cross-checks that sha against the manifest's declared sha (rejects with 400 on mismatch)
5. Then stores the snapshot tarball + manifest as before
2 new tests cover: (a) POST then GET /v1/blob/{sha} round-trip,
(b) manifest-claims-different-sha-than-tarball rejection.
Discovered via e2e smoke against frazclient: pull 404'd on every blob
after a successful push. 33/33 tests pass.
This commit is contained in:
@@ -1,6 +1,8 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"archive/tar"
|
||||||
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
@@ -177,6 +179,14 @@ func (s *Server) handlePostSnapshot(w http.ResponseWriter, r *http.Request, who
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Extract tarball into blob store + verify each entry's hash matches
|
||||||
|
// the manifest. The tarball is uncompressed plain tar in v1; zstd
|
||||||
|
// support deferred until Python stdlib gains a zstd module.
|
||||||
|
if err := s.extractTarToBlobs(who.User, m, tarBytes); err != nil {
|
||||||
|
writeJSONError(w, http.StatusBadRequest, fmt.Sprintf("extract tarball: %v", err))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
if err := s.storage.StoreSnapshot(who.User, m, tarBytes); err != nil {
|
if err := s.storage.StoreSnapshot(who.User, m, tarBytes); err != nil {
|
||||||
writeJSONError(w, http.StatusInternalServerError, fmt.Sprintf("store: %v", err))
|
writeJSONError(w, http.StatusInternalServerError, fmt.Sprintf("store: %v", err))
|
||||||
return
|
return
|
||||||
@@ -187,6 +197,47 @@ func (s *Server) handlePostSnapshot(w http.ResponseWriter, r *http.Request, who
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// extractTarToBlobs walks the uploaded tarball, writes each regular file's
|
||||||
|
// contents into the blob store (content-addressed dedupe), and verifies the
|
||||||
|
// hash against the manifest entry for that path. Files in the tarball that
|
||||||
|
// AREN'T declared in the manifest are ignored (defensive: never trust tar
|
||||||
|
// contents). Manifest entries without a tarball file are allowed (the
|
||||||
|
// blob may already exist from a previous snapshot).
|
||||||
|
func (s *Server) extractTarToBlobs(user string, m *Manifest, tarBytes []byte) error {
|
||||||
|
tr := tar.NewReader(bytes.NewReader(tarBytes))
|
||||||
|
for {
|
||||||
|
hdr, err := tr.Next()
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("tar read: %w", err)
|
||||||
|
}
|
||||||
|
if hdr.Typeflag != tar.TypeReg && hdr.Typeflag != tar.TypeRegA {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
expected, ok := m.Files[hdr.Name]
|
||||||
|
if !ok {
|
||||||
|
// File in tar not declared in manifest. Skip; don't trust it.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
buf := make([]byte, 0, hdr.Size)
|
||||||
|
buf, err = io.ReadAll(tr)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("tar entry %s: %w", hdr.Name, err)
|
||||||
|
}
|
||||||
|
sha, err := s.storage.WriteBlob(user, buf)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("write blob for %s: %w", hdr.Name, err)
|
||||||
|
}
|
||||||
|
if sha != expected.SHA256 {
|
||||||
|
return fmt.Errorf("manifest mismatch for %s: tar sha %s != manifest sha %s",
|
||||||
|
hdr.Name, sha, expected.SHA256)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// GET /v1/snapshots
|
// GET /v1/snapshots
|
||||||
func (s *Server) handleListSnapshots(w http.ResponseWriter, _ *http.Request, who *AuthInfo) {
|
func (s *Server) handleListSnapshots(w http.ResponseWriter, _ *http.Request, who *AuthInfo) {
|
||||||
list, err := s.storage.ListSnapshots(who.User)
|
list, err := s.storage.ListSnapshots(who.User)
|
||||||
|
|||||||
+120
-13
@@ -1,6 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"archive/tar"
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
@@ -14,6 +15,27 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// buildTarball wraps the given files into an in-memory, uncompressed tar
// archive and returns its bytes. Used by the POST snapshot tests; meant to
// mirror what the real client emits.
//
// Every map entry becomes one regular-file entry (mode 0600) named after the
// map key, with the map value as its contents. Entries are written in map
// iteration order, which Go leaves unspecified, so callers must not rely on
// the order of entries inside the archive. A nil or empty map yields a valid
// archive with no entries.
//
// Any tar write failure aborts the calling test via t.Fatalf.
func buildTarball(t *testing.T, files map[string][]byte) []byte {
	t.Helper()
	var buf bytes.Buffer
	tw := tar.NewWriter(&buf)
	for name, data := range files {
		hdr := &tar.Header{
			// Set the entry type explicitly instead of relying on the writer
			// promoting the deprecated zero value (tar.TypeRegA).
			Typeflag: tar.TypeReg,
			Name:     name,
			Mode:     0o600,
			Size:     int64(len(data)),
		}
		if err := tw.WriteHeader(hdr); err != nil {
			t.Fatalf("tar header %s: %v", name, err)
		}
		if _, err := tw.Write(data); err != nil {
			t.Fatalf("tar write %s: %v", name, err)
		}
	}
	// Close flushes padding and writes the end-of-archive marker.
	if err := tw.Close(); err != nil {
		t.Fatalf("tar close: %v", err)
	}
	return buf.Bytes()
}
|
||||||
|
|
||||||
// scopedVerifier accepts a fixed token and reports a fixed user + scope set.
|
// scopedVerifier accepts a fixed token and reports a fixed user + scope set.
|
||||||
type scopedVerifier struct {
|
type scopedVerifier struct {
|
||||||
token string
|
token string
|
||||||
@@ -110,21 +132,17 @@ func TestServer_GetManifest_NoSnapshots_204(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestServer_PostSnapshot_RoundTrip(t *testing.T) {
|
func TestServer_PostSnapshot_RoundTrip(t *testing.T) {
|
||||||
s, st, _ := newTestServer(t)
|
s, _, _ := newTestServer(t)
|
||||||
|
|
||||||
// Write a blob first (simulates the client side).
|
|
||||||
blob := []byte("file contents")
|
blob := []byte("file contents")
|
||||||
sha, err := st.WriteBlob("user1", blob)
|
tarBytes := buildTarball(t, map[string][]byte{"options.txt": blob})
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("WriteBlob: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
manifest := &Manifest{
|
manifest := &Manifest{
|
||||||
SnapshotID: "01TESTSNAPSHOT0001",
|
SnapshotID: "01TESTSNAPSHOT0001",
|
||||||
CreatedAt: time.Date(2026, 6, 2, 12, 0, 0, 0, time.UTC),
|
CreatedAt: time.Date(2026, 6, 2, 12, 0, 0, 0, time.UTC),
|
||||||
Files: map[string]FileEntry{
|
Files: map[string]FileEntry{
|
||||||
"options.txt": {
|
"options.txt": {
|
||||||
SHA256: sha,
|
SHA256: HashBytes(blob),
|
||||||
Size: int64(len(blob)),
|
Size: int64(len(blob)),
|
||||||
Mtime: time.Date(2026, 6, 2, 11, 0, 0, 0, time.UTC),
|
Mtime: time.Date(2026, 6, 2, 11, 0, 0, 0, time.UTC),
|
||||||
},
|
},
|
||||||
@@ -136,8 +154,8 @@ func TestServer_PostSnapshot_RoundTrip(t *testing.T) {
|
|||||||
mw := multipart.NewWriter(body)
|
mw := multipart.NewWriter(body)
|
||||||
fw, _ := mw.CreateFormFile("manifest", "manifest.json")
|
fw, _ := mw.CreateFormFile("manifest", "manifest.json")
|
||||||
_, _ = fw.Write(manifestJSON)
|
_, _ = fw.Write(manifestJSON)
|
||||||
fw, _ = mw.CreateFormFile("tarball", "snapshot.tar.zst")
|
fw, _ = mw.CreateFormFile("tarball", "snapshot.tar")
|
||||||
_, _ = fw.Write([]byte("fake-tarball-bytes"))
|
_, _ = fw.Write(tarBytes)
|
||||||
mw.Close()
|
mw.Close()
|
||||||
|
|
||||||
req := authReq(t, http.MethodPost, "/v1/snapshot", body)
|
req := authReq(t, http.MethodPost, "/v1/snapshot", body)
|
||||||
@@ -180,6 +198,92 @@ func TestServer_GetBlob_ReturnsContent(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression: after POST /v1/snapshot, GET /v1/blob/{sha} must succeed for
|
||||||
|
// every file declared in the uploaded manifest. We discovered during e2e
|
||||||
|
// smoke that the server was storing the tarball + manifest but not
|
||||||
|
// populating the blob store, so subsequent pulls 404'd.
|
||||||
|
func TestServer_PostSnapshot_PopulatesBlobStore(t *testing.T) {
|
||||||
|
s, _, _ := newTestServer(t)
|
||||||
|
|
||||||
|
content := []byte("hello cloud")
|
||||||
|
sha := HashBytes(content)
|
||||||
|
tarBytes := buildTarball(t, map[string][]byte{"options.txt": content})
|
||||||
|
|
||||||
|
manifest := &Manifest{
|
||||||
|
SnapshotID: "01EXTRACTTESTABCDEF",
|
||||||
|
CreatedAt: time.Date(2026, 6, 2, 12, 0, 0, 0, time.UTC),
|
||||||
|
Files: map[string]FileEntry{
|
||||||
|
"options.txt": {
|
||||||
|
SHA256: sha,
|
||||||
|
Size: int64(len(content)),
|
||||||
|
Mtime: time.Date(2026, 6, 2, 11, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
manifestJSON, _ := json.Marshal(manifest)
|
||||||
|
|
||||||
|
body := &bytes.Buffer{}
|
||||||
|
mw := multipart.NewWriter(body)
|
||||||
|
fw, _ := mw.CreateFormFile("manifest", "manifest.json")
|
||||||
|
_, _ = fw.Write(manifestJSON)
|
||||||
|
fw, _ = mw.CreateFormFile("tarball", "snapshot.tar")
|
||||||
|
_, _ = fw.Write(tarBytes)
|
||||||
|
mw.Close()
|
||||||
|
|
||||||
|
req := authReq(t, http.MethodPost, "/v1/snapshot", body)
|
||||||
|
req.Header.Set("Content-Type", mw.FormDataContentType())
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
s.ServeHTTP(rec, req)
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("upload: got %d body=%s", rec.Code, rec.Body.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now GET /v1/blob/{sha} should succeed and return the original content
|
||||||
|
rec = httptest.NewRecorder()
|
||||||
|
s.ServeHTTP(rec, authReq(t, http.MethodGet, "/v1/blob/"+sha, nil))
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("blob fetch: got %d", rec.Code)
|
||||||
|
}
|
||||||
|
if !bytes.Equal(rec.Body.Bytes(), content) {
|
||||||
|
t.Errorf("blob content mismatch: got %q want %q", rec.Body.String(), content)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reject uploads whose tarball contents don't match the manifest's claimed sha.
|
||||||
|
func TestServer_PostSnapshot_RejectsManifestMismatch(t *testing.T) {
|
||||||
|
s, _, _ := newTestServer(t)
|
||||||
|
tarBytes := buildTarball(t, map[string][]byte{"options.txt": []byte("ACTUAL")})
|
||||||
|
|
||||||
|
manifest := &Manifest{
|
||||||
|
SnapshotID: "01MISMATCHTEST00001",
|
||||||
|
CreatedAt: time.Date(2026, 6, 2, 12, 0, 0, 0, time.UTC),
|
||||||
|
Files: map[string]FileEntry{
|
||||||
|
"options.txt": {
|
||||||
|
SHA256: HashBytes([]byte("CLAIMED")), // lies about content
|
||||||
|
Size: 6,
|
||||||
|
Mtime: time.Date(2026, 6, 2, 11, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
manifestJSON, _ := json.Marshal(manifest)
|
||||||
|
|
||||||
|
body := &bytes.Buffer{}
|
||||||
|
mw := multipart.NewWriter(body)
|
||||||
|
fw, _ := mw.CreateFormFile("manifest", "manifest.json")
|
||||||
|
_, _ = fw.Write(manifestJSON)
|
||||||
|
fw, _ = mw.CreateFormFile("tarball", "snapshot.tar")
|
||||||
|
_, _ = fw.Write(tarBytes)
|
||||||
|
mw.Close()
|
||||||
|
|
||||||
|
req := authReq(t, http.MethodPost, "/v1/snapshot", body)
|
||||||
|
req.Header.Set("Content-Type", mw.FormDataContentType())
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
s.ServeHTTP(rec, req)
|
||||||
|
if rec.Code != http.StatusBadRequest {
|
||||||
|
t.Errorf("got %d, want 400; body=%s", rec.Code, rec.Body.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestServer_GetBlob_404(t *testing.T) {
|
func TestServer_GetBlob_404(t *testing.T) {
|
||||||
s, _, _ := newTestServer(t)
|
s, _, _ := newTestServer(t)
|
||||||
rec := httptest.NewRecorder()
|
rec := httptest.NewRecorder()
|
||||||
@@ -196,8 +300,8 @@ func TestServer_PostSnapshot_BadManifest_400(t *testing.T) {
|
|||||||
mw := multipart.NewWriter(body)
|
mw := multipart.NewWriter(body)
|
||||||
fw, _ := mw.CreateFormFile("manifest", "manifest.json")
|
fw, _ := mw.CreateFormFile("manifest", "manifest.json")
|
||||||
_, _ = fw.Write([]byte(`{"snapshot_id":"","created_at":"2026-01-01T00:00:00Z","files":{}}`))
|
_, _ = fw.Write([]byte(`{"snapshot_id":"","created_at":"2026-01-01T00:00:00Z","files":{}}`))
|
||||||
fw, _ = mw.CreateFormFile("tarball", "snapshot.tar.zst")
|
fw, _ = mw.CreateFormFile("tarball", "snapshot.tar")
|
||||||
_, _ = fw.Write([]byte("x"))
|
_, _ = fw.Write(buildTarball(t, nil))
|
||||||
mw.Close()
|
mw.Close()
|
||||||
req := authReq(t, http.MethodPost, "/v1/snapshot", body)
|
req := authReq(t, http.MethodPost, "/v1/snapshot", body)
|
||||||
req.Header.Set("Content-Type", mw.FormDataContentType())
|
req.Header.Set("Content-Type", mw.FormDataContentType())
|
||||||
@@ -226,12 +330,15 @@ func TestServer_PostSnapshot_QuotaExceeded_413(t *testing.T) {
|
|||||||
}
|
}
|
||||||
manifestJSON, _ := json.Marshal(manifest)
|
manifestJSON, _ := json.Marshal(manifest)
|
||||||
|
|
||||||
|
// Minimal tar with one 1-byte entry is already 1536+ bytes (header+data+EOF blocks),
|
||||||
|
// well over the 100-byte quota set in this test.
|
||||||
|
tarOver := buildTarball(t, map[string][]byte{"f.txt": []byte("x")})
|
||||||
body := &bytes.Buffer{}
|
body := &bytes.Buffer{}
|
||||||
mw := multipart.NewWriter(body)
|
mw := multipart.NewWriter(body)
|
||||||
fw, _ := mw.CreateFormFile("manifest", "manifest.json")
|
fw, _ := mw.CreateFormFile("manifest", "manifest.json")
|
||||||
_, _ = fw.Write(manifestJSON)
|
_, _ = fw.Write(manifestJSON)
|
||||||
fw, _ = mw.CreateFormFile("tarball", "tar.zst")
|
fw, _ = mw.CreateFormFile("tarball", "tar")
|
||||||
_, _ = fw.Write(make([]byte, 200)) // > 100 byte quota
|
_, _ = fw.Write(tarOver)
|
||||||
mw.Close()
|
mw.Close()
|
||||||
|
|
||||||
req := authReq(t, http.MethodPost, "/v1/snapshot", body)
|
req := authReq(t, http.MethodPost, "/v1/snapshot", body)
|
||||||
|
|||||||
Reference in New Issue
Block a user