server: extract tarball into blob store on snapshot upload
CI / validate (push) Successful in 13s
CI / docker (push) Successful in 14s

The deferred 'hardlink blobs from tarball' optimization from DESIGN.md
landed as 'just walk the tarball and write blobs separately' for v1.
GET /v1/blob/{sha} was 404'ing because the blob store was empty —
storage only had snapshots/<id>.tar.zst and a manifest.

Server now:
1. Parses uploaded multipart manifest + tarball
2. Walks the tar entries, computes each entry's sha256
3. Cross-checks it against the sha declared in the manifest (rejects with 400 on mismatch)
4. Writes each blob to <user>/blobs/ via Storage.WriteBlob
5. Then stores the snapshot tarball + manifest as before

Two new tests cover: (a) a POST then GET /v1/blob/{sha} round-trip,
(b) rejection when the manifest claims a different sha than the tarball.

Discovered via e2e smoke against frazclient: pull 404'd on every blob
after a successful push. 33/33 tests pass.
This commit is contained in:
2026-06-02 19:08:56 +02:00
parent a73e254862
commit 2b7290626e
2 changed files with 171 additions and 13 deletions
+51
View File
@@ -1,6 +1,8 @@
package main
import (
"archive/tar"
"bytes"
"context"
"encoding/json"
"errors"
@@ -177,6 +179,14 @@ func (s *Server) handlePostSnapshot(w http.ResponseWriter, r *http.Request, who
return
}
// Extract tarball into blob store + verify each entry's hash matches
// the manifest. The tarball is uncompressed plain tar in v1; zstd
// support deferred until Python stdlib gains a zstd module.
if err := s.extractTarToBlobs(who.User, m, tarBytes); err != nil {
writeJSONError(w, http.StatusBadRequest, fmt.Sprintf("extract tarball: %v", err))
return
}
if err := s.storage.StoreSnapshot(who.User, m, tarBytes); err != nil {
writeJSONError(w, http.StatusInternalServerError, fmt.Sprintf("store: %v", err))
return
@@ -187,6 +197,47 @@ func (s *Server) handlePostSnapshot(w http.ResponseWriter, r *http.Request, who
})
}
// extractTarToBlobs walks the uploaded tarball and writes the contents of
// every regular file that the manifest declares into the blob store for user.
//
// For each such entry, the hash returned by Storage.WriteBlob is compared
// with the SHA256 the manifest declares for that path; the first mismatch
// aborts the walk with an error. Tar entries that are not regular files, or
// whose path is absent from m.Files, are skipped (defensive: never trust tar
// contents). Manifest entries without a matching tar entry are allowed (the
// blob may already exist from a previous snapshot).
//
// It returns nil once the whole archive has been walked, or an error
// describing the tar read failure, blob write failure, or hash mismatch that
// stopped it.
func (s *Server) extractTarToBlobs(user string, m *Manifest, tarBytes []byte) error {
	tr := tar.NewReader(bytes.NewReader(tarBytes))
	for {
		hdr, err := tr.Next()
		if errors.Is(err, io.EOF) {
			return nil
		}
		if err != nil {
			return fmt.Errorf("tar read: %w", err)
		}
		// tar.Reader promotes the legacy zero typeflag (TypeRegA) to TypeReg
		// or TypeDir, so checking TypeReg alone also covers legacy regular
		// files.
		if hdr.Typeflag != tar.TypeReg {
			continue
		}
		expected, ok := m.Files[hdr.Name]
		if !ok {
			// File in tar not declared in manifest. Skip; don't trust it.
			continue
		}
		// Do not pre-size the buffer from hdr.Size: the header comes from the
		// uploader, and a forged size must not drive an allocation. ReadAll
		// grows only with the bytes actually present in the entry.
		data, err := io.ReadAll(tr)
		if err != nil {
			return fmt.Errorf("tar entry %s: %w", hdr.Name, err)
		}
		// NOTE(review): the blob is written before its hash is compared, so
		// a mismatching entry may stay in the store after the upload is
		// rejected — confirm that is acceptable, or hash before writing.
		sha, err := s.storage.WriteBlob(user, data)
		if err != nil {
			return fmt.Errorf("write blob for %s: %w", hdr.Name, err)
		}
		if sha != expected.SHA256 {
			return fmt.Errorf("manifest mismatch for %s: tar sha %s != manifest sha %s",
				hdr.Name, sha, expected.SHA256)
		}
	}
}
// GET /v1/snapshots
func (s *Server) handleListSnapshots(w http.ResponseWriter, _ *http.Request, who *AuthInfo) {
list, err := s.storage.ListSnapshots(who.User)