server: extract tarball into blob store on snapshot upload
The deferred 'hardlink blobs from tarball' optimization from DESIGN.md
landed as 'just walk the tarball and write blobs separately' for v1.
GET /v1/blob/{sha} was 404'ing because the blob store was empty —
storage only had snapshots/<id>.tar.zst and a manifest.
Server now:
1. Parses uploaded multipart manifest + tarball
2. Walks the tar entries, computes each entry's sha256
3. Cross-checks against the manifest's declared sha (rejects 400 on mismatch)
4. Writes each blob to <user>/blobs/ via Storage.WriteBlob
5. Then stores the snapshot tarball + manifest as before
2 new tests cover: (a) POST then GET /v1/blob/{sha} round-trip,
(b) manifest-claims-different-sha-than-tarball rejection.
Discovered via e2e smoke against frazclient: pull 404'd on every blob
after a successful push. 33/33 tests pass.
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
@@ -177,6 +179,14 @@ func (s *Server) handlePostSnapshot(w http.ResponseWriter, r *http.Request, who
|
||||
return
|
||||
}
|
||||
|
||||
// Extract tarball into blob store + verify each entry's hash matches
|
||||
// the manifest. The tarball is uncompressed plain tar in v1; zstd
|
||||
// support deferred until Python stdlib gains a zstd module.
|
||||
if err := s.extractTarToBlobs(who.User, m, tarBytes); err != nil {
|
||||
writeJSONError(w, http.StatusBadRequest, fmt.Sprintf("extract tarball: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
if err := s.storage.StoreSnapshot(who.User, m, tarBytes); err != nil {
|
||||
writeJSONError(w, http.StatusInternalServerError, fmt.Sprintf("store: %v", err))
|
||||
return
|
||||
@@ -187,6 +197,47 @@ func (s *Server) handlePostSnapshot(w http.ResponseWriter, r *http.Request, who
|
||||
})
|
||||
}
|
||||
|
||||
// extractTarToBlobs walks the uploaded tarball, writes each regular file's
|
||||
// contents into the blob store (content-addressed dedupe), and verifies the
|
||||
// hash against the manifest entry for that path. Files in the tarball that
|
||||
// AREN'T declared in the manifest are ignored (defensive: never trust tar
|
||||
// contents). Manifest entries without a tarball file are allowed (the
|
||||
// blob may already exist from a previous snapshot).
|
||||
func (s *Server) extractTarToBlobs(user string, m *Manifest, tarBytes []byte) error {
|
||||
tr := tar.NewReader(bytes.NewReader(tarBytes))
|
||||
for {
|
||||
hdr, err := tr.Next()
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("tar read: %w", err)
|
||||
}
|
||||
if hdr.Typeflag != tar.TypeReg && hdr.Typeflag != tar.TypeRegA {
|
||||
continue
|
||||
}
|
||||
expected, ok := m.Files[hdr.Name]
|
||||
if !ok {
|
||||
// File in tar not declared in manifest. Skip; don't trust it.
|
||||
continue
|
||||
}
|
||||
buf := make([]byte, 0, hdr.Size)
|
||||
buf, err = io.ReadAll(tr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("tar entry %s: %w", hdr.Name, err)
|
||||
}
|
||||
sha, err := s.storage.WriteBlob(user, buf)
|
||||
if err != nil {
|
||||
return fmt.Errorf("write blob for %s: %w", hdr.Name, err)
|
||||
}
|
||||
if sha != expected.SHA256 {
|
||||
return fmt.Errorf("manifest mismatch for %s: tar sha %s != manifest sha %s",
|
||||
hdr.Name, sha, expected.SHA256)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GET /v1/snapshots
|
||||
func (s *Server) handleListSnapshots(w http.ResponseWriter, _ *http.Request, who *AuthInfo) {
|
||||
list, err := s.storage.ListSnapshots(who.User)
|
||||
|
||||
Reference in New Issue
Block a user