package main import ( "crypto/sha256" "encoding/hex" "encoding/json" "errors" "fmt" "io" "os" "path/filepath" "sort" "strings" "sync" ) // Storage owns the on-disk layout described in DESIGN.md: // // //manifest.json latest snapshot's manifest // //snapshots/.tar.zst immutable per-snapshot tarball // //snapshots/.manifest.json historical manifest // //blobs// content-addressed dedupe // // Methods are safe for concurrent use across users; mutations to a single // user are serialized via a per-user mutex. type Storage struct { root string mu sync.Mutex locks map[string]*sync.Mutex // per-user serialization } func NewStorage(root string) (*Storage, error) { if root == "" { return nil, errors.New("storage root is empty") } abs, err := filepath.Abs(root) if err != nil { return nil, fmt.Errorf("resolve storage root: %w", err) } if err := os.MkdirAll(abs, 0o700); err != nil { return nil, fmt.Errorf("create storage root: %w", err) } return &Storage{root: abs, locks: map[string]*sync.Mutex{}}, nil } // lockUser returns a mutex held until the caller calls .Unlock(). Per-user // serialization keeps concurrent pushes from interleaving manifest writes. func (s *Storage) lockUser(user string) *sync.Mutex { s.mu.Lock() defer s.mu.Unlock() m, ok := s.locks[user] if !ok { m = &sync.Mutex{} s.locks[user] = m } m.Lock() return m } func (s *Storage) userDir(user string) string { return filepath.Join(s.root, user) } func (s *Storage) blobsDir(user string) string { return filepath.Join(s.userDir(user), "blobs") } func (s *Storage) snapsDir(user string) string { return filepath.Join(s.userDir(user), "snapshots") } func (s *Storage) manifestPath(user string) string { return filepath.Join(s.userDir(user), "manifest.json") } func (s *Storage) blobPath(user, sha string) string { return filepath.Join(s.blobsDir(user), sha[:2], sha) } // ReadManifest returns the user's latest manifest. Returns os.ErrNotExist // (verbatim) when the user has no snapshots yet so callers can map to HTTP 204. 
func (s *Storage) ReadManifest(user string) (*Manifest, error) { if err := validateUserID(user); err != nil { return nil, err } f, err := os.Open(s.manifestPath(user)) if err != nil { return nil, err } defer f.Close() var m Manifest if err := json.NewDecoder(f).Decode(&m); err != nil { return nil, fmt.Errorf("decode manifest: %w", err) } return &m, nil } // WriteBlob stores raw file content under blobs//. Returns the // SHA computed from the data. If a blob with the same SHA already exists, // the write is skipped (content-addressed dedupe). Atomic via tmp-rename. func (s *Storage) WriteBlob(user string, data []byte) (string, error) { if err := validateUserID(user); err != nil { return "", err } sum := sha256.Sum256(data) sha := hex.EncodeToString(sum[:]) dest := s.blobPath(user, sha) if _, err := os.Stat(dest); err == nil { return sha, nil // already have it } if err := os.MkdirAll(filepath.Dir(dest), 0o700); err != nil { return "", fmt.Errorf("create blob dir: %w", err) } tmp := dest + ".tmp" if err := os.WriteFile(tmp, data, 0o600); err != nil { return "", fmt.Errorf("write blob tmp: %w", err) } if err := os.Rename(tmp, dest); err != nil { _ = os.Remove(tmp) return "", fmt.Errorf("rename blob: %w", err) } return sha, nil } // ReadBlob opens a blob by SHA. Caller closes. func (s *Storage) ReadBlob(user, sha string) (io.ReadCloser, error) { if err := validateUserID(user); err != nil { return nil, err } if len(sha) != 64 { return nil, fmt.Errorf("invalid sha256: length %d", len(sha)) } return os.Open(s.blobPath(user, sha)) } // StoreSnapshot persists a snapshot atomically: // 1. Write tarball to snapshots/.tar.zst.tmp // 2. Write per-snapshot manifest to snapshots/.manifest.json.tmp // 3. Rename both into place // 4. Update latest user/manifest.json (atomic rename) // // The caller has already written the constituent blobs via WriteBlob before // invoking this — the tarball just bundles them. 
func (s *Storage) StoreSnapshot(user string, m *Manifest, tarball []byte) error { if err := validateUserID(user); err != nil { return err } if err := m.Validate(); err != nil { return err } unlock := s.lockUser(user) defer unlock.Unlock() if err := os.MkdirAll(s.snapsDir(user), 0o700); err != nil { return fmt.Errorf("create snapshots dir: %w", err) } tarPath := filepath.Join(s.snapsDir(user), m.SnapshotID+".tar.zst") manPath := filepath.Join(s.snapsDir(user), m.SnapshotID+".manifest.json") if _, err := os.Stat(tarPath); err == nil { return fmt.Errorf("snapshot %s already exists", m.SnapshotID) } tarTmp := tarPath + ".tmp" if err := os.WriteFile(tarTmp, tarball, 0o600); err != nil { return fmt.Errorf("write tarball: %w", err) } manBytes, err := json.MarshalIndent(m, "", " ") if err != nil { _ = os.Remove(tarTmp) return fmt.Errorf("marshal manifest: %w", err) } manTmp := manPath + ".tmp" if err := os.WriteFile(manTmp, manBytes, 0o600); err != nil { _ = os.Remove(tarTmp) return fmt.Errorf("write snapshot manifest: %w", err) } if err := os.Rename(tarTmp, tarPath); err != nil { _ = os.Remove(tarTmp) _ = os.Remove(manTmp) return fmt.Errorf("rename tarball: %w", err) } if err := os.Rename(manTmp, manPath); err != nil { _ = os.Remove(manTmp) return fmt.Errorf("rename snapshot manifest: %w", err) } // Update latest manifest. Atomic rename so readers never see partial. latestTmp := s.manifestPath(user) + ".tmp" if err := os.WriteFile(latestTmp, manBytes, 0o600); err != nil { return fmt.Errorf("write latest manifest: %w", err) } if err := os.Rename(latestTmp, s.manifestPath(user)); err != nil { _ = os.Remove(latestTmp) return fmt.Errorf("rename latest manifest: %w", err) } return nil } // SnapshotInfo is one element of ListSnapshots. type SnapshotInfo struct { ID string `json:"id"` CreatedAt string `json:"created_at"` SizeBytes int64 `json:"size_bytes"` } // ListSnapshots returns the user's snapshot history, newest first. 
func (s *Storage) ListSnapshots(user string) ([]SnapshotInfo, error) { if err := validateUserID(user); err != nil { return nil, err } dir := s.snapsDir(user) entries, err := os.ReadDir(dir) if err != nil { if errors.Is(err, os.ErrNotExist) { return nil, nil } return nil, err } var out []SnapshotInfo for _, e := range entries { if !strings.HasSuffix(e.Name(), ".tar.zst") { continue } id := strings.TrimSuffix(e.Name(), ".tar.zst") info, err := e.Info() if err != nil { continue } // Read the per-snapshot manifest for created_at. manPath := filepath.Join(dir, id+".manifest.json") mf, err := os.Open(manPath) var createdAt string if err == nil { var sm Manifest if json.NewDecoder(mf).Decode(&sm) == nil { createdAt = sm.CreatedAt.UTC().Format("2006-01-02T15:04:05Z") } mf.Close() } out = append(out, SnapshotInfo{ ID: id, CreatedAt: createdAt, SizeBytes: info.Size(), }) } sort.Slice(out, func(i, j int) bool { return out[i].ID > out[j].ID }) return out, nil } // OpenSnapshot returns the historical tarball reader. Caller closes. func (s *Storage) OpenSnapshot(user, id string) (io.ReadCloser, error) { if err := validateUserID(user); err != nil { return nil, err } if !looksLikeSnapshotID(id) { return nil, fmt.Errorf("invalid snapshot id: %q", id) } return os.Open(filepath.Join(s.snapsDir(user), id+".tar.zst")) } // DeleteSnapshot removes a historical snapshot. Refuses to delete the latest // (returns error). The blob GC step is separate. 
func (s *Storage) DeleteSnapshot(user, id string) error { if err := validateUserID(user); err != nil { return err } if !looksLikeSnapshotID(id) { return fmt.Errorf("invalid snapshot id: %q", id) } unlock := s.lockUser(user) defer unlock.Unlock() // Refuse to delete if id matches the latest manifest latest, err := s.ReadManifest(user) if err == nil && latest.SnapshotID == id { return errors.New("cannot delete latest snapshot") } tarPath := filepath.Join(s.snapsDir(user), id+".tar.zst") manPath := filepath.Join(s.snapsDir(user), id+".manifest.json") if err := os.Remove(tarPath); err != nil && !errors.Is(err, os.ErrNotExist) { return err } if err := os.Remove(manPath); err != nil && !errors.Is(err, os.ErrNotExist) { return err } return nil } // UsageBytes is total bytes occupied by a user's snapshots + blobs. // Cheap walk; called for quota checks. Doesn't double-count files // referenced from both blobs/ and snapshots/. func (s *Storage) UsageBytes(user string) (int64, error) { if err := validateUserID(user); err != nil { return 0, err } var total int64 walk := func(p string) error { return filepath.Walk(p, func(_ string, info os.FileInfo, err error) error { if err != nil { if errors.Is(err, os.ErrNotExist) { return nil } return err } if info.IsDir() { return nil } total += info.Size() return nil }) } for _, d := range []string{s.blobsDir(user), s.snapsDir(user)} { if err := walk(d); err != nil { return 0, err } } return total, nil } // validateUserID guards against directory traversal via user identifiers. // Discord IDs are numeric strings; we accept anything safely path-segment-able. func validateUserID(user string) error { if user == "" { return errors.New("empty user id") } if strings.ContainsAny(user, "/\\.") { return fmt.Errorf("user id contains invalid character: %q", user) } return nil } // looksLikeSnapshotID: ULIDs are 26 chars, Crockford base32. 
// We allow any string of 1 to 64 bytes made up solely of ASCII letters,
// digits, '-' and '_' to keep tests using simple IDs flexible — the security
// need here is path-safety, not strict ULID compliance.
func looksLikeSnapshotID(id string) bool {
	if n := len(id); n < 1 || n > 64 {
		return false
	}
	// Byte-wise scan: any byte outside the allowed ASCII set (including every
	// byte of a multi-byte UTF-8 sequence) disqualifies the ID.
	for i := 0; i < len(id); i++ {
		b := id[i]
		alnum := ('0' <= b && b <= '9') ||
			('a' <= b && b <= 'z') ||
			('A' <= b && b <= 'Z')
		if !alnum && b != '-' && b != '_' {
			return false
		}
	}
	return true
}