Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go/.golangci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ linters:
- path: _test\.go
linters:
- dupl
- unparam
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer we just fix the linter complaint than add more exceptions - could we just have runWriter always use "out" instead of taking in pathPrefix? (or even just inline //nolint:unparam)

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok i will look into

- path-except: _test\.go
text: use `testutility.GetCurrentWorkingDirectory`
paths:
Expand Down
31 changes: 30 additions & 1 deletion go/cmd/exporter/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package main

import (
"context"
"errors"
"hash/crc32"
"log/slog"
"os"
"path/filepath"
Expand All @@ -11,6 +13,9 @@ import (
"github.com/google/osv.dev/go/osv/clients"
)

// crc32cTable uses the Castagnoli polynomial, matching GCS's own checksum algorithm.
var crc32cTable = crc32.MakeTable(crc32.Castagnoli)

// writeMsg holds the data for a file to be written.
type writeMsg struct {
path string
Expand All @@ -31,7 +36,10 @@ func writer(ctx context.Context, cancel context.CancelFunc, inCh <-chan writeMsg
}
path := filepath.Join(pathPrefix, msg.path)
if client != nil {
// Write to the bucket.
// Skip the upload if the object already has the same content.
if gcsContentUnchanged(ctx, client, path, msg.data) {
break
}
err := client.WriteObject(ctx, path, msg.data, &clients.WriteOptions{
ContentType: msg.mimeType,
})
Expand Down Expand Up @@ -62,3 +70,24 @@ func writer(ctx context.Context, cancel context.CancelFunc, inCh <-chan writeMsg
}
}
}

// gcsContentUnchanged returns true if the object at path already has the same
// CRC32C checksum as data, meaning the upload would be a no-op. Any error
// reading the object's attributes (other than ErrNotFound) is logged and
// treated as "content changed" so the upload proceeds.
func gcsContentUnchanged(ctx context.Context, client clients.CloudStorage, path string, data []byte) bool {
attrs, err := client.ReadObjectAttrs(ctx, path)
if err != nil {
if !errors.Is(err, clients.ErrNotFound) {
logger.WarnContext(ctx, "failed to read object attrs, proceeding with upload", slog.String("path", path), slog.Any("err", err))
}

return false
}
if attrs.CRC32C == crc32.Checksum(data, crc32cTable) {
logger.InfoContext(ctx, "skipping upload, content unchanged", slog.String("path", path))

return true
}
return false
}
134 changes: 134 additions & 0 deletions go/cmd/exporter/writer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package main

import (
"context"
"path/filepath"
"sync"
"testing"

"github.com/google/osv.dev/go/osv/clients"
"github.com/google/osv.dev/go/testutils"
)

// runWriter sends msgs to a writer goroutine and waits for it to finish.
func runWriter(t *testing.T, storage clients.CloudStorage, pathPrefix string, msgs []writeMsg) {
t.Helper()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

inCh := make(chan writeMsg, len(msgs))
for _, m := range msgs {
inCh <- m
}
close(inCh)

var wg sync.WaitGroup
wg.Add(1)
go writer(ctx, cancel, inCh, storage, pathPrefix, &wg)
wg.Wait()
}

func TestWriter_GCS_SkipsUnchangedContent(t *testing.T) {
storage := testutils.NewMockStorage()
data := []byte(`{"id":"OSV-1"}`)

// Pre-populate using the same path the writer will compute.
objPath := filepath.Join("out", "OSV-1.json")
if err := storage.WriteObject(t.Context(), objPath, data, nil); err != nil {
t.Fatalf("setup: %v", err)
}
attrsBefore, _ := storage.ReadObjectAttrs(t.Context(), objPath)

runWriter(t, storage, "out", []writeMsg{
{path: "OSV-1.json", mimeType: "application/json", data: data},
})

attrsAfter, err := storage.ReadObjectAttrs(t.Context(), objPath)
if err != nil {
t.Fatalf("ReadObjectAttrs: %v", err)
}
if attrsAfter.Generation != attrsBefore.Generation {
t.Errorf("expected generation %d (skipped), got %d", attrsBefore.Generation, attrsAfter.Generation)
}
}

func TestWriter_GCS_UploadsChangedContent(t *testing.T) {
storage := testutils.NewMockStorage()

objPath := filepath.Join("out", "OSV-1.json")
if err := storage.WriteObject(t.Context(), objPath, []byte(`{"id":"OSV-1","old":true}`), nil); err != nil {
t.Fatalf("setup: %v", err)
}
attrsBefore, _ := storage.ReadObjectAttrs(t.Context(), objPath)

runWriter(t, storage, "out", []writeMsg{
{path: "OSV-1.json", mimeType: "application/json", data: []byte(`{"id":"OSV-1","old":false}`)},
})

attrsAfter, err := storage.ReadObjectAttrs(t.Context(), objPath)
if err != nil {
t.Fatalf("ReadObjectAttrs: %v", err)
}
if attrsAfter.Generation <= attrsBefore.Generation {
t.Errorf("expected generation > %d (uploaded), got %d", attrsBefore.Generation, attrsAfter.Generation)
}
}

func TestWriter_GCS_UploadsNewObject(t *testing.T) {
storage := testutils.NewMockStorage()

runWriter(t, storage, "out", []writeMsg{
{path: "OSV-1.json", mimeType: "application/json", data: []byte(`{"id":"OSV-1"}`)},
})

objPath := filepath.Join("out", "OSV-1.json")
attrs, err := storage.ReadObjectAttrs(t.Context(), objPath)
if err != nil {
t.Fatalf("expected object to exist after upload: %v", err)
}
if attrs.Generation != 1 {
t.Errorf("expected generation 1 for new object, got %d", attrs.Generation)
}
}

func TestWriter_GCS_SkipsMultipleUnchanged(t *testing.T) {
storage := testutils.NewMockStorage()

type entry struct {
msgPath string
objPath string
data []byte
}
entries := []entry{
{"A.json", filepath.Join("out", "A.json"), []byte(`{"id":"A"}`)},
{"B.json", filepath.Join("out", "B.json"), []byte(`{"id":"B"}`)},
{"C.json", filepath.Join("out", "C.json"), []byte(`{"id":"C"}`)},
}
for _, e := range entries {
if err := storage.WriteObject(t.Context(), e.objPath, e.data, nil); err != nil {
t.Fatalf("setup %s: %v", e.objPath, err)
}
}

gensBefore := make(map[string]int64)
for _, e := range entries {
attrs, _ := storage.ReadObjectAttrs(t.Context(), e.objPath)
gensBefore[e.objPath] = attrs.Generation
}

msgs := make([]writeMsg, len(entries))
for i, e := range entries {
msgs[i] = writeMsg{path: e.msgPath, mimeType: "application/json", data: e.data}
}
runWriter(t, storage, "out", msgs)

for _, e := range entries {
attrs, err := storage.ReadObjectAttrs(t.Context(), e.objPath)
if err != nil {
t.Fatalf("ReadObjectAttrs(%s): %v", e.objPath, err)
}
if attrs.Generation != gensBefore[e.objPath] {
t.Errorf("%s: expected generation %d (skipped), got %d", e.objPath, gensBefore[e.objPath], attrs.Generation)
}
}
}
Loading