diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index d587f98e806..e74cf218fd0 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -170,6 +170,7 @@ func main() { close(jobs) wg.Wait() + timesBlocked := int64(0) if gcsHelper != nil { timesBlocked = gcsHelper.GetTimesBlocked() diff --git a/vulnfeeds/cmd/mirrors/cpe-repo-gen/main.go b/vulnfeeds/cmd/mirrors/cpe-repo-gen/main.go index 153a0fab545..27e853df36d 100644 --- a/vulnfeeds/cmd/mirrors/cpe-repo-gen/main.go +++ b/vulnfeeds/cmd/mirrors/cpe-repo-gen/main.go @@ -385,7 +385,7 @@ func analyzeCPEDictionary(cpes []CPE) (productToRepo c.VendorProductToRepoMap, d logger.Info("Disregarding derived repo", slog.String("repo", repo), slog.String("vendor", vp.Vendor), slog.String("product", vp.Product), slog.Any("err", err)) continue } - if !git.ValidRepoAndHasUsableRefs(repo) { + if valid, _ := git.ValidRepoAndHasUsableRefs(repo); !valid { logger.Info("Disregarding derived repo as unusable", slog.String("repo", repo), slog.String("vendor", vp.Vendor), slog.String("product", vp.Product)) continue } @@ -407,7 +407,7 @@ func validateRepos(prm c.VendorProductToRepoMap) (validated c.VendorProductToRep entryCount++ // As a side-effect, this also omits any with no repos. for _, r := range prm[vp] { - if !git.ValidRepoAndHasUsableRefs(r) { + if valid, _ := git.ValidRepoAndHasUsableRefs(r); !valid { logger.Info("Invalid repo", slog.Int("count", entryCount), slog.Int("total", len(prm)), slog.String("repo", r), slog.String("vendor", vp.Vendor), slog.String("product", vp.Product)) continue } diff --git a/vulnfeeds/conversion/versions.go b/vulnfeeds/conversion/versions.go index 6a8b6dbbed7..74553503fa3 100644 --- a/vulnfeeds/conversion/versions.go +++ b/vulnfeeds/conversion/versions.go @@ -25,6 +25,7 @@ import ( "slices" "strings" "sync" + "time" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" @@ -1058,7 +1059,7 @@ func (c *VPRepoCache) MaybeUpdate(vp *VendorProduct, repo string) { return } // Avoid polluting the cache with existent-but-useless repos. - if git.ValidRepoAndHasUsableRefs(repo) { + if valid, _ := git.ValidRepoAndHasUsableRefs(repo); valid { c.m[*vp] = append(c.m[*vp], repo) } } @@ -1232,11 +1233,7 @@ func ReposFromReferences(cache *VPRepoCache, vp *VendorProduct, refs []models.Re continue } // If it's any other repo-shaped URL, it's only useful if it has tags. - if (err == nil && !git.ValidRepo(repo)) || (err != nil && !git.ValidRepoAndHasUsableRefs(repo)) { - if repoTagsCache != nil { - repoTagsCache.SetInvalid(repo) - } - + if isValid, _ := validateRepo(repo, err == nil, repoTagsCache); !isValid { continue } repos = append(repos, repo) @@ -1281,3 +1278,22 @@ func ReposFromReferencesCVEList(refs []models.Reference, tagDenyList []string, m return repos } + +func validateRepo(repo string, isCommit bool, cache git.RepoTagsCache) (bool, error) { + var valid bool + var err error + if isCommit { + valid, err = git.ValidRepo(repo) + } else { + valid, err = git.ValidRepoAndHasUsableRefs(repo) + } + if !valid && cache != nil { + if err != nil && (errors.Is(err, git.ErrRateLimit) || strings.Contains(err.Error(), "429") || strings.Contains(err.Error(), "Too Many Requests")) { + cache.SetInvalidWithTTL(repo, 1*time.Hour) + } else { + cache.SetInvalid(repo) + } + } + + return valid, err +} diff --git a/vulnfeeds/git/repository.go b/vulnfeeds/git/repository.go index 714adb316a7..977cd194405 100644 --- a/vulnfeeds/git/repository.go +++ b/vulnfeeds/git/repository.go @@ -82,6 +82,7 @@ type RepoTagsCache interface { Get(repo string) (RepoTagsMap, bool) Set(repo string, tags RepoTagsMap) SetInvalid(repo string) + SetInvalidWithTTL(repo string, ttl time.Duration) IsInvalid(repo string) bool SetCanonicalLink(repo string, canonicalLink string) GetCanonicalLink(repo string) (string, bool) @@ -124,6 +125,10 @@ func (c *InMemoryRepoTagsCache) SetInvalid(repo string) { c.invalid[repo] = true } +func (c *InMemoryRepoTagsCache) SetInvalidWithTTL(repo string, _ time.Duration) { + c.SetInvalid(repo) +} + func (c *InMemoryRepoTagsCache) IsInvalid(repo string) bool { c.RLock() defer c.RUnlock() @@ -191,11 +196,14 @@ func (c *RedisRepoTagsCache) Set(repo string, tags RepoTagsMap) { } func (c *RedisRepoTagsCache) SetInvalid(repo string) { + c.SetInvalidWithTTL(repo, randomTTL()) +} + +func (c *RedisRepoTagsCache) SetInvalidWithTTL(repo string, ttl time.Duration) { ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) defer cancel() key := "invalid_repo:" + repo - ttl := randomTTL() c.redisClient.Set(ctx, key, "true", ttl) } @@ -246,7 +254,9 @@ func NewRepoTagsCache() RepoTagsCache { Protocol: 2, }) - return &RedisRepoTagsCache{redisClient: client} + return &RedisRepoTagsCache{ + redisClient: client, + } } return &InMemoryRepoTagsCache{ @@ -412,16 +422,14 @@ func RepoTags(repoURL string, repoTagsCache RepoTagsCache) (tags Tags, e error) } } // Cache miss. - var refs []*plumbing.Reference - var err error - if os.Getenv("GITTER_HOST") != "" { - refs, err = gitterRepoRefs(repoURL) - } else { - refs, err = RemoteRepoRefsWithRetry(repoURL, 3) - } + refs, err := getRepoRefs(repoURL) if err != nil { if repoTagsCache != nil { - repoTagsCache.SetInvalid(repoURL) + if errors.Is(err, ErrRateLimit) || strings.Contains(err.Error(), "429") || strings.Contains(err.Error(), "Too Many Requests") { + repoTagsCache.SetInvalidWithTTL(repoURL, 1*time.Hour) + } else { + repoTagsCache.SetInvalid(repoURL) + } } return tags, err @@ -547,34 +555,28 @@ func RefBranches(refs []*plumbing.Reference) (branches []*plumbing.Reference) { return branches } -// Validate the repo by attempting to query it's references. -// *** Does external calls to verify repos *** -func ValidRepo(repoURL string) bool { +func getRepoRefs(repoURL string) ([]*plumbing.Reference, error) { if os.Getenv("GITTER_HOST") != "" { - _, err := gitterRepoRefs(repoURL) - return err == nil + return gitterRepoRefs(repoURL) } - _, err := RemoteRepoRefsWithRetry(repoURL, 3) - return err == nil + return RemoteRepoRefsWithRetry(repoURL, 3) } -// Otherwise functional repos that don't have any tags are not valid. +// Validate the repo by attempting to query it's references. // *** Does external calls to verify repos *** -func ValidRepoAndHasUsableRefs(repoURL string) (valid bool) { - if os.Getenv("GITTER_HOST") != "" { - refs, err := gitterRepoRefs(repoURL) - if err != nil || len(refs) == 0 { - return false - } +func ValidRepo(repoURL string) (bool, error) { + _, err := getRepoRefs(repoURL) + return err == nil, err +} - return len(RefTags(refs)) > 0 - } - refs, err := RemoteRepoRefsWithRetry(repoURL, 3) - // Return false if there's an error, or if the repo has no refs (e.g. is empty) +// Otherwise functional repos that don't have any tags are not valid. +// *** Does external calls to verify repos *** +func ValidRepoAndHasUsableRefs(repoURL string) (bool, error) { + refs, err := getRepoRefs(repoURL) if err != nil || len(refs) == 0 { - return false + return false, err } - // Repos with no tags aren't useful. - return len(RefTags(refs)) > 0 + + return len(RefTags(refs)) > 0, nil } diff --git a/vulnfeeds/git/repository_test.go b/vulnfeeds/git/repository_test.go index 89dd8075b26..5d950150982 100644 --- a/vulnfeeds/git/repository_test.go +++ b/vulnfeeds/git/repository_test.go @@ -383,7 +383,7 @@ func TestValidRepo(t *testing.T) { if time.Now().Before(tc.disableExpiryDate) { t.Skipf("test %q: TestValidRepo(%q) has been skipped due to known outage and will be reenabled on %s.", tc.description, tc.repoURL, tc.disableExpiryDate) } - got := ValidRepoAndHasUsableRefs(tc.repoURL) + got, _ := ValidRepoAndHasUsableRefs(tc.repoURL) if diff := cmp.Diff(got, tc.expectedResult); diff != "" { t.Errorf("test %q: ValidRepo(%q) was incorrect: %s", tc.description, tc.repoURL, diff) t.Logf("Confirm that %s is reachable with `git ls-remote %s`", tc.repoURL, tc.repoURL) @@ -396,7 +396,7 @@ func TestInvalidRepos(t *testing.T) { testutils.SetupGitVCR(t) redundantRepos := []string{} for _, repo := range models.InvalidRepos { - if !ValidRepoAndHasUsableRefs(repo) { + if valid, _ := ValidRepoAndHasUsableRefs(repo); !valid { redundantRepos = append(redundantRepos, repo) } } @@ -537,12 +537,37 @@ func TestValidRepoWithGitter(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { responseStatus = tc.status - if got := ValidRepo(tc.repo); got != tc.wantValidRepo { + if got, _ := ValidRepo(tc.repo); got != tc.wantValidRepo { t.Errorf("ValidRepo(%q) = %v, want %v", tc.repo, got, tc.wantValidRepo) } - if got := ValidRepoAndHasUsableRefs(tc.repo); got != tc.wantValidRepoAndHasRefs { + if got, _ := ValidRepoAndHasUsableRefs(tc.repo); got != tc.wantValidRepoAndHasRefs { t.Errorf("ValidRepoAndHasUsableRefs(%q) = %v, want %v", tc.repo, got, tc.wantValidRepoAndHasRefs) } }) } } + +func TestInMemoryRepoTagsCache(t *testing.T) { + cache := &InMemoryRepoTagsCache{} + repo := "https://github.com/example/test-repo" + + // Test that it starts as valid + if cache.IsInvalid(repo) { + t.Errorf("Expected repo to be valid initially") + } + + // Test SetInvalid + cache.SetInvalid(repo) + if !cache.IsInvalid(repo) { + t.Errorf("Expected repo to be invalid after SetInvalid") + } + + // Reset the cache + cache = &InMemoryRepoTagsCache{} + + // Test SetInvalidWithTTL (which behaves as SetInvalid) + cache.SetInvalidWithTTL(repo, 1*time.Hour) + if !cache.IsInvalid(repo) { + t.Errorf("Expected repo to be invalid after SetInvalidWithTTL") + } +}