diff --git a/internal/validators/registries/export_test.go b/internal/validators/registries/export_test.go index 89878de24..5a1649ea8 100644 --- a/internal/validators/registries/export_test.go +++ b/internal/validators/registries/export_test.go @@ -7,3 +7,14 @@ package registries // // Intended for cargo_test.go's positive-path and transient-error tests only. var ValidateCargoREADME = validateCargoREADME + +// ValidatePyPIPackage and ValidateNPMPackage expose the package-private +// validatePyPIPackage / validateNPMPackage to the external _test package so +// httptest-driven tests can exercise the metadata fetch and status-disambiguation +// pipeline (missing package vs missing/unpropagated version vs transient upstream) +// against a mock server, bypassing the exact-baseURL guard that the public +// ValidatePyPI / ValidateNPM enforce. +var ( + ValidatePyPIPackage = validatePyPIPackage + ValidateNPMPackage = validateNPMPackage +) diff --git a/internal/validators/registries/npm.go b/internal/validators/registries/npm.go index 7c665a995..a6ed17023 100644 --- a/internal/validators/registries/npm.go +++ b/internal/validators/registries/npm.go @@ -52,6 +52,14 @@ func ValidateNPM(ctx context.Context, pkg model.Package, serverName string) erro pkg.RegistryBaseURL, model.RegistryTypeNPM, model.RegistryURLNPM) } + return validateNPMPackage(ctx, pkg, serverName) +} + +// validateNPMPackage performs the version-metadata fetch and the mcpName check. +// It is split out from ValidateNPM so that httptest-based tests can drive the HTTP +// pipeline against a mock server (exposed via export_test.go), bypassing the +// exact-baseURL guard that ValidateNPM enforces for callers. +func validateNPMPackage(ctx context.Context, pkg model.Package, serverName string) error { client := &http.Client{Timeout: 10 * time.Second} requestURL := pkg.RegistryBaseURL + "/" + url.PathEscape(pkg.Identifier) + "/" + url.PathEscape(pkg.Version) @@ -60,7 +68,7 @@ func ValidateNPM(ctx context.Context, pkg model.Package, serverName string) erro return fmt.Errorf("failed to create request: %w", err) } - req.Header.Set("User-Agent", "MCP-Registry-Validator/1.0") + req.Header.Set("User-Agent", userAgent) req.Header.Set("Accept", "application/json") resp, err := client.Do(req) @@ -70,7 +78,7 @@ func ValidateNPM(ctx context.Context, pkg model.Package, serverName string) erro defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return fmt.Errorf("NPM package '%s' not found (status: %d)", pkg.Identifier, resp.StatusCode) + return npmFetchStatusError(ctx, client, pkg, resp.StatusCode) } var npmResp NPMPackageResponse @@ -88,3 +96,90 @@ func ValidateNPM(ctx context.Context, pkg model.Package, serverName string) erro return nil } + +// npmPackageState is the outcome of probing the package-level NPM metadata +// endpoint, used to disambiguate a 404 from the version-specific endpoint. +type npmPackageState int + +const ( + // npmPackageUnknown: the probe returned a status we can't classify. + npmPackageUnknown npmPackageState = iota + // npmPackageExists: the package exists (200) but the requested version does not. + npmPackageExists + // npmPackageMissing: the package itself does not exist (404). + npmPackageMissing + // npmPackageTransient: the probe failed for a retryable reason (network error, + // 429, or 5xx). Existence is undetermined and the caller should not report + // "not found". + npmPackageTransient +) + +// npmFetchStatusError maps a non-200 status from the version-specific metadata +// endpoint to a caller-actionable error. A 404 is delegated to npmVersion404Error +// for disambiguation. 429/5xx are transient, not "not found". +func npmFetchStatusError(ctx context.Context, client *http.Client, pkg model.Package, status int) error { + switch { + case status == http.StatusNotFound: + return npmVersion404Error(ctx, client, pkg) + case status == http.StatusTooManyRequests: + return fmt.Errorf("NPM rate-limited the metadata request for package '%s' (status: 429). Likely transient, retry later", pkg.Identifier) + case status >= 500 && status < 600: + return fmt.Errorf("NPM upstream error fetching metadata for package '%s' (status: %d). Likely transient, retry later", pkg.Identifier, status) + default: + return fmt.Errorf("NPM package '%s' metadata fetch failed (status: %d)", pkg.Identifier, status) + } +} + +// npmVersion404Error disambiguates a 404 from the version-specific endpoint: a +// genuinely-missing package versus a package that exists but whose requested +// version is absent (commonly because a freshly published release has not yet +// propagated). It probes the package-level endpoint so the publisher gets an +// actionable message rather than a blanket "not found". +func npmVersion404Error(ctx context.Context, client *http.Client, pkg model.Package) error { + switch probeNPMPackage(ctx, client, pkg.RegistryBaseURL, pkg.Identifier) { + case npmPackageExists: + return fmt.Errorf("NPM package '%s' exists, but version '%s' was not found (status: 404). A newly published release can take a moment to appear on the registry. Wait and retry, or publish version '%s' before registering it", pkg.Identifier, pkg.Version, pkg.Version) + case npmPackageMissing: + return fmt.Errorf("NPM package '%s' not found (status: 404)", pkg.Identifier) + case npmPackageTransient: + return fmt.Errorf("NPM could not confirm package '%s' version '%s' (version status: 404, package check inconclusive). Likely transient, retry later", pkg.Identifier, pkg.Version) + case npmPackageUnknown: + // Probe returned an unclassifiable status, so fall through to the + // best-effort message below. + } + return fmt.Errorf("NPM package '%s' version '%s' not found (status: 404)", pkg.Identifier, pkg.Version) +} + +// probeNPMPackage checks whether a package exists on the NPM registry regardless +// of version, with a HEAD request to the package-level endpoint (/{name}). Only +// the status code is used, so HEAD avoids downloading the (large) packument. +func probeNPMPackage(ctx context.Context, client *http.Client, baseURL, identifier string) npmPackageState { + // The probe only refines the 404 error message, so it must not extend the + // validator's worst case by another full client timeout. + ctx, cancel := context.WithTimeout(ctx, 3*time.Second) + defer cancel() + + probeURL := baseURL + "/" + url.PathEscape(identifier) + req, err := http.NewRequestWithContext(ctx, http.MethodHead, probeURL, nil) + if err != nil { + return npmPackageUnknown + } + req.Header.Set("User-Agent", userAgent) + + resp, err := client.Do(req) + if err != nil { + return npmPackageTransient + } + defer resp.Body.Close() + + switch { + case resp.StatusCode == http.StatusOK: + return npmPackageExists + case resp.StatusCode == http.StatusNotFound: + return npmPackageMissing + case resp.StatusCode == http.StatusTooManyRequests, resp.StatusCode >= 500 && resp.StatusCode < 600: + return npmPackageTransient + default: + return npmPackageUnknown + } +} diff --git a/internal/validators/registries/npm_test.go b/internal/validators/registries/npm_test.go index f9bf7da0f..7adb82827 100644 --- a/internal/validators/registries/npm_test.go +++ b/internal/validators/registries/npm_test.go @@ -2,13 +2,186 @@ package registries_test import ( "context" + "fmt" + "io" + "net/http" + "net/http/httptest" + "strings" "testing" + "time" "github.com/modelcontextprotocol/registry/internal/validators/registries" "github.com/modelcontextprotocol/registry/pkg/model" "github.com/stretchr/testify/assert" ) +// newNPMMock stands in for registry.npmjs.org: it routes the version fetch and +// package probe by path shape and returns the given statuses (versionBody used on 200). +func newNPMMock(versionStatus int, versionBody string, packageStatus int) *httptest.Server { + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Route on the escaped path: a scoped name like @scope/pkg is sent as the + // single segment @scope%2Fpkg, and decoding it would wrongly split it in two. + parts := strings.Split(strings.Trim(r.URL.EscapedPath(), "/"), "/") + // Pin the method per endpoint (GET fetch, HEAD probe) so a method + // regression in the validator surfaces as a 405 instead of passing. + switch len(parts) { + case 2: // /{name}/{version} + if r.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + if versionStatus != http.StatusOK { + w.WriteHeader(versionStatus) + return + } + w.Header().Set("Content-Type", "application/json") + _, _ = io.WriteString(w, versionBody) + case 1: // /{name} (package-existence probe) + if r.Method != http.MethodHead { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + w.WriteHeader(packageStatus) + default: + w.WriteHeader(http.StatusInternalServerError) + } + })) +} + +// TestValidateNPM_VersionNotYetVisible is the #553 regression for npm: version 404s +// while the package exists, so the error must report a missing version, not a missing package. +func TestValidateNPM_VersionNotYetVisible(t *testing.T) { + ctx := context.Background() + mock := newNPMMock(http.StatusNotFound, "", http.StatusOK) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypeNPM, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "9.9.9"} + err := registries.ValidateNPMPackage(ctx, pkg, "io.github.test/demo") + assert.Error(t, err) + assert.Contains(t, err.Error(), "exists, but version '9.9.9'", "package-exists/version-missing must be distinguished from package-missing") +} + +// TestValidateNPM_PackageMissing: both endpoints 404, so "not found" is correct. +func TestValidateNPM_PackageMissing(t *testing.T) { + ctx := context.Background() + mock := newNPMMock(http.StatusNotFound, "", http.StatusNotFound) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypeNPM, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "1.0.0"} + err := registries.ValidateNPMPackage(ctx, pkg, "io.github.test/demo") + assert.Error(t, err) + assert.Contains(t, err.Error(), "not found") + assert.NotContains(t, err.Error(), "exists, but version", "a genuinely missing package must not claim the version exists") +} + +// TestValidateNPM_TransientUpstream: a 5xx on the version fetch must be retryable, +// not "not found". +func TestValidateNPM_TransientUpstream(t *testing.T) { + ctx := context.Background() + mock := newNPMMock(http.StatusBadGateway, "", http.StatusOK) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypeNPM, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "1.0.0"} + err := registries.ValidateNPMPackage(ctx, pkg, "io.github.test/demo") + assert.Error(t, err) + assert.Contains(t, err.Error(), "transient") + assert.NotContains(t, err.Error(), "not found", "transient upstream errors must not be reported as 'not found'") +} + +// TestValidateNPM_VersionNotFoundProbeInconclusive: version 404 plus a transient +// probe (503) leaves existence undetermined, so the validator must not say "not found". +func TestValidateNPM_VersionNotFoundProbeInconclusive(t *testing.T) { + ctx := context.Background() + mock := newNPMMock(http.StatusNotFound, "", http.StatusServiceUnavailable) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypeNPM, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "1.0.0"} + err := registries.ValidateNPMPackage(ctx, pkg, "io.github.test/demo") + assert.Error(t, err) + assert.Contains(t, err.Error(), "transient") + assert.NotContains(t, err.Error(), "not found", "an inconclusive probe must not assert the package is missing") +} + +// TestValidateNPM_ProbeDeadlineBounded: a hung probe must be cut off by the +// probe's own short deadline instead of riding out the client's full 10s +// timeout, and the cutoff must read as inconclusive rather than "not found". +func TestValidateNPM_ProbeDeadlineBounded(t *testing.T) { + ctx := context.Background() + mock := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + parts := strings.Split(strings.Trim(r.URL.EscapedPath(), "/"), "/") + if len(parts) == 2 { // version fetch + w.WriteHeader(http.StatusNotFound) + return + } + // Package probe: hang until the client gives up. + <-r.Context().Done() + })) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypeNPM, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "1.0.0"} + start := time.Now() + err := registries.ValidateNPMPackage(ctx, pkg, "io.github.test/demo") + elapsed := time.Since(start) + assert.Error(t, err) + assert.Contains(t, err.Error(), "transient", "a probe cut off by its deadline is inconclusive, not 'not found'") + assert.Less(t, elapsed, 8*time.Second, "a hung probe must be bounded by the probe deadline, not the client timeout") +} + +// TestValidateNPM_ScopedVersionNotYetVisible covers scoped names (@scope/name), the +// escaped-single-segment case, through the version fetch and the HEAD probe. +func TestValidateNPM_ScopedVersionNotYetVisible(t *testing.T) { + ctx := context.Background() + mock := newNPMMock(http.StatusNotFound, "", http.StatusOK) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypeNPM, RegistryBaseURL: mock.URL, Identifier: "@scope/demo", Version: "9.9.9"} + err := registries.ValidateNPMPackage(ctx, pkg, "io.github.test/demo") + assert.Error(t, err) + assert.Contains(t, err.Error(), "exists, but version '9.9.9'", "scoped names must route correctly through both endpoints") +} + +// TestValidateNPM_VersionEndpointRateLimited: a 429 on the version fetch is reported +// as rate-limited/transient. +func TestValidateNPM_VersionEndpointRateLimited(t *testing.T) { + ctx := context.Background() + mock := newNPMMock(http.StatusTooManyRequests, "", http.StatusOK) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypeNPM, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "1.0.0"} + err := registries.ValidateNPMPackage(ctx, pkg, "io.github.test/demo") + assert.Error(t, err) + assert.Contains(t, err.Error(), "rate-limited") + assert.NotContains(t, err.Error(), "not found") +} + +// TestValidateNPM_VersionNotFoundProbeUnclassified: the version 404s and the probe +// returns an unclassifiable status, so the validator falls back to a plain +// version-not-found message. +func TestValidateNPM_VersionNotFoundProbeUnclassified(t *testing.T) { + ctx := context.Background() + mock := newNPMMock(http.StatusNotFound, "", http.StatusTeapot) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypeNPM, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "1.0.0"} + err := registries.ValidateNPMPackage(ctx, pkg, "io.github.test/demo") + assert.Error(t, err) + assert.Contains(t, err.Error(), "version '1.0.0' not found") + assert.NotContains(t, err.Error(), "exists, but version") +} + +// TestValidateNPM_PositivePathMock: a version response with the matching mcpName validates. +func TestValidateNPM_PositivePathMock(t *testing.T) { + ctx := context.Background() + const serverName = "io.github.test/demo" + body := fmt.Sprintf(`{"mcpName":%q}`, serverName) + mock := newNPMMock(http.StatusOK, body, http.StatusOK) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypeNPM, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "1.0.0"} + err := registries.ValidateNPMPackage(ctx, pkg, serverName) + assert.NoError(t, err, "a version response with the matching mcpName should validate") +} + func TestValidateNPM_RealPackages(t *testing.T) { ctx := context.Background() @@ -126,6 +299,12 @@ func TestValidateNPM_RealPackages(t *testing.T) { err := registries.ValidateNPM(ctx, pkg, tt.serverName) + // A live 429/5xx from the registry is inconclusive, not a failure + // of the case under test. + if err != nil && strings.Contains(err.Error(), "retry later") { + t.Skipf("transient registry response: %v", err) + } + if tt.expectError { assert.Error(t, err) assert.Contains(t, err.Error(), tt.errorMessage) diff --git a/internal/validators/registries/pypi.go b/internal/validators/registries/pypi.go index ec160bd97..339ed911e 100644 --- a/internal/validators/registries/pypi.go +++ b/internal/validators/registries/pypi.go @@ -50,6 +50,14 @@ func ValidatePyPI(ctx context.Context, pkg model.Package, serverName string) err pkg.RegistryBaseURL, model.RegistryTypePyPI, model.RegistryURLPyPI) } + return validatePyPIPackage(ctx, pkg, serverName) +} + +// validatePyPIPackage performs the version-metadata fetch and the mcp-name token +// check. It is split out from ValidatePyPI so that httptest-based tests can drive +// the HTTP pipeline against a mock server (exposed via export_test.go), bypassing +// the exact-baseURL guard that ValidatePyPI enforces for callers. +func validatePyPIPackage(ctx context.Context, pkg model.Package, serverName string) error { client := &http.Client{Timeout: 10 * time.Second} // PathEscape so an identifier that smuggles "/" or ".." cannot redirect @@ -63,7 +71,7 @@ func ValidatePyPI(ctx context.Context, pkg model.Package, serverName string) err return fmt.Errorf("failed to create request: %w", err) } - req.Header.Set("User-Agent", "MCP-Registry-Validator/1.0") + req.Header.Set("User-Agent", userAgent) req.Header.Set("Accept", "application/json") resp, err := client.Do(req) @@ -73,7 +81,7 @@ func ValidatePyPI(ctx context.Context, pkg model.Package, serverName string) err defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return fmt.Errorf("PyPI package '%s' not found (status: %d)", pkg.Identifier, resp.StatusCode) + return pypiFetchStatusError(ctx, client, pkg, resp.StatusCode) } var pypiResp PyPIPackageResponse @@ -98,3 +106,90 @@ func ValidatePyPI(ctx context.Context, pkg model.Package, serverName string) err return fmt.Errorf("PyPI package '%s' ownership validation failed. The server name '%s' must appear as 'mcp-name: %s' in the package README", pkg.Identifier, serverName, serverName) } + +// pypiPackageState is the outcome of probing the package-level PyPI metadata +// endpoint, used to disambiguate a 404 from the version-specific endpoint. +type pypiPackageState int + +const ( + // pypiPackageUnknown: the probe returned a status we can't classify. + pypiPackageUnknown pypiPackageState = iota + // pypiPackageExists: the package exists (200) but the requested version does not. + pypiPackageExists + // pypiPackageMissing: the package itself does not exist (404). + pypiPackageMissing + // pypiPackageTransient: the probe failed for a retryable reason (network error, + // 429, or 5xx). Existence is undetermined and the caller should not report + // "not found". + pypiPackageTransient +) + +// pypiFetchStatusError maps a non-200 status from the version-specific metadata +// endpoint to a caller-actionable error. A 404 is delegated to pypiVersion404Error +// for disambiguation. 429/5xx are transient, not "not found". +func pypiFetchStatusError(ctx context.Context, client *http.Client, pkg model.Package, status int) error { + switch { + case status == http.StatusNotFound: + return pypiVersion404Error(ctx, client, pkg) + case status == http.StatusTooManyRequests: + return fmt.Errorf("PyPI rate-limited the metadata request for package '%s' (status: 429). Likely transient, retry later", pkg.Identifier) + case status >= 500 && status < 600: + return fmt.Errorf("PyPI upstream error fetching metadata for package '%s' (status: %d). Likely transient, retry later", pkg.Identifier, status) + default: + return fmt.Errorf("PyPI package '%s' metadata fetch failed (status: %d)", pkg.Identifier, status) + } +} + +// pypiVersion404Error disambiguates a 404 from the version-specific endpoint: a +// genuinely-missing package versus a package that exists but whose requested +// version is absent (commonly because a freshly published release has not yet +// propagated). It probes the package-level endpoint so the publisher gets an +// actionable message rather than a blanket "not found". +func pypiVersion404Error(ctx context.Context, client *http.Client, pkg model.Package) error { + switch probePyPIPackage(ctx, client, pkg.RegistryBaseURL, pkg.Identifier) { + case pypiPackageExists: + return fmt.Errorf("PyPI package '%s' exists, but version '%s' was not found (status: 404). A newly published release can take a moment to appear on PyPI. Wait and retry, or publish version '%s' before registering it", pkg.Identifier, pkg.Version, pkg.Version) + case pypiPackageMissing: + return fmt.Errorf("PyPI package '%s' not found (status: 404)", pkg.Identifier) + case pypiPackageTransient: + return fmt.Errorf("PyPI could not confirm package '%s' version '%s' (version status: 404, package check inconclusive). Likely transient, retry later", pkg.Identifier, pkg.Version) + case pypiPackageUnknown: + // Probe returned an unclassifiable status, so fall through to the + // best-effort message below. + } + return fmt.Errorf("PyPI package '%s' version '%s' not found (status: 404)", pkg.Identifier, pkg.Version) +} + +// probePyPIPackage checks whether a package exists on PyPI regardless of version, +// with a HEAD request to the package-level endpoint (/pypi/{name}/json). Only the +// status code is used, so HEAD avoids downloading the (large) all-versions body. +func probePyPIPackage(ctx context.Context, client *http.Client, baseURL, identifier string) pypiPackageState { + // The probe only refines the 404 error message, so it must not extend the + // validator's worst case by another full client timeout. + ctx, cancel := context.WithTimeout(ctx, 3*time.Second) + defer cancel() + + probeURL := fmt.Sprintf("%s/pypi/%s/json", baseURL, url.PathEscape(identifier)) + req, err := http.NewRequestWithContext(ctx, http.MethodHead, probeURL, nil) + if err != nil { + return pypiPackageUnknown + } + req.Header.Set("User-Agent", userAgent) + + resp, err := client.Do(req) + if err != nil { + return pypiPackageTransient + } + defer resp.Body.Close() + + switch { + case resp.StatusCode == http.StatusOK: + return pypiPackageExists + case resp.StatusCode == http.StatusNotFound: + return pypiPackageMissing + case resp.StatusCode == http.StatusTooManyRequests, resp.StatusCode >= 500 && resp.StatusCode < 600: + return pypiPackageTransient + default: + return pypiPackageUnknown + } +} diff --git a/internal/validators/registries/pypi_test.go b/internal/validators/registries/pypi_test.go index aadb05f1e..e240082a9 100644 --- a/internal/validators/registries/pypi_test.go +++ b/internal/validators/registries/pypi_test.go @@ -2,13 +2,174 @@ package registries_test import ( "context" + "fmt" + "io" + "net/http" + "net/http/httptest" + "strings" "testing" + "time" "github.com/modelcontextprotocol/registry/internal/validators/registries" "github.com/modelcontextprotocol/registry/pkg/model" "github.com/stretchr/testify/assert" ) +// newPyPIMock stands in for pypi.org: it routes the version fetch and package +// probe by path shape and returns the given statuses (versionBody is used on 200). +func newPyPIMock(versionStatus int, versionBody string, packageStatus int) *httptest.Server { + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Route on the escaped path so an identifier containing an encoded + // separator is not silently re-split into extra segments. + parts := strings.Split(strings.Trim(r.URL.EscapedPath(), "/"), "/") + // Pin the method per endpoint (GET fetch, HEAD probe) so a method + // regression in the validator surfaces as a 405 instead of passing. + switch len(parts) { + case 4: // /pypi/{name}/{version}/json + if r.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + if versionStatus != http.StatusOK { + w.WriteHeader(versionStatus) + return + } + w.Header().Set("Content-Type", "application/json") + _, _ = io.WriteString(w, versionBody) + case 3: // /pypi/{name}/json (package-existence probe) + if r.Method != http.MethodHead { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + w.WriteHeader(packageStatus) + default: + w.WriteHeader(http.StatusInternalServerError) + } + })) +} + +// TestValidatePyPI_VersionNotYetVisible is the #553 regression: version 404s while +// the package exists, so the error must report a missing version, not a missing package. +func TestValidatePyPI_VersionNotYetVisible(t *testing.T) { + ctx := context.Background() + mock := newPyPIMock(http.StatusNotFound, "", http.StatusOK) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypePyPI, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "9.9.9"} + err := registries.ValidatePyPIPackage(ctx, pkg, "io.github.test/demo") + assert.Error(t, err) + assert.Contains(t, err.Error(), "exists, but version '9.9.9'", "package-exists/version-missing must be distinguished from package-missing") +} + +// TestValidatePyPI_PackageMissing: both the version and the package endpoints 404, +// so the package genuinely does not exist and "not found" is correct. +func TestValidatePyPI_PackageMissing(t *testing.T) { + ctx := context.Background() + mock := newPyPIMock(http.StatusNotFound, "", http.StatusNotFound) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypePyPI, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "1.0.0"} + err := registries.ValidatePyPIPackage(ctx, pkg, "io.github.test/demo") + assert.Error(t, err) + assert.Contains(t, err.Error(), "not found") + assert.NotContains(t, err.Error(), "exists, but version", "a genuinely missing package must not claim the version exists") +} + +// TestValidatePyPI_TransientUpstream: a 5xx on the version fetch is upstream +// availability, not "package missing", and must be reported as retryable. +func TestValidatePyPI_TransientUpstream(t *testing.T) { + ctx := context.Background() + mock := newPyPIMock(http.StatusServiceUnavailable, "", http.StatusOK) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypePyPI, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "1.0.0"} + err := registries.ValidatePyPIPackage(ctx, pkg, "io.github.test/demo") + assert.Error(t, err) + assert.Contains(t, err.Error(), "transient") + assert.NotContains(t, err.Error(), "not found", "transient upstream errors must not be reported as 'not found'") +} + +// TestValidatePyPI_VersionNotFoundProbeInconclusive: version 404 plus a transient +// probe (429) leaves existence undetermined, so the validator must not say "not found". +func TestValidatePyPI_VersionNotFoundProbeInconclusive(t *testing.T) { + ctx := context.Background() + mock := newPyPIMock(http.StatusNotFound, "", http.StatusTooManyRequests) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypePyPI, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "1.0.0"} + err := registries.ValidatePyPIPackage(ctx, pkg, "io.github.test/demo") + assert.Error(t, err) + assert.Contains(t, err.Error(), "transient") + assert.NotContains(t, err.Error(), "not found", "an inconclusive probe must not assert the package is missing") +} + +// TestValidatePyPI_ProbeDeadlineBounded: a hung probe must be cut off by the +// probe's own short deadline instead of riding out the client's full 10s +// timeout, and the cutoff must read as inconclusive rather than "not found". +func TestValidatePyPI_ProbeDeadlineBounded(t *testing.T) { + ctx := context.Background() + mock := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + parts := strings.Split(strings.Trim(r.URL.EscapedPath(), "/"), "/") + if len(parts) == 4 { // version fetch + w.WriteHeader(http.StatusNotFound) + return + } + // Package probe: hang until the client gives up. + <-r.Context().Done() + })) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypePyPI, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "1.0.0"} + start := time.Now() + err := registries.ValidatePyPIPackage(ctx, pkg, "io.github.test/demo") + elapsed := time.Since(start) + assert.Error(t, err) + assert.Contains(t, err.Error(), "transient", "a probe cut off by its deadline is inconclusive, not 'not found'") + assert.Less(t, elapsed, 8*time.Second, "a hung probe must be bounded by the probe deadline, not the client timeout") +} + +// TestValidatePyPI_VersionEndpointRateLimited: a 429 on the version fetch is reported +// as rate-limited/transient. +func TestValidatePyPI_VersionEndpointRateLimited(t *testing.T) { + ctx := context.Background() + mock := newPyPIMock(http.StatusTooManyRequests, "", http.StatusOK) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypePyPI, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "1.0.0"} + err := registries.ValidatePyPIPackage(ctx, pkg, "io.github.test/demo") + assert.Error(t, err) + assert.Contains(t, err.Error(), "rate-limited") + assert.NotContains(t, err.Error(), "not found") +} + +// TestValidatePyPI_VersionNotFoundProbeUnclassified: the version 404s and the probe +// returns an unclassifiable status, so the validator falls back to a plain +// version-not-found message without claiming the package is present or absent. +func TestValidatePyPI_VersionNotFoundProbeUnclassified(t *testing.T) { + ctx := context.Background() + mock := newPyPIMock(http.StatusNotFound, "", http.StatusTeapot) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypePyPI, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "1.0.0"} + err := registries.ValidatePyPIPackage(ctx, pkg, "io.github.test/demo") + assert.Error(t, err) + assert.Contains(t, err.Error(), "version '1.0.0' not found") + assert.NotContains(t, err.Error(), "exists, but version") +} + +// TestValidatePyPI_PositivePathMock: a version README carrying the exact mcp-name token validates. +func TestValidatePyPI_PositivePathMock(t *testing.T) { + ctx := context.Background() + const serverName = "io.github.test/demo" + body := fmt.Sprintf(`{"info":{"description":"# Demo\n\nmcp-name: %s\n"}}`, serverName) + mock := newPyPIMock(http.StatusOK, body, http.StatusOK) + defer mock.Close() + + pkg := model.Package{RegistryType: model.RegistryTypePyPI, RegistryBaseURL: mock.URL, Identifier: "demo-pkg", Version: "1.0.0"} + err := registries.ValidatePyPIPackage(ctx, pkg, serverName) + assert.NoError(t, err, "a version README containing the exact mcp-name token should validate") +} + func TestValidatePyPI_RealPackages(t *testing.T) { ctx := context.Background() @@ -79,6 +240,12 @@ func TestValidatePyPI_RealPackages(t *testing.T) { err := registries.ValidatePyPI(ctx, pkg, tt.serverName) + // A live 429/5xx from the registry is inconclusive, not a failure + // of the case under test. + if err != nil && strings.Contains(err.Error(), "retry later") { + t.Skipf("transient registry response: %v", err) + } + if tt.expectError { assert.Error(t, err) assert.Contains(t, err.Error(), tt.errorMessage)