diff --git a/app/src/App/API.purs b/app/src/App/API.purs index afabd0bc..7fb542ba 100644 --- a/app/src/App/API.purs +++ b/app/src/App/API.purs @@ -2,6 +2,7 @@ module Registry.App.API ( AuthenticatedEffects , COMPILER_CACHE , CompilerCache(..) + , LicenseValidationError(..) , PackageSetUpdateEffects , PublishEffects , _compilerCache @@ -12,8 +13,10 @@ module Registry.App.API , getPacchettiBotti , packageSetUpdate , packagingTeam + , printLicenseValidationError , publish , removeIgnoredTarballFiles + , validateLicense ) where import Registry.App.Prelude @@ -53,6 +56,7 @@ import Parsing.Combinators.Array as Parsing.Combinators.Array import Parsing.String as Parsing.String import Registry.API.V1 (PackageSetJobData) import Registry.App.Auth as Auth +import Registry.App.CLI.Licensee as Licensee import Registry.App.CLI.Purs (CompilerFailure(..), compilerFailureCodec) import Registry.App.CLI.Purs as Purs import Registry.App.CLI.PursVersions as PursVersions @@ -90,6 +94,7 @@ import Registry.Foreign.Octokit (Team) import Registry.Foreign.Tmp as Tmp import Registry.Internal.Codec as Internal.Codec import Registry.Internal.Path as Internal.Path +import Registry.License as License import Registry.Location as Location import Registry.Manifest as Manifest import Registry.Metadata as Metadata @@ -519,6 +524,11 @@ publish maybeLegacyIndex payload = do when (Operation.Validation.isMetadataPackage (Manifest receivedManifest)) do Except.throw "The `metadata` package cannot be uploaded to the registry because it is a protected package." + Log.info "Verifying licenses are consistent among manifest files..." + validateLicense downloadedPackage receivedManifest.license >>= case _ of + Nothing -> Log.debug "License validation passed." + Just err -> Except.throw $ printLicenseValidationError err + for_ (Operation.Validation.isNotUnpublished (Manifest receivedManifest) (Metadata metadata)) \info -> do Except.throw $ String.joinWith "\n" [ "You tried to upload a version that has been unpublished: " <> Version.print receivedManifest.version @@ -1322,3 +1332,82 @@ instance FsEncodable CompilerCache where } Exists.mkExists $ Cache.AsJson cacheKey codec next + +-- | Errors that can occur when validating license consistency +data LicenseValidationError = LicenseMismatch + { manifestLicense :: License + , detectedLicenses :: Array License + } + +derive instance Eq LicenseValidationError + +printLicenseValidationError :: LicenseValidationError -> String +printLicenseValidationError = case _ of + LicenseMismatch { manifestLicense, detectedLicenses } -> Array.fold + [ "License mismatch: The manifest specifies license '" + , License.print manifestLicense + , "' but the following license(s) were detected in your repository: " + , String.joinWith ", " (map License.print detectedLicenses) + , ". Please ensure your manifest license accurately represents all licenses " + , "in your repository. If multiple licenses apply, join them using SPDX " + , "conjunctions (e.g., 'MIT AND Apache-2.0' or 'MIT OR Apache-2.0')." + ] + +-- | Validate that the license in the manifest is consistent with licenses +-- | detected in the repository (LICENSE file, package.json, bower.json). +-- | +-- | This check ensures that the SPDX identifier asserted in the package +-- | manifest accurately represents the licenses present in the repository. +-- | If multiple distinct licenses are detected, they must all be represented +-- | in the manifest license (e.g., joined with AND or OR). +validateLicense :: forall r. FilePath -> License -> Run (LOG + AFF + r) (Maybe LicenseValidationError) +validateLicense packageDir manifestLicense = do + Log.debug "Detecting licenses from repository files..." + detected <- Run.liftAff $ Licensee.detect packageDir + case detected of + Left err -> do + Log.warn $ "License detection failed, relying on manifest: " <> err + pure Nothing + Right detectedStrings -> do + let + parsedLicenses :: Array License + parsedLicenses = Array.mapMaybe (hush <<< License.parse) detectedStrings + + Log.debug $ "Detected licenses: " <> String.joinWith ", " detectedStrings + + if Array.null parsedLicenses then do + Log.debug "No licenses detected from repository files, nothing to validate." + pure Nothing + else case License.extractIds manifestLicense of + Left err -> do + -- This shouldn't be possible (we have already validated the license) + -- as part of constructing the manifest + Log.warn $ "Could not extract license IDs from manifest: " <> err + pure Nothing + Right manifestIds -> do + let + manifestIdSet = Set.fromFoldable manifestIds + + -- A detected license is covered if all its IDs are in the manifest IDs + isCovered :: License -> Boolean + isCovered license = case License.extractIds license of + Left _ -> false + Right ids -> Array.all (\id -> Set.member id manifestIdSet) ids + + uncoveredLicenses :: Array License + uncoveredLicenses = Array.filter (not <<< isCovered) parsedLicenses + + if Array.null uncoveredLicenses then do + Log.debug "All detected licenses are covered by the manifest license." + pure Nothing + else do + Log.warn $ Array.fold + [ "License mismatch detected: manifest has '" + , License.print manifestLicense + , "' but detected " + , String.joinWith ", " (map License.print parsedLicenses) + ] + pure $ Just $ LicenseMismatch + { manifestLicense + , detectedLicenses: uncoveredLicenses + } diff --git a/app/test/App/API.purs b/app/test/App/API.purs index 28f17f90..36d9a488 100644 --- a/app/test/App/API.purs +++ b/app/test/App/API.purs @@ -14,6 +14,7 @@ import Effect.Ref as Ref import Node.FS.Aff as FS.Aff import Node.Path as Path import Node.Process as Process +import Registry.App.API (LicenseValidationError(..), validateLicense) import Registry.App.API as API import Registry.App.CLI.Tar as Tar import Registry.App.Effect.Env as Env @@ -27,6 +28,7 @@ import Registry.Foreign.FSExtra as FS.Extra import Registry.Foreign.FastGlob as FastGlob import Registry.Foreign.Tmp as Tmp import Registry.Internal.Codec as Internal.Codec +import Registry.License as License import Registry.Manifest as Manifest import Registry.ManifestIndex as ManifestIndex import Registry.PackageName as PackageName @@ -57,6 +59,9 @@ spec = do Spec.describe "Verifies build plans" do checkBuildPlanToResolutions + Spec.describe "Validates licenses match" do + licenseValidation + Spec.describe "Includes correct files in tarballs" do removeIgnoredTarballFiles copySourceFiles @@ -441,3 +446,47 @@ copySourceFiles = Spec.hoistSpec identity (\_ -> Assert.Run.runBaseEffects) $ Sp writeFiles = Run.liftAff <<< traverse_ (\path -> FS.Aff.writeTextFile UTF8 (inTmp path) "module Module where") pure { source: tmp, destination: destTmp, writeDirectories, writeFiles } + +licenseValidation :: Spec.Spec Unit +licenseValidation = do + let fixtures = Path.concat [ "app", "fixtures", "licenses", "halogen-hooks" ] + + Spec.describe "validateLicense" do + Spec.it "Passes when manifest license covers all detected licenses" do + -- The halogen-hooks fixture has MIT in LICENSE and Apache-2.0 in package.json + let manifestLicense = unsafeLicense "MIT AND Apache-2.0" + result <- Assert.Run.runBaseEffects $ validateLicense fixtures manifestLicense + Assert.shouldEqual Nothing result + + Spec.it "Fails when manifest license does not cover a detected license" do + -- Manifest says MIT only, but Apache-2.0 is also in package.json + let manifestLicense = unsafeLicense "MIT" + result <- Assert.Run.runBaseEffects $ validateLicense fixtures manifestLicense + case result of + Just (LicenseMismatch { detectedLicenses }) -> + -- Should detect that Apache-2.0 is not covered + Assert.shouldContain (map License.print detectedLicenses) "Apache-2.0" + _ -> + Assert.fail "Expected LicenseMismatch error" + + Spec.it "Fails when manifest has completely different license" do + -- Manifest says BSD-3-Clause, but fixture has MIT and Apache-2.0 + let manifestLicense = unsafeLicense "BSD-3-Clause" + result <- Assert.Run.runBaseEffects $ validateLicense fixtures manifestLicense + case result of + Just (LicenseMismatch { manifestLicense: ml, detectedLicenses }) -> do + Assert.shouldEqual "BSD-3-Clause" (License.print ml) + -- Both MIT and Apache-2.0 should be in the detected licenses + Assert.shouldContain (map License.print detectedLicenses) "MIT" + Assert.shouldContain (map License.print detectedLicenses) "Apache-2.0" + _ -> + Assert.fail "Expected LicenseMismatch error" + + Spec.it "Passes when manifest uses OR conjunction" do + -- OR conjunction is also valid - means either license applies + let manifestLicense = unsafeLicense "MIT OR Apache-2.0" + result <- Assert.Run.runBaseEffects $ validateLicense fixtures manifestLicense + Assert.shouldEqual Nothing result + +unsafeLicense :: String -> License +unsafeLicense str = unsafeFromRight $ License.parse str diff --git a/lib/src/License.js b/lib/src/License.js index 451fc774..1faaccfe 100644 --- a/lib/src/License.js +++ b/lib/src/License.js @@ -8,3 +8,34 @@ export const parseSPDXLicenseIdImpl = (onError, onSuccess, identifier) => { return onError(`Invalid SPDX identifier ${identifier}`); } }; + +// Extract all license IDs from a parsed SPDX expression AST. +// The AST structure from spdx-expression-parse is: +// - Simple: { license: 'MIT' } +// - With exception: { license: 'GPL-2.0', exception: 'Classpath-exception-2.0' } +// - Compound: { left: {...}, conjunction: 'and'|'or', right: {...} } +const extractLicenseIds = (ast) => { + const ids = new Set(); + + const walk = (node) => { + if (!node) return; + if (node.license) { + // Normalize to uppercase for case-insensitive comparison + ids.add(node.license.toUpperCase()); + } + if (node.left) walk(node.left); + if (node.right) walk(node.right); + }; + + walk(ast); + return Array.from(ids); +}; + +export const extractLicenseIdsImpl = (onError, onSuccess, expression) => { + try { + const ast = parse(expression); + return onSuccess(extractLicenseIds(ast)); + } catch (_) { + return onError(`Invalid SPDX expression: ${expression}`); + } +}; diff --git a/lib/src/License.purs b/lib/src/License.purs index fa5ab972..8e8d1678 100644 --- a/lib/src/License.purs +++ b/lib/src/License.purs @@ -9,6 +9,7 @@ module Registry.License ( License , SPDXConjunction(..) , codec + , extractIds , joinWith , parse , print @@ -52,6 +53,14 @@ foreign import parseSPDXLicenseIdImpl :: forall r. Fn3 (String -> r) (String -> parse :: String -> Either String License parse = runFn3 parseSPDXLicenseIdImpl Left (Right <<< License) +foreign import extractLicenseIdsImpl :: forall r. Fn3 (String -> r) (Array String -> r) String r + +-- | Extract all license identifiers from a SPDX expression. +-- | Returns an array of uppercase license IDs for case-insensitive comparison. +-- | For example, "MIT AND Apache-2.0" returns ["MIT", "APACHE-2.0"]. +extractIds :: License -> Either String (Array String) +extractIds (License expr) = runFn3 extractLicenseIdsImpl Left Right expr + -- | A valid conjunction for SPDX license identifiers. AND means that both -- | licenses must be satisfied; OR means that at least one license must be -- | satisfied. diff --git a/lib/test/Registry/License.purs b/lib/test/Registry/License.purs index d2a6a7cd..41e914a8 100644 --- a/lib/test/Registry/License.purs +++ b/lib/test/Registry/License.purs @@ -44,6 +44,49 @@ spec = do Left err -> Assert.fail $ "joinWith created unparseable expression: " <> License.print joined <> " - Error: " <> err Right _ -> pure unit + Spec.describe "extractIds" do + Spec.it "extracts single license ID" do + case License.parse "MIT" of + Left err -> Assert.fail err + Right license -> case License.extractIds license of + Left err -> Assert.fail err + Right ids -> Assert.shouldEqual [ "MIT" ] ids + + Spec.it "extracts IDs from AND expression" do + case License.parse "MIT AND Apache-2.0" of + Left err -> Assert.fail err + Right license -> case License.extractIds license of + Left err -> Assert.fail err + Right ids -> do + Assert.shouldContain ids "MIT" + Assert.shouldContain ids "APACHE-2.0" + + Spec.it "extracts IDs from OR expression" do + case License.parse "MIT OR BSD-3-Clause" of + Left err -> Assert.fail err + Right license -> case License.extractIds license of + Left err -> Assert.fail err + Right ids -> do + Assert.shouldContain ids "MIT" + Assert.shouldContain ids "BSD-3-CLAUSE" + + Spec.it "extracts IDs from nested expression" do + case License.parse "MIT AND (Apache-2.0 OR BSD-3-Clause)" of + Left err -> Assert.fail err + Right license -> case License.extractIds license of + Left err -> Assert.fail err + Right ids -> do + Assert.shouldContain ids "MIT" + Assert.shouldContain ids "APACHE-2.0" + Assert.shouldContain ids "BSD-3-CLAUSE" + + Spec.it "normalizes license IDs to uppercase" do + case License.parse "mit" of + Left err -> Assert.fail err + Right license -> case License.extractIds license of + Left err -> Assert.fail err + Right ids -> Assert.shouldEqual [ "MIT" ] ids + valid :: Array String valid = [ "MIT"