Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions app/src/App/API.purs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ module Registry.App.API
( AuthenticatedEffects
, COMPILER_CACHE
, CompilerCache(..)
, LicenseValidationError(..)
, PackageSetUpdateEffects
, PublishEffects
, _compilerCache
Expand All @@ -12,8 +13,10 @@ module Registry.App.API
, getPacchettiBotti
, packageSetUpdate
, packagingTeam
, printLicenseValidationError
, publish
, removeIgnoredTarballFiles
, validateLicense
) where

import Registry.App.Prelude
Expand Down Expand Up @@ -53,6 +56,7 @@ import Parsing.Combinators.Array as Parsing.Combinators.Array
import Parsing.String as Parsing.String
import Registry.API.V1 (PackageSetJobData)
import Registry.App.Auth as Auth
import Registry.App.CLI.Licensee as Licensee
import Registry.App.CLI.Purs (CompilerFailure(..), compilerFailureCodec)
import Registry.App.CLI.Purs as Purs
import Registry.App.CLI.PursVersions as PursVersions
Expand Down Expand Up @@ -90,6 +94,7 @@ import Registry.Foreign.Octokit (Team)
import Registry.Foreign.Tmp as Tmp
import Registry.Internal.Codec as Internal.Codec
import Registry.Internal.Path as Internal.Path
import Registry.License as License
import Registry.Location as Location
import Registry.Manifest as Manifest
import Registry.Metadata as Metadata
Expand Down Expand Up @@ -519,6 +524,11 @@ publish maybeLegacyIndex payload = do
when (Operation.Validation.isMetadataPackage (Manifest receivedManifest)) do
Except.throw "The `metadata` package cannot be uploaded to the registry because it is a protected package."

Log.info "Verifying licenses are consistent among manifest files..."
validateLicense downloadedPackage receivedManifest.license >>= case _ of
Nothing -> Log.debug "License validation passed."
Just err -> Except.throw $ printLicenseValidationError err

for_ (Operation.Validation.isNotUnpublished (Manifest receivedManifest) (Metadata metadata)) \info -> do
Except.throw $ String.joinWith "\n"
[ "You tried to upload a version that has been unpublished: " <> Version.print receivedManifest.version
Expand Down Expand Up @@ -1322,3 +1332,82 @@ instance FsEncodable CompilerCache where
}

Exists.mkExists $ Cache.AsJson cacheKey codec next

-- | Errors that can occur when validating license consistency.
-- |
-- | `LicenseMismatch` is what `validateLicense` returns when at least one
-- | license detected in the package is not covered by the manifest license:
-- |   - `manifestLicense` is the license asserted by the package manifest.
-- |   - `detectedLicenses` holds only the detected licenses that the manifest
-- |     license does not cover (not every license that was detected).
data LicenseValidationError = LicenseMismatch
{ manifestLicense :: License
, detectedLicenses :: Array License
}

derive instance Eq LicenseValidationError

-- | Render a license validation error as a human-readable message that names
-- | the manifest license, lists the offending detected licenses, and explains
-- | how to combine several licenses into one SPDX expression.
printLicenseValidationError :: LicenseValidationError -> String
printLicenseValidationError (LicenseMismatch { manifestLicense, detectedLicenses }) =
  "License mismatch: The manifest specifies license '"
    <> License.print manifestLicense
    <> "' but the following license(s) were detected in your repository: "
    <> String.joinWith ", " (License.print <$> detectedLicenses)
    <> ". Please ensure your manifest license accurately represents all licenses "
    <> "in your repository. If multiple licenses apply, join them using SPDX "
    <> "conjunctions (e.g., 'MIT AND Apache-2.0' or 'MIT OR Apache-2.0')."

-- | Validate that the license in the manifest is consistent with licenses
-- | detected in the repository (LICENSE file, package.json, bower.json).
-- |
-- | This check ensures that the SPDX identifier asserted in the package
-- | manifest accurately represents the licenses present in the repository.
-- | If multiple distinct licenses are detected, they must all be represented
-- | in the manifest license (e.g., joined with AND or OR).
-- |
-- | Returns `Nothing` when validation passes or cannot be performed (license
-- | detection failed, nothing parseable was detected, or the manifest license
-- | IDs could not be extracted). Returns `Just (LicenseMismatch ...)`, listing
-- | only the detected licenses the manifest does not cover, otherwise.
-- |
-- | Detected strings that are not valid SPDX expressions cannot be compared
-- | against the manifest, so they are skipped; a warning is logged for them so
-- | that they are never dropped silently.
validateLicense :: forall r. FilePath -> License -> Run (LOG + AFF + r) (Maybe LicenseValidationError)
validateLicense packageDir manifestLicense = do
  Log.debug "Detecting licenses from repository files..."
  detected <- Run.liftAff $ Licensee.detect packageDir
  case detected of
    Left err -> do
      -- Detection is best-effort: a failure here must not block publishing.
      Log.warn $ "License detection failed, relying on manifest: " <> err
      pure Nothing
    Right detectedStrings -> do
      let
        parsedLicenses :: Array License
        parsedLicenses = Array.mapMaybe (hush <<< License.parse) detectedStrings

        -- Selects the detected strings that fail to parse as SPDX expressions.
        unparseable :: String -> Maybe String
        unparseable str = case License.parse str of
          Left _ -> Just str
          Right _ -> Nothing

        skippedStrings :: Array String
        skippedStrings = Array.mapMaybe unparseable detectedStrings

      Log.debug $ "Detected licenses: " <> String.joinWith ", " detectedStrings

      -- Surface anything we are unable to validate instead of ignoring it.
      unless (Array.null skippedStrings) do
        Log.warn $ "Ignoring detected license(s) that are not valid SPDX expressions: " <> String.joinWith ", " skippedStrings

      if Array.null parsedLicenses then do
        Log.debug "No licenses detected from repository files, nothing to validate."
        pure Nothing
      else case License.extractIds manifestLicense of
        Left err -> do
          -- This shouldn't be possible (we have already validated the license)
          -- as part of constructing the manifest
          Log.warn $ "Could not extract license IDs from manifest: " <> err
          pure Nothing
        Right manifestIds -> do
          let
            manifestIdSet = Set.fromFoldable manifestIds

            -- A detected license is covered if all its IDs are in the manifest IDs.
            -- A detected license whose IDs cannot be extracted counts as uncovered.
            isCovered :: License -> Boolean
            isCovered license = case License.extractIds license of
              Left _ -> false
              Right ids -> Array.all (\id -> Set.member id manifestIdSet) ids

            uncoveredLicenses :: Array License
            uncoveredLicenses = Array.filter (not <<< isCovered) parsedLicenses

          if Array.null uncoveredLicenses then do
            Log.debug "All detected licenses are covered by the manifest license."
            pure Nothing
          else do
            -- The log lists everything detected for context; the returned
            -- error carries only the licenses the manifest fails to cover.
            Log.warn $ Array.fold
              [ "License mismatch detected: manifest has '"
              , License.print manifestLicense
              , "' but detected "
              , String.joinWith ", " (map License.print parsedLicenses)
              ]
            pure $ Just $ LicenseMismatch
              { manifestLicense
              , detectedLicenses: uncoveredLicenses
              }
49 changes: 49 additions & 0 deletions app/test/App/API.purs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import Effect.Ref as Ref
import Node.FS.Aff as FS.Aff
import Node.Path as Path
import Node.Process as Process
import Registry.App.API (LicenseValidationError(..), validateLicense)
import Registry.App.API as API
import Registry.App.CLI.Tar as Tar
import Registry.App.Effect.Env as Env
Expand All @@ -27,6 +28,7 @@ import Registry.Foreign.FSExtra as FS.Extra
import Registry.Foreign.FastGlob as FastGlob
import Registry.Foreign.Tmp as Tmp
import Registry.Internal.Codec as Internal.Codec
import Registry.License as License
import Registry.Manifest as Manifest
import Registry.ManifestIndex as ManifestIndex
import Registry.PackageName as PackageName
Expand Down Expand Up @@ -57,6 +59,9 @@ spec = do
Spec.describe "Verifies build plans" do
checkBuildPlanToResolutions

Spec.describe "Validates licenses match" do
licenseValidation

Spec.describe "Includes correct files in tarballs" do
removeIgnoredTarballFiles
copySourceFiles
Expand Down Expand Up @@ -441,3 +446,47 @@ copySourceFiles = Spec.hoistSpec identity (\_ -> Assert.Run.runBaseEffects) $ Sp
writeFiles = Run.liftAff <<< traverse_ (\path -> FS.Aff.writeTextFile UTF8 (inTmp path) "module Module where")

pure { source: tmp, destination: destTmp, writeDirectories, writeFiles }

-- | Specs for `validateLicense`, all run against the `halogen-hooks` license
-- | fixture directory with different manifest licenses.
licenseValidation :: Spec.Spec Unit
licenseValidation = Spec.describe "validateLicense" do
  Spec.it "Passes when manifest license covers all detected licenses" do
    -- The halogen-hooks fixture has MIT in LICENSE and Apache-2.0 in package.json
    outcome <- validateAgainstFixture "MIT AND Apache-2.0"
    Assert.shouldEqual Nothing outcome

  Spec.it "Fails when manifest license does not cover a detected license" do
    -- Manifest says MIT only, but Apache-2.0 is also in package.json
    validateAgainstFixture "MIT" >>= case _ of
      Just (LicenseMismatch { detectedLicenses }) ->
        -- Should detect that Apache-2.0 is not covered
        Assert.shouldContain (map License.print detectedLicenses) "Apache-2.0"
      _ ->
        Assert.fail "Expected LicenseMismatch error"

  Spec.it "Fails when manifest has completely different license" do
    -- Manifest says BSD-3-Clause, but fixture has MIT and Apache-2.0
    validateAgainstFixture "BSD-3-Clause" >>= case _ of
      Just (LicenseMismatch { manifestLicense: reported, detectedLicenses }) -> do
        let uncovered = map License.print detectedLicenses
        Assert.shouldEqual "BSD-3-Clause" (License.print reported)
        -- Both MIT and Apache-2.0 should be in the detected licenses
        Assert.shouldContain uncovered "MIT"
        Assert.shouldContain uncovered "Apache-2.0"
      _ ->
        Assert.fail "Expected LicenseMismatch error"

  Spec.it "Passes when manifest uses OR conjunction" do
    -- OR conjunction is also valid - means either license applies
    outcome <- validateAgainstFixture "MIT OR Apache-2.0"
    Assert.shouldEqual Nothing outcome
  where
  -- Directory holding the fixture package used by every case above.
  fixtureDir = Path.concat [ "app", "fixtures", "licenses", "halogen-hooks" ]

  -- Parse the given SPDX expression as the manifest license and validate it
  -- against the fixture directory.
  validateAgainstFixture spdx =
    Assert.Run.runBaseEffects $ validateLicense fixtureDir (unsafeLicense spdx)

-- | Parse an SPDX expression into a `License` via `unsafeFromRight`, for use
-- | with inputs the tests expect to parse successfully.
unsafeLicense :: String -> License
unsafeLicense = unsafeFromRight <<< License.parse
31 changes: 31 additions & 0 deletions lib/src/License.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,34 @@ export const parseSPDXLicenseIdImpl = (onError, onSuccess, identifier) => {
return onError(`Invalid SPDX identifier ${identifier}`);
}
};

// Collect every distinct license ID mentioned in a parsed SPDX expression AST.
// Node shapes produced by spdx-expression-parse:
// - Simple: { license: 'MIT' }
// - With exception: { license: 'GPL-2.0', exception: 'Classpath-exception-2.0' }
// - Compound: { left: {...}, conjunction: 'and'|'or', right: {...} }
// IDs are upper-cased so callers can compare them case-insensitively, and are
// returned in order of first appearance (left to right). Exceptions are not
// included in the result.
const extractLicenseIds = (ast) => {
  const seen = new Set();
  const pending = [ast];

  while (pending.length > 0) {
    const current = pending.pop();
    if (!current) continue;
    if (current.license) {
      seen.add(current.license.toUpperCase());
    }
    // Push the right child first so the left child is popped (visited) first.
    if (current.right) pending.push(current.right);
    if (current.left) pending.push(current.left);
  }

  return [...seen];
};

// FFI entry point: parse `expression` as an SPDX expression and hand the
// upper-cased license IDs it mentions to `onSuccess`. Any exception raised
// while parsing or collecting the IDs is reported through `onError` with a
// message that quotes the offending expression.
export const extractLicenseIdsImpl = (onError, onSuccess, expression) => {
  try {
    const licenseIds = extractLicenseIds(parse(expression));
    return onSuccess(licenseIds);
  } catch (_error) {
    return onError(`Invalid SPDX expression: ${expression}`);
  }
};
9 changes: 9 additions & 0 deletions lib/src/License.purs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ module Registry.License
( License
, SPDXConjunction(..)
, codec
, extractIds
, joinWith
, parse
, print
Expand Down Expand Up @@ -52,6 +53,14 @@ foreign import parseSPDXLicenseIdImpl :: forall r. Fn3 (String -> r) (String ->
parse :: String -> Either String License
parse = runFn3 parseSPDXLicenseIdImpl Left (Right <<< License)

foreign import extractLicenseIdsImpl :: forall r. Fn3 (String -> r) (Array String -> r) String r

-- | List the distinct license identifiers mentioned in a SPDX expression.
-- | Identifiers are upper-cased so they can be compared case-insensitively;
-- | for example, "MIT AND Apache-2.0" yields ["MIT", "APACHE-2.0"].
-- | A `Left` carries the error message reported when the underlying
-- | expression cannot be parsed.
extractIds :: License -> Either String (Array String)
extractIds = case _ of
  License expression -> runFn3 extractLicenseIdsImpl Left Right expression

-- | A valid conjunction for SPDX license identifiers. AND means that both
-- | licenses must be satisfied; OR means that at least one license must be
-- | satisfied.
Expand Down
43 changes: 43 additions & 0 deletions lib/test/Registry/License.purs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,49 @@ spec = do
Left err -> Assert.fail $ "joinWith created unparseable expression: " <> License.print joined <> " - Error: " <> err
Right _ -> pure unit

Spec.describe "extractIds" do
Spec.it "extracts single license ID" do
case License.parse "MIT" of
Left err -> Assert.fail err
Right license -> case License.extractIds license of
Left err -> Assert.fail err
Right ids -> Assert.shouldEqual [ "MIT" ] ids

Spec.it "extracts IDs from AND expression" do
case License.parse "MIT AND Apache-2.0" of
Left err -> Assert.fail err
Right license -> case License.extractIds license of
Left err -> Assert.fail err
Right ids -> do
Assert.shouldContain ids "MIT"
Assert.shouldContain ids "APACHE-2.0"

Spec.it "extracts IDs from OR expression" do
case License.parse "MIT OR BSD-3-Clause" of
Left err -> Assert.fail err
Right license -> case License.extractIds license of
Left err -> Assert.fail err
Right ids -> do
Assert.shouldContain ids "MIT"
Assert.shouldContain ids "BSD-3-CLAUSE"

Spec.it "extracts IDs from nested expression" do
case License.parse "MIT AND (Apache-2.0 OR BSD-3-Clause)" of
Left err -> Assert.fail err
Right license -> case License.extractIds license of
Left err -> Assert.fail err
Right ids -> do
Assert.shouldContain ids "MIT"
Assert.shouldContain ids "APACHE-2.0"
Assert.shouldContain ids "BSD-3-CLAUSE"

Spec.it "normalizes license IDs to uppercase" do
case License.parse "mit" of
Left err -> Assert.fail err
Right license -> case License.extractIds license of
Left err -> Assert.fail err
Right ids -> Assert.shouldEqual [ "MIT" ] ids

valid :: Array String
valid =
[ "MIT"
Expand Down