From 699ef4143e478da30fc931880076fd98f3b7eb2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= Date: Thu, 9 Apr 2026 11:33:30 +0200 Subject: [PATCH 1/7] feat: add support for CNCF ModelPack format and enhance context size handling --- cmd/cli/commands/integration_test.go | 3 +- cmd/cli/commands/package.go | 53 +++- go.mod | 1 + go.sum | 2 + pkg/distribution/builder/builder.go | 257 ++++++++++++++++-- pkg/distribution/builder/builder_test.go | 15 +- pkg/distribution/builder/from_directory.go | 42 ++- pkg/distribution/internal/mutate/model.go | 74 ++++- pkg/distribution/internal/mutate/mutate.go | 11 + .../internal/partial/cncf_model.go | 146 ++++++++++ pkg/distribution/internal/partial/partial.go | 81 +++++- pkg/distribution/modelpack/convert.go | 134 +++++++++ pkg/distribution/modelpack/types.go | 41 ++- pkg/distribution/oci/manifest.go | 16 +- pkg/distribution/oci/remote/remote.go | 66 ++++- 15 files changed, 856 insertions(+), 86 deletions(-) create mode 100644 pkg/distribution/internal/partial/cncf_model.go create mode 100644 pkg/distribution/modelpack/convert.go diff --git a/cmd/cli/commands/integration_test.go b/cmd/cli/commands/integration_test.go index 0bda81091..c8413d58d 100644 --- a/cmd/cli/commands/integration_test.go +++ b/cmd/cli/commands/integration_test.go @@ -269,7 +269,8 @@ func createAndPushTestModel(t *testing.T, registryURL, modelRef string, contextS // Set context size if specified if contextSize != nil { - pkg = pkg.WithContextSize(*contextSize) + pkg, err = pkg.WithContextSize(*contextSize) + require.NoError(t, err) } // Construct the full reference with the local registry host for pushing from test host diff --git a/cmd/cli/commands/package.go b/cmd/cli/commands/package.go index d1c8dcdab..c455134a5 100644 --- a/cmd/cli/commands/package.go +++ b/cmd/cli/commands/package.go @@ -208,6 +208,8 @@ Packaging behavior: c.Flags().StringVar(&opts.mmprojPath, "mmproj", "", "absolute path to multimodal projector file") 
c.Flags().BoolVar(&opts.push, "push", false, "push to registry (if not set, the model is loaded into the Model Runner content store)") c.Flags().Uint64Var(&opts.contextSize, "context-size", 0, "context size in tokens") + c.Flags().StringVar(&opts.format, "format", "docker", + "output artifact format: \"docker\" (default) or \"cncf\" (CNCF ModelPack spec)") return c } @@ -222,21 +224,30 @@ type packageOptions struct { mmprojPath string push bool tag string + format string // "docker" (default) or "cncf" } -// builderInitResult contains the result of initializing a builder from various sources +// builderInitResult contains the result of initializing a builder from +// various sources. type builderInitResult struct { builder *builder.Builder - distClient *distribution.Client // Only set when building from existing model - cleanupFunc func() // Optional cleanup function for temporary files + distClient *distribution.Client // Only set when building from existing model. + cleanupFunc func() // Optional cleanup function for temporary files. } -// initializeBuilder creates a package builder from GGUF, Safetensors, DDUF, or existing model +// initializeBuilder creates a package builder from GGUF, Safetensors, DDUF, +// or existing model. func initializeBuilder(ctx context.Context, cmd *cobra.Command, client *desktop.Client, opts packageOptions) (*builderInitResult, error) { result := &builderInitResult{} + // Map the CLI format string to a BuildFormat constant. + buildFmt := builder.BuildFormatDocker + if opts.format == "cncf" { + buildFmt = builder.BuildFormatCNCF + } + if opts.fromModel != "" { - // Get the model store path + // Get the model store path. userHomeDir, err := os.UserHomeDir() if err != nil { return nil, fmt.Errorf("get user home directory: %w", err) @@ -246,14 +257,14 @@ func initializeBuilder(ctx context.Context, cmd *cobra.Command, client *desktop. 
modelStorePath = envPath } - // Create a distribution client to access the model store + // Create a distribution client to access the model store. distClient, err := distribution.NewClient(distribution.WithStoreRootPath(modelStorePath)) if err != nil { return nil, fmt.Errorf("create distribution client: %w", err) } result.distClient = distClient - // Package from existing model + // Package from existing model. cmd.PrintErrf("Reading model from store: %q\n", opts.fromModel) mdl, err := distClient.GetModel(opts.fromModel) @@ -266,35 +277,36 @@ func initializeBuilder(ctx context.Context, cmd *cobra.Command, client *desktop. } } - // Type assert to ModelArtifact - the Model from store implements both interfaces + // Type assert to ModelArtifact. modelArtifact, ok := mdl.(types.ModelArtifact) if !ok { return nil, fmt.Errorf("model does not implement ModelArtifact interface") } cmd.PrintErrf("Creating builder from existing model\n") - result.builder, err = builder.FromModel(modelArtifact) + result.builder, err = builder.FromModel(modelArtifact, builder.WithFormat(buildFmt)) if err != nil { return nil, fmt.Errorf("create builder from model: %w", err) } } else if opts.ggufPath != "" { cmd.PrintErrf("Adding GGUF file from %q\n", opts.ggufPath) - pkg, err := builder.FromPath(opts.ggufPath) + pkg, err := builder.FromPath(opts.ggufPath, builder.WithFormat(buildFmt)) if err != nil { return nil, fmt.Errorf("add gguf file: %w", err) } result.builder = pkg } else if opts.ddufPath != "" { cmd.PrintErrf("Adding DDUF file from %q\n", opts.ddufPath) - pkg, err := builder.FromPath(opts.ddufPath) + pkg, err := builder.FromPath(opts.ddufPath, builder.WithFormat(buildFmt)) if err != nil { return nil, fmt.Errorf("add dduf file: %w", err) } result.builder = pkg } else if opts.safetensorsDir != "" { - // Safetensors model from directory — uses V0.2 layer-per-file packaging + // Safetensors model from directory — uses V0.2 layer-per-file packaging. 
cmd.PrintErrf("Scanning directory %q for safetensors model...\n", opts.safetensorsDir) - pkg, err := builder.FromDirectory(opts.safetensorsDir) + pkg, err := builder.FromDirectory(opts.safetensorsDir, + builder.WithOutputFormat(buildFmt)) if err != nil { return nil, fmt.Errorf("create safetensors model from directory: %w", err) } @@ -344,9 +356,17 @@ func fetchModelFromDaemon(ctx context.Context, cmd *cobra.Command, client *deskt } func packageModel(ctx context.Context, cmd *cobra.Command, client *desktop.Client, opts packageOptions) error { - // Use daemon-side repackaging for simple config-only changes (no new layers) + // Validate format flag. + if opts.format != "docker" && opts.format != "cncf" { + return fmt.Errorf("invalid --format value %q: must be \"docker\" or \"cncf\"", opts.format) + } + + // Use daemon-side repackaging for simple config-only changes (no new + // layers). Disabled for CNCF format because the daemon produces + // Docker-format artifacts. canUseDaemonRepackage := opts.fromModel != "" && !opts.push && + opts.format != "cncf" && len(opts.licensePaths) == 0 && opts.chatTemplatePath == "" && opts.mmprojPath == "" && @@ -408,7 +428,10 @@ func packageModel(ctx context.Context, cmd *cobra.Command, client *desktop.Clien // Set context size if cmd.Flags().Changed("context-size") { cmd.PrintErrf("Setting context size %d\n", opts.contextSize) - pkg = pkg.WithContextSize(int32(opts.contextSize)) + pkg, err = pkg.WithContextSize(int32(opts.contextSize)) + if err != nil { + return err + } } // Add license files diff --git a/go.mod b/go.mod index 5c25cad70..5a5bc80aa 100644 --- a/go.mod +++ b/go.mod @@ -22,6 +22,7 @@ require ( github.com/moby/moby/api v1.54.1 github.com/moby/moby/client v0.4.0 github.com/moby/term v0.5.2 + github.com/modelpack/model-spec v0.0.7 github.com/muesli/termenv v0.16.0 github.com/nxadm/tail v1.4.11 github.com/olekukonko/tablewriter v1.1.4 diff --git a/go.sum b/go.sum index 45ef72021..f35af8c8a 100644 --- a/go.sum +++ b/go.sum 
@@ -201,6 +201,8 @@ github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ= github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc= +github.com/modelpack/model-spec v0.0.7 h1:3fAxau4xUqF0Pf1zzFC5lItF0gEaiXLxaCcPAH8PW8I= +github.com/modelpack/model-spec v0.0.7/go.mod h1:5Go37og1RmvcTdVI5Remd+PpQRNLlKSNwSNbXmEqu50= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= diff --git a/pkg/distribution/builder/builder.go b/pkg/distribution/builder/builder.go index d306b12fb..18dc87dce 100644 --- a/pkg/distribution/builder/builder.go +++ b/pkg/distribution/builder/builder.go @@ -2,22 +2,39 @@ package builder import ( "context" + "encoding/json" "fmt" "io" "time" + "github.com/opencontainers/go-digest" + "github.com/docker/model-runner/pkg/distribution/format" "github.com/docker/model-runner/pkg/distribution/internal/mutate" "github.com/docker/model-runner/pkg/distribution/internal/partial" + "github.com/docker/model-runner/pkg/distribution/modelpack" "github.com/docker/model-runner/pkg/distribution/oci" "github.com/docker/model-runner/pkg/distribution/types" ) +// BuildFormat specifies the output artifact format. +type BuildFormat string + +const ( + // BuildFormatDocker produces Docker-proprietary format artifacts + // (application/vnd.docker.ai.* media types). This is the default. + BuildFormatDocker BuildFormat = "docker" + // BuildFormatCNCF produces CNCF ModelPack format artifacts + // (application/vnd.cncf.model.* media types). 
+ BuildFormatCNCF BuildFormat = "cncf" +) + // BuildOption configures the behavior of FromPath and FromPaths. type BuildOption func(*buildOptions) type buildOptions struct { created *time.Time + format BuildFormat } // WithCreated sets a specific creation timestamp for the model artifact. @@ -30,10 +47,18 @@ func WithCreated(t time.Time) BuildOption { } } -// Builder builds a model artifact +// WithFormat sets the output artifact format. Defaults to BuildFormatDocker. +func WithFormat(f BuildFormat) BuildOption { + return func(opts *buildOptions) { + opts.format = f + } +} + +// Builder builds a model artifact. type Builder struct { model types.ModelArtifact - originalLayers []oci.Layer // Snapshot of layers when created from existing model + originalLayers []oci.Layer // Snapshot of layers when created from existing model. + outputFormat BuildFormat // Output artifact format (docker or cncf). } // FromPath returns a *Builder that builds model artifacts from a file path. @@ -81,7 +106,8 @@ func fromFormat(f format.Format, paths []string, opts ...BuildOption) (*Builder, opt(options) } - // Create layers from paths + // Create layers from paths using the Docker media type initially. + // For CNCF output, media types are remapped below. layers := make([]oci.Layer, len(paths)) diffIDs := make([]oci.Hash, len(paths)) @@ -99,13 +125,13 @@ func fromFormat(f format.Format, paths []string, opts ...BuildOption) (*Builder, diffIDs[i] = diffID } - // Extract config metadata using format-specific logic + // Extract config metadata using format-specific logic. config, err := f.ExtractConfig(paths) if err != nil { return nil, fmt.Errorf("extract config: %w", err) } - // Use the provided creation time, or fall back to current time + // Use the provided creation time, or fall back to current time. 
var created time.Time if options.created != nil { created = *options.created @@ -113,7 +139,11 @@ func fromFormat(f format.Format, paths []string, opts ...BuildOption) (*Builder, created = time.Now() } - // Build the model + if options.format == BuildFormatCNCF { + return fromFormatCNCF(config, layers, diffIDs, types.Descriptor{Created: &created}) + } + + // Build the Docker-format model (default). mdl := &partial.BaseModel{ ModelConfigFile: types.ConfigFile{ Config: config, @@ -129,69 +159,252 @@ func fromFormat(f format.Format, paths []string, opts ...BuildOption) (*Builder, } return &Builder{ - model: mdl, + model: mdl, + outputFormat: BuildFormatDocker, + }, nil +} + +// fromFormatCNCF builds a CNCFModel from format-extracted config and layers. +func fromFormatCNCF( + config types.Config, + layers []oci.Layer, + diffIDs []oci.Hash, + desc types.Descriptor, +) (*Builder, error) { + // Convert DiffIDs from oci.Hash to digest.Digest. + cncfDiffIDs := make([]digest.Digest, len(diffIDs)) + for i, d := range diffIDs { + cncfDiffIDs[i] = digest.Digest(d.String()) + } + + // Remap layer media types to CNCF. + cncfLayers := make([]oci.Layer, len(layers)) + for i, l := range layers { + mt, err := l.MediaType() + if err != nil { + return nil, fmt.Errorf("get layer media type: %w", err) + } + fp := layerFilePath(l) + cncfMT := modelpack.MapLayerMediaType(mt, fp) + cncfLayers[i] = &remappedLayer{Layer: l, newMediaType: cncfMT} + } + + mp := modelpack.DockerConfigToModelPack(config, desc, cncfDiffIDs) + mdl := &partial.CNCFModel{ + ModelPackConfig: mp, + LayerList: cncfLayers, + } + return &Builder{ + model: mdl, + outputFormat: BuildFormatCNCF, }, nil } -// FromModel returns a *Builder that builds model artifacts from an existing model artifact -func FromModel(mdl types.ModelArtifact) (*Builder, error) { - // Capture original layers for comparison +// layerFilePath extracts the filepath annotation from a layer, if present. 
+func layerFilePath(l oci.Layer) string { + type descriptorProvider interface { + GetDescriptor() oci.Descriptor + } + if dp, ok := l.(descriptorProvider); ok { + if fp, ok := dp.GetDescriptor().Annotations[types.AnnotationFilePath]; ok { + return fp + } + } + return "" +} + +// remappedLayer wraps an existing layer and overrides its media type. +type remappedLayer struct { + oci.Layer + newMediaType oci.MediaType +} + +// MediaType returns the remapped media type. +func (r *remappedLayer) MediaType() (oci.MediaType, error) { + return r.newMediaType, nil +} + +// GetDescriptor returns a copy of the underlying descriptor with the +// overridden media type. +func (r *remappedLayer) GetDescriptor() oci.Descriptor { + type descriptorProvider interface { + GetDescriptor() oci.Descriptor + } + if dp, ok := r.Layer.(descriptorProvider); ok { + desc := dp.GetDescriptor() + desc.MediaType = r.newMediaType + return desc + } + return oci.Descriptor{MediaType: r.newMediaType} +} + +// FromModel returns a *Builder that builds model artifacts from an existing +// model artifact. Pass WithFormat(BuildFormatCNCF) to convert the artifact +// to CNCF ModelPack format on output. +func FromModel(mdl types.ModelArtifact, opts ...BuildOption) (*Builder, error) { + options := &buildOptions{} + for _, opt := range opts { + opt(options) + } + + // Capture original layers for comparison. layers, err := mdl.Layers() if err != nil { return nil, fmt.Errorf("getting model layers: %w", err) } + + if options.format == BuildFormatCNCF { + // Convert the source artifact eagerly to CNCF format. This is + // necessary because mutations (WithLicense, etc.) and lightweight + // repackaging both operate on the builder state before Build(). 
+ cncfMdl, err := convertToCNCF(mdl) + if err != nil { + return nil, fmt.Errorf("convert to cncf format: %w", err) + } + return &Builder{ + model: cncfMdl, + originalLayers: layers, + outputFormat: BuildFormatCNCF, + }, nil + } + return &Builder{ model: mdl, originalLayers: layers, + outputFormat: BuildFormatDocker, + }, nil +} + +// convertToCNCF converts an existing model artifact to a CNCFModel. It remaps +// all layer media types and converts the config to CNCF ModelPack format. +func convertToCNCF(mdl types.ModelArtifact) (*partial.CNCFModel, error) { + layers, err := mdl.Layers() + if err != nil { + return nil, fmt.Errorf("get layers: %w", err) + } + + // Get the Docker-format config. + rawCfg, err := mdl.RawConfigFile() + if err != nil { + return nil, fmt.Errorf("get raw config: %w", err) + } + + // Remap layer media types and collect DiffIDs. + cncfLayers := make([]oci.Layer, len(layers)) + diffIDs := make([]digest.Digest, len(layers)) + for i, l := range layers { + mt, err := l.MediaType() + if err != nil { + return nil, fmt.Errorf("get layer media type: %w", err) + } + fp := layerFilePath(l) + cncfMT := modelpack.MapLayerMediaType(mt, fp) + cncfLayers[i] = &remappedLayer{Layer: l, newMediaType: cncfMT} + + diffID, err := l.DiffID() + if err != nil { + return nil, fmt.Errorf("get layer diffID: %w", err) + } + diffIDs[i] = digest.Digest(diffID.String()) + } + + // Build the CNCF config. If the source is already ModelPack format, use + // it directly (updating the DiffIDs from current layers). Otherwise + // convert from Docker format. 
+ var mp modelpack.Model + if modelpack.IsModelPackConfig(rawCfg) { + if err := json.Unmarshal(rawCfg, &mp); err != nil { + return nil, fmt.Errorf("unmarshal modelpack config: %w", err) + } + mp.ModelFS.DiffIDs = diffIDs + } else { + var cf types.ConfigFile + if err := json.Unmarshal(rawCfg, &cf); err != nil { + return nil, fmt.Errorf("unmarshal docker config: %w", err) + } + mp = modelpack.DockerConfigToModelPack(cf.Config, cf.Descriptor, diffIDs) + } + + return &partial.CNCFModel{ + ModelPackConfig: mp, + LayerList: cncfLayers, }, nil } -// WithLicense adds a license file to the artifact +// resolveLayerMediaType returns the appropriate media type for an additional +// layer based on the builder's output format. For CNCF format, Docker media +// types are remapped to their CNCF equivalents. +func (b *Builder) resolveLayerMediaType(dockerMT oci.MediaType) oci.MediaType { + if b.outputFormat == BuildFormatCNCF { + return modelpack.MapLayerMediaType(dockerMT, "") + } + return dockerMT +} + +// WithLicense adds a license file to the artifact. func (b *Builder) WithLicense(path string) (*Builder, error) { - licenseLayer, err := partial.NewLayer(path, types.MediaTypeLicense) + mt := b.resolveLayerMediaType(types.MediaTypeLicense) + licenseLayer, err := partial.NewLayer(path, mt) if err != nil { return nil, fmt.Errorf("license layer from %q: %w", path, err) } return &Builder{ model: mutate.AppendLayers(b.model, licenseLayer), originalLayers: b.originalLayers, + outputFormat: b.outputFormat, }, nil } -func (b *Builder) WithContextSize(size int32) *Builder { +// WithContextSize sets the context size for the model artifact. +// Returns an error when the output format is CNCF (context size is not +// defined in the CNCF ModelPack specification). 
+func (b *Builder) WithContextSize(size int32) (*Builder, error) { + if b.outputFormat == BuildFormatCNCF { + return nil, fmt.Errorf( + "--context-size is not supported with --format cncf: " + + "the CNCF ModelPack specification does not define a context " + + "size field", + ) + } return &Builder{ model: mutate.ContextSize(b.model, size), originalLayers: b.originalLayers, - } + outputFormat: b.outputFormat, + }, nil } -// WithMultimodalProjector adds a Multimodal projector file to the artifact +// WithMultimodalProjector adds a multimodal projector file to the artifact. func (b *Builder) WithMultimodalProjector(path string) (*Builder, error) { - mmprojLayer, err := partial.NewLayer(path, types.MediaTypeMultimodalProjector) + mt := b.resolveLayerMediaType(types.MediaTypeMultimodalProjector) + mmprojLayer, err := partial.NewLayer(path, mt) if err != nil { return nil, fmt.Errorf("mmproj layer from %q: %w", path, err) } return &Builder{ model: mutate.AppendLayers(b.model, mmprojLayer), originalLayers: b.originalLayers, + outputFormat: b.outputFormat, }, nil } -// WithChatTemplateFile adds a Jinja chat template file to the artifact which takes precedence over template from GGUF. +// WithChatTemplateFile adds a Jinja chat template file to the artifact, +// taking precedence over any template embedded in the GGUF file. func (b *Builder) WithChatTemplateFile(path string) (*Builder, error) { - templateLayer, err := partial.NewLayer(path, types.MediaTypeChatTemplate) + mt := b.resolveLayerMediaType(types.MediaTypeChatTemplate) + templateLayer, err := partial.NewLayer(path, mt) if err != nil { return nil, fmt.Errorf("chat template layer from %q: %w", path, err) } return &Builder{ model: mutate.AppendLayers(b.model, templateLayer), originalLayers: b.originalLayers, + outputFormat: b.outputFormat, }, nil } -// WithConfigArchive adds a config archive (tar) file to the artifact +// WithConfigArchive adds a config archive (tar) file to the artifact. 
func (b *Builder) WithConfigArchive(path string) (*Builder, error) { - // Check if config archive already exists + // Check if config archive already exists. layers, err := b.model.Layers() if err != nil { return nil, fmt.Errorf("get model layers: %w", err) @@ -204,13 +417,15 @@ func (b *Builder) WithConfigArchive(path string) (*Builder, error) { } } - configLayer, err := partial.NewLayer(path, types.MediaTypeVLLMConfigArchive) + mt := b.resolveLayerMediaType(types.MediaTypeVLLMConfigArchive) + configLayer, err := partial.NewLayer(path, mt) if err != nil { return nil, fmt.Errorf("config archive layer from %q: %w", path, err) } return &Builder{ model: mutate.AppendLayers(b.model, configLayer), originalLayers: b.originalLayers, + outputFormat: b.outputFormat, }, nil } diff --git a/pkg/distribution/builder/builder_test.go b/pkg/distribution/builder/builder_test.go index 1c71cf502..6e4ff4d67 100644 --- a/pkg/distribution/builder/builder_test.go +++ b/pkg/distribution/builder/builder_test.go @@ -190,7 +190,10 @@ func TestWithMultimodalProjectorChaining(t *testing.T) { t.Fatalf("Failed to add multimodal projector: %v", err) } - b = b.WithContextSize(4096) + b, err = b.WithContextSize(4096) + if err != nil { + t.Fatalf("Failed to set context size: %v", err) + } // Build the model target := &fakeTarget{} @@ -256,7 +259,10 @@ func TestFromModel(t *testing.T) { } // Set initial context size - initialBuilder = initialBuilder.WithContextSize(2048) + initialBuilder, err = initialBuilder.WithContextSize(2048) + if err != nil { + t.Fatalf("Failed to set context size: %v", err) + } // Build the initial model initialTarget := &fakeTarget{} @@ -280,7 +286,10 @@ func TestFromModel(t *testing.T) { } // Step 3: Modify the context size to 4096 - repackagedBuilder = repackagedBuilder.WithContextSize(4096) + repackagedBuilder, err = repackagedBuilder.WithContextSize(4096) + if err != nil { + t.Fatalf("Failed to set context size: %v", err) + } // Step 4: Build the repackaged model 
repackagedTarget := &fakeTarget{} diff --git a/pkg/distribution/builder/from_directory.go b/pkg/distribution/builder/from_directory.go index 5c7f53a2a..d20982922 100644 --- a/pkg/distribution/builder/from_directory.go +++ b/pkg/distribution/builder/from_directory.go @@ -8,10 +8,13 @@ import ( "strings" "time" + "github.com/opencontainers/go-digest" + "github.com/docker/model-runner/pkg/distribution/files" "github.com/docker/model-runner/pkg/distribution/format" "github.com/docker/model-runner/pkg/distribution/internal/mutate" "github.com/docker/model-runner/pkg/distribution/internal/partial" + "github.com/docker/model-runner/pkg/distribution/modelpack" "github.com/docker/model-runner/pkg/distribution/oci" "github.com/docker/model-runner/pkg/distribution/types" ) @@ -32,6 +35,9 @@ type DirectoryOptions struct { // When set, it overrides the default behavior of using time.Now(). // This is useful for producing deterministic OCI digests. Created *time.Time + + // Format is the output artifact format. Defaults to BuildFormatDocker. + Format BuildFormat } // DirectoryOption is a functional option for configuring FromDirectory. @@ -62,6 +68,13 @@ func WithCreatedTime(t time.Time) DirectoryOption { } } +// WithOutputFormat sets the output artifact format for the directory builder. +func WithOutputFormat(f BuildFormat) DirectoryOption { + return func(opts *DirectoryOptions) { + opts.Format = f + } +} + // FromDirectory creates a Builder from a directory containing model files. // It recursively scans the directory and adds each non-hidden file as a separate layer. // Each layer's filepath annotation preserves the relative path from the directory root. @@ -232,7 +245,31 @@ func FromDirectory(dirPath string, opts ...DirectoryOption) (*Builder, error) { created = time.Now() } - // Build the model with V0.2 config (layer-per-file with annotations) + if options.Format == BuildFormatCNCF { + // Remap layer media types and convert config to CNCF format. 
+ cncfLayers := make([]oci.Layer, len(layers)) + cncfDiffIDs := make([]digest.Digest, len(diffIDs)) + for i, l := range layers { + mt, err := l.MediaType() + if err != nil { + return nil, fmt.Errorf("get layer media type: %w", err) + } + fp := layerFilePath(l) + cncfLayers[i] = &remappedLayer{Layer: l, newMediaType: modelpack.MapLayerMediaType(mt, fp)} + cncfDiffIDs[i] = digest.Digest(diffIDs[i].String()) + } + mp := modelpack.DockerConfigToModelPack( + config, + types.Descriptor{Created: &created}, + cncfDiffIDs, + ) + return &Builder{ + model: &partial.CNCFModel{ModelPackConfig: mp, LayerList: cncfLayers}, + outputFormat: BuildFormatCNCF, + }, nil + } + + // Build the Docker-format model with V0.2 config (layer-per-file with annotations). mdl := &partial.BaseModel{ ModelConfigFile: types.ConfigFile{ Config: config, @@ -249,7 +286,8 @@ func FromDirectory(dirPath string, opts ...DirectoryOption) (*Builder, error) { } return &Builder{ - model: mdl, + model: mdl, + outputFormat: BuildFormatDocker, }, nil } diff --git a/pkg/distribution/internal/mutate/model.go b/pkg/distribution/internal/mutate/model.go index 9d9407207..a4e3ef711 100644 --- a/pkg/distribution/internal/mutate/model.go +++ b/pkg/distribution/internal/mutate/model.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/docker/model-runner/pkg/distribution/internal/partial" + "github.com/docker/model-runner/pkg/distribution/modelpack" "github.com/docker/model-runner/pkg/distribution/oci" "github.com/docker/model-runner/pkg/distribution/types" ) @@ -13,6 +14,7 @@ type model struct { base types.ModelArtifact appended []oci.Layer configMediaType oci.MediaType + artifactType string contextSize *int32 } @@ -98,18 +100,53 @@ func (m *model) Layers() ([]oci.Layer, error) { return append(ls, m.appended...), nil } -func (m *model) Manifest() (*oci.Manifest, error) { - manifest, err := partial.ManifestForLayers(m) - if err != nil { - return nil, err +// GetManifestOptions implements partial.WithManifestOptions and 
propagates +// the manifest options from the base model, applying any overrides set on +// this wrapper. This ensures artifactType and config media type survive +// through arbitrarily deep mutate chains. +func (m *model) GetManifestOptions() partial.ManifestOptions { + // Start with base model's manifest options. + var opts partial.ManifestOptions + if base, ok := m.base.(partial.WithManifestOptions); ok { + opts = base.GetManifestOptions() + } else if cmt, ok := m.base.(partial.WithConfigMediaType); ok { + opts.ConfigMediaType = cmt.GetConfigMediaType() } + // Apply overrides set on this wrapper. if m.configMediaType != "" { - manifest.Config.MediaType = m.configMediaType + opts.ConfigMediaType = m.configMediaType + } + if m.artifactType != "" { + opts.ArtifactType = m.artifactType + } + return opts +} + +func (m *model) Manifest() (*oci.Manifest, error) { + // ManifestForLayers reads GetManifestOptions() via the interface, so + // config media type and artifact type are handled there. + return partial.ManifestForLayers(m) +} + +// isCNCFBase reports whether the base model chain produces CNCF ModelPack config. +func (m *model) isCNCFBase() bool { + raw, err := m.base.RawConfigFile() + if err != nil { + return false } - return manifest, nil + return modelpack.IsModelPackConfig(raw) } func (m *model) RawConfigFile() ([]byte, error) { + if m.isCNCFBase() { + return m.rawCNCFConfigFile() + } + return m.rawDockerConfigFile() +} + +// rawDockerConfigFile builds the Docker-format config file, appending DiffIDs +// and optionally setting context size. +func (m *model) rawDockerConfigFile() ([]byte, error) { cf, err := partial.ConfigFile(m.base) if err != nil { return nil, err @@ -124,9 +161,30 @@ func (m *model) RawConfigFile() ([]byte, error) { if m.contextSize != nil { cf.Config.ContextSize = m.contextSize } - raw, err := json.Marshal(cf) + return json.Marshal(cf) +} + +// rawCNCFConfigFile builds the CNCF ModelPack config file, appending DiffIDs +// to ModelFS. 
Context size is not supported in the CNCF format. +func (m *model) rawCNCFConfigFile() ([]byte, error) { + raw, err := m.base.RawConfigFile() if err != nil { return nil, err } - return raw, err + var mp modelpack.Model + if err := json.Unmarshal(raw, &mp); err != nil { + return nil, fmt.Errorf("unmarshal cncf config: %w", err) + } + for _, l := range m.appended { + diffID, err := l.DiffID() + if err != nil { + return nil, err + } + // Convert oci.Hash to digest.Digest ("algorithm:hex" string form). + mp.ModelFS.DiffIDs = append( + mp.ModelFS.DiffIDs, + modelpack.HashToDigest(diffID.String()), + ) + } + return json.Marshal(mp) } diff --git a/pkg/distribution/internal/mutate/mutate.go b/pkg/distribution/internal/mutate/mutate.go index 10101a712..a0cc0925a 100644 --- a/pkg/distribution/internal/mutate/mutate.go +++ b/pkg/distribution/internal/mutate/mutate.go @@ -25,3 +25,14 @@ func ContextSize(mdl types.ModelArtifact, cs int32) types.ModelArtifact { contextSize: &cs, } } + +// SetManifestOptions applies manifest-level metadata (config media type and +// artifact type) to the given model. The options propagate through subsequent +// mutate wrappers via GetManifestOptions. 
+func SetManifestOptions(mdl types.ModelArtifact, configMT oci.MediaType, artifactType string) types.ModelArtifact { + return &model{ + base: mdl, + configMediaType: configMT, + artifactType: artifactType, + } +} diff --git a/pkg/distribution/internal/partial/cncf_model.go b/pkg/distribution/internal/partial/cncf_model.go new file mode 100644 index 000000000..b393d6688 --- /dev/null +++ b/pkg/distribution/internal/partial/cncf_model.go @@ -0,0 +1,146 @@ +package partial + +import ( + "bytes" + "encoding/json" + "fmt" + + "github.com/docker/model-runner/pkg/distribution/modelpack" + "github.com/docker/model-runner/pkg/distribution/oci" + "github.com/docker/model-runner/pkg/distribution/types" +) + +// CNCFModel is a model artifact whose config is serialized as a CNCF +// ModelPack config (application/vnd.cncf.model.config.v1+json) and whose +// manifest carries the required artifactType field. +type CNCFModel struct { + // ModelPackConfig holds the CNCF ModelPack config to be serialized. + ModelPackConfig modelpack.Model + // LayerList is the ordered list of OCI layers. + LayerList []oci.Layer +} + +var _ types.ModelArtifact = &CNCFModel{} + +// GetManifestOptions implements WithManifestOptions, providing the CNCF +// config media type and required artifact type for the manifest. +func (m *CNCFModel) GetManifestOptions() ManifestOptions { + return ManifestOptions{ + ConfigMediaType: modelpack.MediaTypeModelConfigV1, + ArtifactType: modelpack.ArtifactTypeModelManifest, + } +} + +// GetConfigMediaType implements WithConfigMediaType for backward compatibility. 
+func (m *CNCFModel) GetConfigMediaType() oci.MediaType { + return modelpack.MediaTypeModelConfigV1 +} + +func (m *CNCFModel) Layers() ([]oci.Layer, error) { + return m.LayerList, nil +} + +func (m *CNCFModel) RawConfigFile() ([]byte, error) { + return json.Marshal(m.ModelPackConfig) +} + +func (m *CNCFModel) Manifest() (*oci.Manifest, error) { + return ManifestForLayers(m) +} + +func (m *CNCFModel) RawManifest() ([]byte, error) { + manifest, err := m.Manifest() + if err != nil { + return nil, err + } + return json.Marshal(manifest) +} + +func (m *CNCFModel) ID() (string, error) { + return ID(m) +} + +func (m *CNCFModel) Config() (types.ModelConfig, error) { + return &m.ModelPackConfig, nil +} + +func (m *CNCFModel) Descriptor() (types.Descriptor, error) { + // CNCF format stores creation time in ModelDescriptor.CreatedAt. + return types.Descriptor{Created: m.ModelPackConfig.Descriptor.CreatedAt}, nil +} + +func (m *CNCFModel) Size() (int64, error) { + raw, err := m.RawManifest() + if err != nil { + return 0, err + } + rawCfg, err := m.RawConfigFile() + if err != nil { + return 0, err + } + size := int64(len(raw)) + int64(len(rawCfg)) + for _, l := range m.LayerList { + s, err := l.Size() + if err != nil { + return 0, err + } + size += s + } + return size, nil +} + +func (m *CNCFModel) ConfigName() (oci.Hash, error) { + raw, err := m.RawConfigFile() + if err != nil { + return oci.Hash{}, err + } + h, _, err := oci.SHA256(bytes.NewReader(raw)) + return h, err +} + +func (m *CNCFModel) ConfigFile() (*oci.ConfigFile, error) { + return nil, fmt.Errorf("invalid for CNCF model") +} + +func (m *CNCFModel) Digest() (oci.Hash, error) { + raw, err := m.RawManifest() + if err != nil { + return oci.Hash{}, err + } + h, _, err := oci.SHA256(bytes.NewReader(raw)) + return h, err +} + +func (m *CNCFModel) MediaType() (oci.MediaType, error) { + manifest, err := m.Manifest() + if err != nil { + return "", fmt.Errorf("compute manifest: %w", err) + } + return manifest.MediaType, nil 
+} + +func (m *CNCFModel) LayerByDigest(hash oci.Hash) (oci.Layer, error) { + for _, l := range m.LayerList { + d, err := l.Digest() + if err != nil { + return nil, fmt.Errorf("get layer digest: %w", err) + } + if d == hash { + return l, nil + } + } + return nil, fmt.Errorf("layer not found") +} + +func (m *CNCFModel) LayerByDiffID(hash oci.Hash) (oci.Layer, error) { + for _, l := range m.LayerList { + d, err := l.DiffID() + if err != nil { + return nil, fmt.Errorf("get layer diffID: %w", err) + } + if d == hash { + return l, nil + } + } + return nil, fmt.Errorf("layer not found") +} diff --git a/pkg/distribution/internal/partial/partial.go b/pkg/distribution/internal/partial/partial.go index 5516719ae..5d7d48e35 100644 --- a/pkg/distribution/internal/partial/partial.go +++ b/pkg/distribution/internal/partial/partial.go @@ -237,7 +237,7 @@ func matchesMediaType(layerMT, targetMT oci.MediaType, modelFormat string) bool return true } - // Native ModelPack support: check format-specific ModelPack types + // Native ModelPack support: check format-specific and category types. switch targetMT { case types.MediaTypeGGUF: if layerMT == modelpack.MediaTypeWeightGGUF { @@ -247,15 +247,28 @@ func matchesMediaType(layerMT, targetMT oci.MediaType, modelFormat string) bool if layerMT == modelpack.MediaTypeWeightSafetensors { return true } - case types.MediaTypeDDUF, types.MediaTypeLicense, types.MediaTypeMultimodalProjector, - types.MediaTypeChatTemplate, types.MediaTypeModelFile, types.MediaTypeVLLMConfigArchive, - types.MediaTypeDirTar, types.MediaTypeModelConfigV01, types.MediaTypeModelConfigV02, + case types.MediaTypeLicense: + // CNCF doc layers may carry license files. + if layerMT == modelpack.MediaTypeDocRaw { + return true + } + case types.MediaTypeChatTemplate, types.MediaTypeModelFile, types.MediaTypeVLLMConfigArchive: + // CNCF weight config layers carry config/tokenizer/template files. 
+ if layerMT == modelpack.MediaTypeWeightConfigRaw { + return true + } + case types.MediaTypeMultimodalProjector: + // mmproj files are packaged as generic weights in CNCF format. + // Cannot distinguish from other weight files by media type alone; + // rely on filepath annotation during unpack. + case types.MediaTypeDDUF, types.MediaTypeDirTar, types.MediaTypeModelConfigV01, + types.MediaTypeModelConfigV02, oci.OCIManifestSchema1, oci.OCIImageIndex, oci.OCIConfigJSON, oci.OCILayer, oci.OCILayerGzip, oci.OCILayerZstd, oci.OCIContentDescriptor, oci.OCIArtifactManifest, oci.OCIEmptyJSON, oci.DockerManifestSchema2, oci.DockerManifestList, oci.DockerConfigJSON, oci.DockerLayer, oci.DockerForeignLayer, oci.DockerUncompressedLayer: - // No format-specific ModelPack mapping for these media types + // No format-specific ModelPack mapping for these media types. } // ModelPack model-spec support: format-agnostic weight types (.raw, .tar, etc.) @@ -263,6 +276,8 @@ func matchesMediaType(layerMT, targetMT oci.MediaType, modelFormat string) bool // (e.g., MediaTypeWeightGGUF, MediaTypeWeightSafetensors) already encode the format // in their media type and are handled above; applying this fallback to them would // cause cross-format false positives (e.g., safetensors layer matching as GGUF). + // MediaTypeWeightConfigRaw and MediaTypeDocRaw are also excluded because they carry + // non-weight content. if modelFormat != "" && modelpack.IsModelPackGenericWeightMediaType(string(layerMT)) { switch targetMT { case types.MediaTypeGGUF: @@ -279,7 +294,7 @@ func matchesMediaType(layerMT, targetMT oci.MediaType, modelFormat string) bool oci.OCIContentDescriptor, oci.OCIArtifactManifest, oci.OCIEmptyJSON, oci.DockerManifestSchema2, oci.DockerManifestList, oci.DockerConfigJSON, oci.DockerLayer, oci.DockerForeignLayer, oci.DockerUncompressedLayer: - // No generic weight resolution for these media types + // No generic weight resolution for these media types. 
} } @@ -287,10 +302,44 @@ func matchesMediaType(layerMT, targetMT oci.MediaType, modelFormat string) bool } // WithConfigMediaType provides access to the config media type version. +// Deprecated: prefer WithManifestOptions which also carries the artifact type. type WithConfigMediaType interface { GetConfigMediaType() oci.MediaType } +// ManifestOptions holds the manifest-level metadata for an artifact. +type ManifestOptions struct { + // ConfigMediaType is the media type of the config blob. + ConfigMediaType oci.MediaType + // ArtifactType is the OCI artifact type of the manifest (optional). + // The CNCF ModelPack spec requires + // "application/vnd.cncf.model.manifest.v1+json". + ArtifactType string +} + +// WithManifestOptions provides manifest assembly options. It takes precedence +// over WithConfigMediaType when both are implemented. +type WithManifestOptions interface { + GetManifestOptions() ManifestOptions +} + +// resolveManifestOptions extracts manifest options from the given object, +// checking WithManifestOptions first and falling back to WithConfigMediaType. +func resolveManifestOptions(i interface{}) ManifestOptions { + if mof, ok := i.(WithManifestOptions); ok { + return mof.GetManifestOptions() + } + if cmt, ok := i.(WithConfigMediaType); ok { + if mt := cmt.GetConfigMediaType(); mt != "" { + return ManifestOptions{ConfigMediaType: mt} + } + } + return ManifestOptions{} +} + +// ManifestForLayers assembles an OCI manifest for the given model. The +// config media type and optional artifact type are read from the model via +// the WithManifestOptions interface (or the legacy WithConfigMediaType). 
func ManifestForLayers(i WithLayers) (*oci.Manifest, error) { raw, err := i.RawConfigFile() if err != nil { @@ -301,12 +350,12 @@ func ManifestForLayers(i WithLayers) (*oci.Manifest, error) { return nil, fmt.Errorf("compute config hash: %w", err) } - // Use the config media type from the model if available, otherwise default to V0.1 - configMediaType := types.MediaTypeModelConfigV01 - if cmt, ok := i.(WithConfigMediaType); ok { - if mt := cmt.GetConfigMediaType(); mt != "" { - configMediaType = mt - } + // Resolve config media type and artifact type from the model. + opts := resolveManifestOptions(i) + configMediaType := opts.ConfigMediaType + if configMediaType == "" { + // Default to Docker format V01 for backward compatibility. + configMediaType = types.MediaTypeModelConfigV01 } cfgDsc := oci.Descriptor{ @@ -322,12 +371,13 @@ func ManifestForLayers(i WithLayers) (*oci.Manifest, error) { var layers []oci.Descriptor for _, l := range ls { - // Check if this is our Layer type which embeds the full descriptor with annotations + // Check if this is our Layer type which embeds the full descriptor + // with annotations. if layer, ok := l.(*Layer); ok { - // Use the embedded descriptor directly to preserve annotations + // Use the embedded descriptor directly to preserve annotations. layers = append(layers, layer.Descriptor) } else { - // Fall back to computing descriptor for other layer types + // Fall back to computing descriptor for other layer types. 
mt, err := l.MediaType() if err != nil { return nil, fmt.Errorf("get layer media type: %w", err) @@ -351,6 +401,7 @@ func ManifestForLayers(i WithLayers) (*oci.Manifest, error) { return &oci.Manifest{ SchemaVersion: 2, MediaType: oci.OCIManifestSchema1, + ArtifactType: opts.ArtifactType, Config: cfgDsc, Layers: layers, }, nil diff --git a/pkg/distribution/modelpack/convert.go b/pkg/distribution/modelpack/convert.go new file mode 100644 index 000000000..97d08a491 --- /dev/null +++ b/pkg/distribution/modelpack/convert.go @@ -0,0 +1,134 @@ +package modelpack + +import ( + "path/filepath" + "strings" + "time" + + "github.com/docker/model-runner/pkg/distribution/files" + "github.com/docker/model-runner/pkg/distribution/oci" + "github.com/docker/model-runner/pkg/distribution/types" + "github.com/opencontainers/go-digest" +) + +// LayerKind is a semantic classification of a model artifact layer. +// It maps to specific CNCF model-spec media types. +type LayerKind int + +const ( + // KindWeight is a primary model weight file (GGUF, safetensors, DDUF, + // mmproj, etc.). + KindWeight LayerKind = iota + // KindWeightConfig is a weight config file: tokenizer.json, config.json, + // vLLM config archives, chat templates, etc. + KindWeightConfig + // KindDoc is a documentation file: README.md, LICENSE, etc. + KindDoc +) + +// ClassifyLayer determines the CNCF model-spec LayerKind for a layer. +// Resolution order: +// 1. Explicit Docker semantic media types (most specific). +// 2. Filepath/annotation heuristics for ambiguous media types. +// 3. Docker media type fallback. 
+func ClassifyLayer(dockerMT oci.MediaType, path string) LayerKind { + switch dockerMT { + case types.MediaTypeLicense: + return KindDoc + case types.MediaTypeChatTemplate, types.MediaTypeVLLMConfigArchive, types.MediaTypeModelFile: + return KindWeightConfig + case types.MediaTypeMultimodalProjector: + return KindWeight + case types.MediaTypeGGUF, types.MediaTypeSafetensors, types.MediaTypeDDUF: + return KindWeight + } + + // Use filepath heuristics for ambiguous or unknown media types. + if path != "" { + return classifyByPath(path) + } + + // Safe default: treat as weight config. + return KindWeightConfig +} + +// classifyByPath classifies a file as a LayerKind based on its path/name. +func classifyByPath(path string) LayerKind { + ft := files.Classify(path) + switch ft { + case files.FileTypeGGUF, files.FileTypeSafetensors, files.FileTypeDDUF: + return KindWeight + case files.FileTypeLicense: + return KindDoc + case files.FileTypeChatTemplate: + return KindWeightConfig + case files.FileTypeConfig: + // .md files are documentation, not weight config. + if strings.ToLower(filepath.Ext(path)) == ".md" { + return KindDoc + } + return KindWeightConfig + default: + return KindWeightConfig + } +} + +// LayerKindToMediaType maps a LayerKind to the CNCF model-spec raw media type. +func LayerKindToMediaType(kind LayerKind) oci.MediaType { + switch kind { + case KindWeight: + return MediaTypeWeightRaw + case KindDoc: + return MediaTypeDocRaw + default: + return MediaTypeWeightConfigRaw + } +} + +// MapLayerMediaType returns the CNCF model-spec media type for the given +// Docker layer media type and optional filepath annotation. +func MapLayerMediaType(dockerMT oci.MediaType, path string) oci.MediaType { + return LayerKindToMediaType(ClassifyLayer(dockerMT, path)) +} + +// DockerConfigToModelPack converts a Docker-format model config into a +// CNCF ModelPack Model config. The diffIDs should already be in +// digest.Digest ("algorithm:hex") format. 
+func DockerConfigToModelPack( + cfg types.Config, + desc types.Descriptor, + diffIDs []digest.Digest, +) Model { + now := time.Now() + createdAt := desc.Created + if createdAt == nil { + createdAt = &now + } + return Model{ + Descriptor: ModelDescriptor{ + CreatedAt: createdAt, + // Map architecture to family as the closest available field. + Family: cfg.Architecture, + }, + Config: ModelConfig{ + Architecture: cfg.Architecture, + Format: string(cfg.Format), + ParamSize: normalizeParamSize(cfg.Parameters), + Quantization: cfg.Quantization, + }, + ModelFS: ModelFS{ + Type: "layers", + DiffIDs: diffIDs, + }, + } +} + +// normalizeParamSize lowercases a Docker-format parameters string (e.g. +// "8.03B", "70B") into a model-spec paramSize string ("8.03b", "70b"). +// Digits are not rounded. Returns empty string if s is empty. +func normalizeParamSize(s string) string { + if s == "" { + return "" + } + return strings.ToLower(s) +} diff --git a/pkg/distribution/modelpack/types.go b/pkg/distribution/modelpack/types.go index 52f86b9a6..359a10533 100644 --- a/pkg/distribution/modelpack/types.go +++ b/pkg/distribution/modelpack/types.go @@ -17,6 +17,8 @@ import ( "github.com/docker/model-runner/pkg/distribution/types" "github.com/opencontainers/go-digest" + + specv1 "github.com/modelpack/model-spec/specs-go/v1" ) const ( @@ -27,17 +29,35 @@ const ( MediaTypeWeightPrefix = "application/vnd.cncf.model.weight." // MediaTypeModelConfigV1 is the CNCF model config v1 media type. - MediaTypeModelConfigV1 = "application/vnd.cncf.model.config.v1+json" + MediaTypeModelConfigV1 = specv1.MediaTypeModelConfig + + // ArtifactTypeModelManifest is the CNCF model manifest artifact type. + // Required on the manifest when producing model-spec artifacts. + ArtifactTypeModelManifest = specv1.ArtifactTypeModelManifest + + // MediaTypeWeightRaw is the CNCF model-spec media type for unarchived, + // uncompressed model weights. This is the type used by modctl and the + // official model-spec (v0.0.7+). 
+ MediaTypeWeightRaw = specv1.MediaTypeModelWeightRaw + + // MediaTypeWeightConfigRaw is the CNCF model-spec media type for + // unarchived, uncompressed weight config files (tokenizer.json, + // config.json, chat templates, etc.). + MediaTypeWeightConfigRaw = specv1.MediaTypeModelWeightConfigRaw - // MediaTypeWeightGGUF is the CNCF ModelPack media type for GGUF weight layers. + // MediaTypeDocRaw is the CNCF model-spec media type for unarchived, + // uncompressed documentation files (README.md, LICENSE, etc.). + MediaTypeDocRaw = specv1.MediaTypeModelDocRaw + + // MediaTypeWeightGGUF is the CNCF ModelPack media type for GGUF weight + // layers. This is a DMR extension not in the official model-spec; kept + // for read-compatibility with artifacts produced by older DMR versions. MediaTypeWeightGGUF = "application/vnd.cncf.model.weight.v1.gguf" - // MediaTypeWeightSafetensors is the CNCF ModelPack media type for safetensors weight layers. + // MediaTypeWeightSafetensors is the CNCF ModelPack media type for + // safetensors weight layers. This is a DMR extension not in the official + // model-spec; kept for read-compatibility with older DMR artifacts. MediaTypeWeightSafetensors = "application/vnd.cncf.model.weight.v1.safetensors" - - // MediaTypeWeightRaw is the CNCF model-spec media type for unarchived, uncompressed model weights. - // This is the actual type used by modctl and the official model-spec (v0.0.7+). - MediaTypeWeightRaw = "application/vnd.cncf.model.weight.v1.raw" ) // IsModelPackWeightMediaType checks if the given media type is a CNCF ModelPack weight layer type. @@ -267,3 +287,10 @@ func (m *Model) GetParameters() string { func (m *Model) GetQuantization() string { return m.Config.Quantization } + +// HashToDigest converts a hash string (in "algorithm:hex" form) to a +// digest.Digest. This allows callers to pass oci.Hash.String() values +// without importing the oci package from modelpack. 
+func HashToDigest(hashStr string) digest.Digest { + return digest.Digest(hashStr) +} diff --git a/pkg/distribution/oci/manifest.go b/pkg/distribution/oci/manifest.go index a284f21c0..5f893bbd3 100644 --- a/pkg/distribution/oci/manifest.go +++ b/pkg/distribution/oci/manifest.go @@ -29,12 +29,16 @@ type Platform struct { // Manifest represents an OCI image manifest. type Manifest struct { - SchemaVersion int64 `json:"schemaVersion"` - MediaType MediaType `json:"mediaType,omitempty"` - Config Descriptor `json:"config"` - Layers []Descriptor `json:"layers"` - Annotations map[string]string `json:"annotations,omitempty"` - Subject *Descriptor `json:"subject,omitempty"` + SchemaVersion int64 `json:"schemaVersion"` + MediaType MediaType `json:"mediaType,omitempty"` + // ArtifactType is an optional field that identifies the artifact type. + // Required by the CNCF ModelPack spec: + // "application/vnd.cncf.model.manifest.v1+json". + ArtifactType string `json:"artifactType,omitempty"` + Config Descriptor `json:"config"` + Layers []Descriptor `json:"layers"` + Annotations map[string]string `json:"annotations,omitempty"` + Subject *Descriptor `json:"subject,omitempty"` } // IndexManifest represents an OCI image index (multi-platform manifest list). diff --git a/pkg/distribution/oci/remote/remote.go b/pkg/distribution/oci/remote/remote.go index 987412071..a908aa483 100644 --- a/pkg/distribution/oci/remote/remote.go +++ b/pkg/distribution/oci/remote/remote.go @@ -652,19 +652,62 @@ func (l *remoteLayer) Digest() (oci.Hash, error) { // DiffID returns the uncompressed layer digest. // For remote layers, we look up the diff ID from the image config. +// Supports both Docker format (rootfs.diff_ids) and CNCF ModelPack format +// (modelfs.diffIds). 
func (l *remoteLayer) DiffID() (oci.Hash, error) { - // Get the config file to look up the diff ID - config, err := l.image.ConfigFile() + raw, err := l.image.RawConfigFile() if err != nil { - return oci.Hash{}, fmt.Errorf("getting config file for diff ID lookup: %w", err) + return oci.Hash{}, fmt.Errorf("getting raw config for diff ID lookup: %w", err) } - // Check if the layer index is within bounds of the diff IDs - if l.index < 0 || l.index >= len(config.RootFS.DiffIDs) { - return l.desc.Digest, nil // Fallback to digest if diff ID not available + // Try to extract diffIds from the raw config generically, so we support + // both Docker format (rootfs.diff_ids) and CNCF ModelPack (modelfs.diffIds). + diffIDs, err := extractDiffIDs(raw, l.index) + if err != nil || diffIDs == (oci.Hash{}) { + // Fall back to the descriptor digest (works for uncompressed layers). + return l.desc.Digest, nil + } + return diffIDs, nil +} + +// extractDiffIDs parses a raw config blob and returns the DiffID at the given +// layer index. It tries Docker format (rootfs.diff_ids) first, then CNCF +// ModelPack format (modelfs.diffIds). +func extractDiffIDs(raw []byte, index int) (oci.Hash, error) { + // Parse as a generic map to support both config formats. 
+ var parsed map[string]json.RawMessage + if err := json.Unmarshal(raw, &parsed); err != nil { + return oci.Hash{}, err + } + + // Try Docker format: rootfs.diff_ids + if rootfsRaw, ok := parsed["rootfs"]; ok { + var rootfs struct { + DiffIDs []oci.Hash `json:"diff_ids"` + } + if err := json.Unmarshal(rootfsRaw, &rootfs); err == nil { + if index >= 0 && index < len(rootfs.DiffIDs) { + return rootfs.DiffIDs[index], nil + } + } } - return config.RootFS.DiffIDs[l.index], nil + // Try CNCF ModelPack format: modelfs.diffIds + if modelfsRaw, ok := parsed["modelfs"]; ok { + var modelfs struct { + DiffIDs []string `json:"diffIds"` + } + if err := json.Unmarshal(modelfsRaw, &modelfs); err == nil { + if index >= 0 && index < len(modelfs.DiffIDs) { + h, err := oci.NewHash(modelfs.DiffIDs[index]) + if err == nil { + return h, nil + } + } + } + } + + return oci.Hash{}, nil } // Compressed returns the compressed layer contents. @@ -880,8 +923,15 @@ func Write(ref reference.Reference, img oci.Image, w io.Writer, opts ...Option) return fmt.Errorf("getting config name: %w", err) } + // Use the config media type from the manifest rather than a hardcoded value, + // so that both Docker-format and CNCF ModelPack artifacts are pushed + // with the correct media type. 
+ pushManifest, err := img.Manifest() + if err != nil { + return fmt.Errorf("getting manifest for config media type: %w", err) + } configDesc := v1.Descriptor{ - MediaType: "application/vnd.docker.container.image.v1+json", + MediaType: string(pushManifest.Config.MediaType), Digest: godigest.Digest(configName.String()), Size: int64(len(rawConfig)), } From 40aa2270e74583930713ae698a441ce2db482cd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= Date: Thu, 9 Apr 2026 12:28:49 +0200 Subject: [PATCH 2/7] refactor: replace deprecated WithConfigMediaType with WithManifestOptions and clean up related code --- pkg/distribution/builder/builder.go | 3 +- pkg/distribution/builder/from_directory.go | 3 +- pkg/distribution/internal/mutate/model.go | 2 - .../internal/partial/cncf_model.go | 5 - pkg/distribution/internal/partial/model.go | 11 +- pkg/distribution/internal/partial/partial.go | 20 +-- pkg/distribution/internal/testutil/model.go | 11 +- pkg/distribution/modelpack/convert.go | 7 +- pkg/distribution/modelpack/types.go | 159 +++++------------- 9 files changed, 63 insertions(+), 158 deletions(-) diff --git a/pkg/distribution/builder/builder.go b/pkg/distribution/builder/builder.go index 18dc87dce..177456858 100644 --- a/pkg/distribution/builder/builder.go +++ b/pkg/distribution/builder/builder.go @@ -7,14 +7,13 @@ import ( "io" "time" - "github.com/opencontainers/go-digest" - "github.com/docker/model-runner/pkg/distribution/format" "github.com/docker/model-runner/pkg/distribution/internal/mutate" "github.com/docker/model-runner/pkg/distribution/internal/partial" "github.com/docker/model-runner/pkg/distribution/modelpack" "github.com/docker/model-runner/pkg/distribution/oci" "github.com/docker/model-runner/pkg/distribution/types" + "github.com/opencontainers/go-digest" ) // BuildFormat specifies the output artifact format. 
diff --git a/pkg/distribution/builder/from_directory.go b/pkg/distribution/builder/from_directory.go index d20982922..037fa9316 100644 --- a/pkg/distribution/builder/from_directory.go +++ b/pkg/distribution/builder/from_directory.go @@ -8,8 +8,6 @@ import ( "strings" "time" - "github.com/opencontainers/go-digest" - "github.com/docker/model-runner/pkg/distribution/files" "github.com/docker/model-runner/pkg/distribution/format" "github.com/docker/model-runner/pkg/distribution/internal/mutate" @@ -17,6 +15,7 @@ import ( "github.com/docker/model-runner/pkg/distribution/modelpack" "github.com/docker/model-runner/pkg/distribution/oci" "github.com/docker/model-runner/pkg/distribution/types" + "github.com/opencontainers/go-digest" ) const rootFSType = "rootfs" diff --git a/pkg/distribution/internal/mutate/model.go b/pkg/distribution/internal/mutate/model.go index a4e3ef711..d7214a8b2 100644 --- a/pkg/distribution/internal/mutate/model.go +++ b/pkg/distribution/internal/mutate/model.go @@ -109,8 +109,6 @@ func (m *model) GetManifestOptions() partial.ManifestOptions { var opts partial.ManifestOptions if base, ok := m.base.(partial.WithManifestOptions); ok { opts = base.GetManifestOptions() - } else if cmt, ok := m.base.(partial.WithConfigMediaType); ok { - opts.ConfigMediaType = cmt.GetConfigMediaType() } // Apply overrides set on this wrapper. if m.configMediaType != "" { diff --git a/pkg/distribution/internal/partial/cncf_model.go b/pkg/distribution/internal/partial/cncf_model.go index b393d6688..5ad1db93a 100644 --- a/pkg/distribution/internal/partial/cncf_model.go +++ b/pkg/distribution/internal/partial/cncf_model.go @@ -31,11 +31,6 @@ func (m *CNCFModel) GetManifestOptions() ManifestOptions { } } -// GetConfigMediaType implements WithConfigMediaType for backward compatibility. 
-func (m *CNCFModel) GetConfigMediaType() oci.MediaType { - return modelpack.MediaTypeModelConfigV1 -} - func (m *CNCFModel) Layers() ([]oci.Layer, error) { return m.LayerList, nil } diff --git a/pkg/distribution/internal/partial/model.go b/pkg/distribution/internal/partial/model.go index f2413d8f1..cdaee6f87 100644 --- a/pkg/distribution/internal/partial/model.go +++ b/pkg/distribution/internal/partial/model.go @@ -130,8 +130,11 @@ func (m *BaseModel) Descriptor() (types.Descriptor, error) { return Descriptor(m) } -// GetConfigMediaType returns the config media type for the model. -// If not set, returns empty string and ManifestForLayers will default to V0.1. -func (m *BaseModel) GetConfigMediaType() oci.MediaType { - return m.ConfigMediaType +// GetManifestOptions implements WithManifestOptions, providing the config +// media type for Docker-format models. If ConfigMediaType is not set, +// ManifestForLayers will default to V0.1. +func (m *BaseModel) GetManifestOptions() ManifestOptions { + return ManifestOptions{ + ConfigMediaType: m.ConfigMediaType, + } } diff --git a/pkg/distribution/internal/partial/partial.go b/pkg/distribution/internal/partial/partial.go index 5d7d48e35..397ba0327 100644 --- a/pkg/distribution/internal/partial/partial.go +++ b/pkg/distribution/internal/partial/partial.go @@ -301,12 +301,6 @@ func matchesMediaType(layerMT, targetMT oci.MediaType, modelFormat string) bool return false } -// WithConfigMediaType provides access to the config media type version. -// Deprecated: prefer WithManifestOptions which also carries the artifact type. -type WithConfigMediaType interface { - GetConfigMediaType() oci.MediaType -} - // ManifestOptions holds the manifest-level metadata for an artifact. type ManifestOptions struct { // ConfigMediaType is the media type of the config blob. @@ -317,29 +311,23 @@ type ManifestOptions struct { ArtifactType string } -// WithManifestOptions provides manifest assembly options. 
It takes precedence -// over WithConfigMediaType when both are implemented. +// WithManifestOptions provides manifest assembly options. type WithManifestOptions interface { GetManifestOptions() ManifestOptions } -// resolveManifestOptions extracts manifest options from the given object, -// checking WithManifestOptions first and falling back to WithConfigMediaType. +// resolveManifestOptions extracts manifest options from the given object +// via the WithManifestOptions interface. func resolveManifestOptions(i interface{}) ManifestOptions { if mof, ok := i.(WithManifestOptions); ok { return mof.GetManifestOptions() } - if cmt, ok := i.(WithConfigMediaType); ok { - if mt := cmt.GetConfigMediaType(); mt != "" { - return ManifestOptions{ConfigMediaType: mt} - } - } return ManifestOptions{} } // ManifestForLayers assembles an OCI manifest for the given model. The // config media type and optional artifact type are read from the model via -// the WithManifestOptions interface (or the legacy WithConfigMediaType). +// the WithManifestOptions interface. func ManifestForLayers(i WithLayers) (*oci.Manifest, error) { raw, err := i.RawConfigFile() if err != nil { diff --git a/pkg/distribution/internal/testutil/model.go b/pkg/distribution/internal/testutil/model.go index f1c5f3086..c84737a6b 100644 --- a/pkg/distribution/internal/testutil/model.go +++ b/pkg/distribution/internal/testutil/model.go @@ -143,10 +143,13 @@ func NewModelPackArtifact(t *testing.T, model modelpack.Model, layers ...LayerSp return NewArtifact(rawConfig, modelpack.MediaTypeModelConfigV1, builtLayers...) } -// GetConfigMediaType implements partial.WithConfigMediaType so that ManifestForLayers -// uses the correct config media type when building the OCI manifest. -func (a *Artifact) GetConfigMediaType() oci.MediaType { - return a.configMediaType +// GetManifestOptions implements partial.WithManifestOptions so that +// ManifestForLayers uses the correct config media type when building the +// OCI manifest. 
+func (a *Artifact) GetManifestOptions() partial.ManifestOptions { + return partial.ManifestOptions{ + ConfigMediaType: a.configMediaType, + } } // ID implements types.ModelArtifact. diff --git a/pkg/distribution/modelpack/convert.go b/pkg/distribution/modelpack/convert.go index 97d08a491..c74e31f09 100644 --- a/pkg/distribution/modelpack/convert.go +++ b/pkg/distribution/modelpack/convert.go @@ -32,7 +32,7 @@ const ( // 2. Filepath/annotation heuristics for ambiguous media types. // 3. Docker media type fallback. func ClassifyLayer(dockerMT oci.MediaType, path string) LayerKind { - switch dockerMT { + switch dockerMT { //nolint:exhaustive // Only Docker semantic media types are classified; OCI standard types fall through to filepath heuristics. case types.MediaTypeLicense: return KindDoc case types.MediaTypeChatTemplate, types.MediaTypeVLLMConfigArchive, types.MediaTypeModelFile: @@ -62,6 +62,8 @@ func classifyByPath(path string) LayerKind { return KindDoc case files.FileTypeChatTemplate: return KindWeightConfig + case files.FileTypeUnknown: + return KindWeightConfig case files.FileTypeConfig: // .md files are documentation, not weight config. if strings.ToLower(filepath.Ext(path)) == ".md" { @@ -80,9 +82,10 @@ func LayerKindToMediaType(kind LayerKind) oci.MediaType { return MediaTypeWeightRaw case KindDoc: return MediaTypeDocRaw - default: + case KindWeightConfig: return MediaTypeWeightConfigRaw } + return MediaTypeWeightConfigRaw } // MapLayerMediaType returns the CNCF model-spec media type for the given diff --git a/pkg/distribution/modelpack/types.go b/pkg/distribution/modelpack/types.go index 359a10533..de27f90c0 100644 --- a/pkg/distribution/modelpack/types.go +++ b/pkg/distribution/modelpack/types.go @@ -3,9 +3,9 @@ // without conversion. Both Docker and ModelPack formats are supported natively through // the types.ModelConfig interface. 
// -// Note: JSON tags in this package use camelCase (e.g., "createdAt", "paramSize") to match -// the CNCF ModelPack spec, which differs from Docker model-spec's snake_case convention -// (e.g., "context_size"). +// The struct types (ModelDescriptor, ModelConfig, ModelFS, ModelCapabilities) are +// re-exported directly from the official CNCF model-spec Go module so that +// serialization tags and field definitions stay in sync with the specification. // // See: https://github.com/modelpack/model-spec package modelpack @@ -13,12 +13,10 @@ package modelpack import ( "encoding/json" "strings" - "time" "github.com/docker/model-runner/pkg/distribution/types" - "github.com/opencontainers/go-digest" - specv1 "github.com/modelpack/model-spec/specs-go/v1" + "github.com/opencontainers/go-digest" ) const ( @@ -60,6 +58,40 @@ const ( MediaTypeWeightSafetensors = "application/vnd.cncf.model.weight.v1.safetensors" ) +// Type aliases re-export the canonical CNCF model-spec struct types so that +// callers use the upstream definitions (and their JSON tags) by default. +// This eliminates local struct duplication while keeping the modelpack +// package as the single import for DMR code. +type ( + // ModelDescriptor defines the general information of a model. + ModelDescriptor = specv1.ModelDescriptor + + // ModelConfig defines the execution parameters for an inference engine. + ModelConfig = specv1.ModelConfig + + // ModelFS describes the layer content addresses. + ModelFS = specv1.ModelFS + + // ModelCapabilities defines the special capabilities that the model supports. + ModelCapabilities = specv1.ModelCapabilities +) + +// Model represents the CNCF ModelPack config structure. +// It provides the `application/vnd.cncf.model.config.v1+json` mediatype when marshalled to JSON. +// +// The struct mirrors specv1.Model but is declared as its own named type so +// that it can implement the types.ModelConfig interface required by DMR. 
+type Model struct { + // Descriptor provides metadata about the model provenance and identity. + Descriptor ModelDescriptor `json:"descriptor"` + + // ModelFS describes the layer content addresses. + ModelFS ModelFS `json:"modelfs"` + + // Config defines the execution parameters for the model. + Config ModelConfig `json:"config,omitempty"` +} + // IsModelPackWeightMediaType checks if the given media type is a CNCF ModelPack weight layer type. // This includes both format-specific types (e.g., .gguf, .safetensors) and // format-agnostic types from the official model-spec (e.g., .raw, .tar). @@ -126,121 +158,6 @@ func IsModelPackConfig(raw []byte) bool { return false } -// Model represents the CNCF ModelPack config structure. -// It provides the `application/vnd.cncf.model.config.v1+json` mediatype when marshalled to JSON. -type Model struct { - // Descriptor provides metadata about the model provenance and identity. - Descriptor ModelDescriptor `json:"descriptor"` - - // ModelFS describes the layer content addresses. - ModelFS ModelFS `json:"modelfs"` - - // Config defines the execution parameters for the model. - Config ModelConfig `json:"config,omitempty"` -} - -// ModelDescriptor defines the general information of a model. -type ModelDescriptor struct { - // CreatedAt is the date and time on which the model was built. - CreatedAt *time.Time `json:"createdAt,omitempty"` - - // Authors contains the contact details of the people or organization responsible for the model. - Authors []string `json:"authors,omitempty"` - - // Family is the model family, such as llama3, gpt2, qwen2, etc. - Family string `json:"family,omitempty"` - - // Name is the model name, such as llama3-8b-instruct, gpt2-xl, etc. - Name string `json:"name,omitempty"` - - // DocURL is the URL to get documentation on the model. - DocURL string `json:"docURL,omitempty"` - - // SourceURL is the URL to get source code for building the model. 
- SourceURL string `json:"sourceURL,omitempty"` - - // DatasetsURL contains URLs referencing datasets that the model was trained upon. - DatasetsURL []string `json:"datasetsURL,omitempty"` - - // Version is the version of the packaged software. - Version string `json:"version,omitempty"` - - // Revision is the source control revision identifier for the packaged software. - Revision string `json:"revision,omitempty"` - - // Vendor is the name of the distributing entity, organization or individual. - Vendor string `json:"vendor,omitempty"` - - // Licenses contains the license(s) under which contained software is distributed - // as an SPDX License Expression. - Licenses []string `json:"licenses,omitempty"` - - // Title is the human-readable title of the model. - Title string `json:"title,omitempty"` - - // Description is the human-readable description of the software packaged in the model. - Description string `json:"description,omitempty"` -} - -// ModelConfig defines the execution parameters which should be used as a base -// when running a model using an inference engine. -type ModelConfig struct { - // Architecture is the model architecture, such as transformer, cnn, rnn, etc. - Architecture string `json:"architecture,omitempty"` - - // Format is the model format, such as gguf, safetensors, onnx, etc. - Format string `json:"format,omitempty"` - - // ParamSize is the size of the model parameters, such as "8b", "16b", "32b", etc. - ParamSize string `json:"paramSize,omitempty"` - - // Precision is the model precision, such as bf16, fp16, int8, mixed etc. - Precision string `json:"precision,omitempty"` - - // Quantization is the model quantization method, such as awq, gptq, etc. - Quantization string `json:"quantization,omitempty"` - - // Capabilities defines special capabilities that the model supports. - Capabilities *ModelCapabilities `json:"capabilities,omitempty"` -} - -// ModelCapabilities defines the special capabilities that the model supports. 
-type ModelCapabilities struct { - // InputTypes specifies what input modalities the model can process. - // Values can be: "text", "image", "audio", "video", "embedding", "other". - InputTypes []string `json:"inputTypes,omitempty"` - - // OutputTypes specifies what output modalities the model can produce. - // Values can be: "text", "image", "audio", "video", "embedding", "other". - OutputTypes []string `json:"outputTypes,omitempty"` - - // KnowledgeCutoff is the date of the datasets that the model was trained on. - KnowledgeCutoff *time.Time `json:"knowledgeCutoff,omitempty"` - - // Reasoning indicates whether the model can perform reasoning tasks. - Reasoning *bool `json:"reasoning,omitempty"` - - // ToolUsage indicates whether the model can use external tools. - ToolUsage *bool `json:"toolUsage,omitempty"` - - // Reward indicates whether the model is a reward model. - Reward *bool `json:"reward,omitempty"` - - // Languages indicates the languages that the model can speak. - // Encoded as ISO 639 two letter codes. For example, ["en", "fr", "zh"]. - Languages []string `json:"languages,omitempty"` -} - -// ModelFS describes the layer content addresses. -type ModelFS struct { - // Type is the type of the rootfs. MUST be set to "layers". - Type string `json:"type"` - - // DiffIDs is an array of layer content hashes (DiffIDs), - // in order from bottom-most to top-most. - DiffIDs []digest.Digest `json:"diffIds"` -} - // Ensure Model implements types.ModelConfig var _ types.ModelConfig = (*Model)(nil) From c68b9d7bad29f0ca08ebb7013b98ba0363e33400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= Date: Thu, 9 Apr 2026 13:55:23 +0200 Subject: [PATCH 3/7] fix: add format field to integration test packageOptions and regenerate docs The --format flag was added to the package command but: 1. Integration tests constructed packageOptions directly without setting format, causing validation to fail with 'invalid --format value ""' 2. 
Reference docs were not regenerated after adding the new flag Set format: "docker" in all three test packageOptions structs and regenerate docs with 'make docs'. --- cmd/cli/commands/integration_test.go | 3 +++ .../docs/reference/docker_model_package.yaml | 11 +++++++++ cmd/cli/docs/reference/model_package.md | 23 ++++++++++--------- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/cmd/cli/commands/integration_test.go b/cmd/cli/commands/integration_test.go index c8413d58d..b2f1c33d3 100644 --- a/cmd/cli/commands/integration_test.go +++ b/cmd/cli/commands/integration_test.go @@ -1054,6 +1054,7 @@ func TestIntegration_PackageModel(t *testing.T) { opts := packageOptions{ ggufPath: absPath, tag: targetTag, + format: "docker", } // Execute the package command using the helper function with test client @@ -1089,6 +1090,7 @@ func TestIntegration_PackageModel(t *testing.T) { ggufPath: absPath, tag: targetTag, contextSize: 4096, + format: "docker", } // Create a command for context @@ -1121,6 +1123,7 @@ func TestIntegration_PackageModel(t *testing.T) { opts := packageOptions{ ggufPath: absPath, tag: targetTag, + format: "docker", } // Create a command for context diff --git a/cmd/cli/docs/reference/docker_model_package.yaml b/cmd/cli/docs/reference/docker_model_package.yaml index ce868c1e5..7bc696c5b 100644 --- a/cmd/cli/docs/reference/docker_model_package.yaml +++ b/cmd/cli/docs/reference/docker_model_package.yaml @@ -71,6 +71,17 @@ options: experimentalcli: false kubernetes: false swarm: false + - option: format + value_type: string + default_value: docker + description: | + output artifact format: "docker" (default) or "cncf" (CNCF ModelPack spec) + deprecated: false + hidden: false + experimental: false + experimentalcli: false + kubernetes: false + swarm: false - option: from value_type: string description: reference to an existing model to repackage diff --git a/cmd/cli/docs/reference/model_package.md b/cmd/cli/docs/reference/model_package.md index 
ade44149b..571b77c1f 100644 --- a/cmd/cli/docs/reference/model_package.md +++ b/cmd/cli/docs/reference/model_package.md @@ -42,17 +42,18 @@ Packaging behavior: ### Options -| Name | Type | Default | Description | -|:--------------------|:--------------|:--------|:---------------------------------------------------------------------------------------| -| `--chat-template` | `string` | | absolute path to chat template file (must be Jinja format) | -| `--context-size` | `uint64` | `0` | context size in tokens | -| `--dduf` | `string` | | absolute path to DDUF archive file (Diffusers Unified Format) | -| `--from` | `string` | | reference to an existing model to repackage | -| `--gguf` | `string` | | absolute path to gguf file | -| `-l`, `--license` | `stringArray` | | absolute path to a license file | -| `--mmproj` | `string` | | absolute path to multimodal projector file | -| `--push` | `bool` | | push to registry (if not set, the model is loaded into the Model Runner content store) | -| `--safetensors-dir` | `string` | | absolute path to directory containing safetensors files and config | +| Name | Type | Default | Description | +|:--------------------|:--------------|:---------|:---------------------------------------------------------------------------------------| +| `--chat-template` | `string` | | absolute path to chat template file (must be Jinja format) | +| `--context-size` | `uint64` | `0` | context size in tokens | +| `--dduf` | `string` | | absolute path to DDUF archive file (Diffusers Unified Format) | +| `--format` | `string` | `docker` | output artifact format: "docker" (default) or "cncf" (CNCF ModelPack spec) | +| `--from` | `string` | | reference to an existing model to repackage | +| `--gguf` | `string` | | absolute path to gguf file | +| `-l`, `--license` | `stringArray` | | absolute path to a license file | +| `--mmproj` | `string` | | absolute path to multimodal projector file | +| `--push` | `bool` | | push to registry (if not set, the model is 
loaded into the Model Runner content store) | +| `--safetensors-dir` | `string` | | absolute path to directory containing safetensors files and config | From 9fd1811195a984ef7f0118c147994c27069789b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= Date: Thu, 9 Apr 2026 14:46:23 +0200 Subject: [PATCH 4/7] fix: enhance media type handling in descriptor retrieval and layer classification --- pkg/distribution/builder/builder.go | 16 +++++++++++----- pkg/distribution/modelpack/convert.go | 12 +++++++++--- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/pkg/distribution/builder/builder.go b/pkg/distribution/builder/builder.go index 177456858..bee47e7bd 100644 --- a/pkg/distribution/builder/builder.go +++ b/pkg/distribution/builder/builder.go @@ -229,12 +229,18 @@ func (r *remappedLayer) GetDescriptor() oci.Descriptor { type descriptorProvider interface { GetDescriptor() oci.Descriptor } + var desc oci.Descriptor if dp, ok := r.Layer.(descriptorProvider); ok { - desc := dp.GetDescriptor() - desc.MediaType = r.newMediaType - return desc - } - return oci.Descriptor{MediaType: r.newMediaType} + desc = dp.GetDescriptor() + } else { + // Fall back to basic interface methods if the layer is not a + // descriptor provider (e.g. remoteLayer). + d, _ := r.Layer.Digest() + s, _ := r.Layer.Size() + desc = oci.Descriptor{Digest: d, Size: s} + } + desc.MediaType = r.newMediaType + return desc } // FromModel returns a *Builder that builds model artifacts from an existing diff --git a/pkg/distribution/modelpack/convert.go b/pkg/distribution/modelpack/convert.go index c74e31f09..47d708c09 100644 --- a/pkg/distribution/modelpack/convert.go +++ b/pkg/distribution/modelpack/convert.go @@ -32,15 +32,21 @@ const ( // 2. Filepath/annotation heuristics for ambiguous media types. // 3. Docker media type fallback. 
func ClassifyLayer(dockerMT oci.MediaType, path string) LayerKind { - switch dockerMT { //nolint:exhaustive // Only Docker semantic media types are classified; OCI standard types fall through to filepath heuristics. - case types.MediaTypeLicense: + switch dockerMT { //nolint:exhaustive // Only Docker and CNCF semantic media types are classified; OCI standard types fall through to filepath heuristics. + // Documentation types (Docker and CNCF). + case types.MediaTypeLicense, MediaTypeDocRaw: return KindDoc - case types.MediaTypeChatTemplate, types.MediaTypeVLLMConfigArchive, types.MediaTypeModelFile: + // Weight config types (Docker and CNCF). + case types.MediaTypeChatTemplate, types.MediaTypeVLLMConfigArchive, types.MediaTypeModelFile, MediaTypeWeightConfigRaw: return KindWeightConfig + // Docker-format weight types. + case types.MediaTypeMultimodalProjector: return KindWeight case types.MediaTypeGGUF, types.MediaTypeSafetensors, types.MediaTypeDDUF: return KindWeight + // CNCF model-spec weight types (including legacy typed media types). + case MediaTypeWeightRaw, MediaTypeWeightGGUF, MediaTypeWeightSafetensors: + return KindWeight } // Use filepath heuristics for ambiguous or unknown media types.
From d1e04b123f8737b5c536f2daa1047fa87a80dec7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= Date: Thu, 9 Apr 2026 15:45:49 +0200 Subject: [PATCH 5/7] test: add CNCF format tests for artifact validation and media type checks --- pkg/distribution/builder/builder_test.go | 333 +++++++++++++++++++++++ 1 file changed, 333 insertions(+) diff --git a/pkg/distribution/builder/builder_test.go b/pkg/distribution/builder/builder_test.go index 6e4ff4d67..a10311b43 100644 --- a/pkg/distribution/builder/builder_test.go +++ b/pkg/distribution/builder/builder_test.go @@ -2,6 +2,7 @@ package builder_test import ( "context" + "encoding/json" "fmt" "io" "path/filepath" @@ -11,6 +12,8 @@ import ( "github.com/docker/model-runner/pkg/distribution/builder" "github.com/docker/model-runner/pkg/distribution/internal/testutil" + "github.com/docker/model-runner/pkg/distribution/modelpack" + "github.com/docker/model-runner/pkg/distribution/oci" "github.com/docker/model-runner/pkg/distribution/types" ) @@ -422,6 +425,336 @@ func TestFromModelErrorHandling(t *testing.T) { } } +// TestFromPathCNCFFormat verifies that FromPath with WithFormat(BuildFormatCNCF) produces +// a valid CNCF ModelPack artifact with correct media types, artifact type, and config. +func TestFromPathCNCFFormat(t *testing.T) { + ggufPath := filepath.Join("..", "assets", "dummy.gguf") + fixedTime := time.Date(2025, 6, 15, 12, 0, 0, 0, time.UTC) + + b, err := builder.FromPath(ggufPath, + builder.WithFormat(builder.BuildFormatCNCF), + builder.WithCreated(fixedTime), + ) + if err != nil { + t.Fatalf("FromPath with CNCF format failed: %v", err) + } + + target := &fakeTarget{} + if err := b.Build(t.Context(), target, nil); err != nil { + t.Fatalf("Build failed: %v", err) + } + + // 1. Verify manifest has CNCF artifact type. 
+ manifest, err := target.artifact.Manifest() + if err != nil { + t.Fatalf("Failed to get manifest: %v", err) + } + if manifest.ArtifactType != modelpack.ArtifactTypeModelManifest { + t.Errorf("Expected artifactType %q, got %q", + modelpack.ArtifactTypeModelManifest, manifest.ArtifactType) + } + + // 2. Verify config media type is CNCF model config. + if manifest.Config.MediaType != oci.MediaType(modelpack.MediaTypeModelConfigV1) { + t.Errorf("Expected config media type %q, got %q", + modelpack.MediaTypeModelConfigV1, manifest.Config.MediaType) + } + + // 3. Verify all layers have CNCF media types (not Docker media types). + for i, layer := range manifest.Layers { + mt := string(layer.MediaType) + if !strings.HasPrefix(mt, modelpack.MediaTypePrefix) { + t.Errorf("Layer %d has non-CNCF media type %q (expected prefix %q)", + i, mt, modelpack.MediaTypePrefix) + } + } + + // 4. Verify the weight layer specifically uses the CNCF weight media type. + if len(manifest.Layers) == 0 { + t.Fatal("Expected at least one layer") + } + weightMT := manifest.Layers[0].MediaType + if weightMT != oci.MediaType(modelpack.MediaTypeWeightRaw) { + t.Errorf("Expected weight layer media type %q, got %q", + modelpack.MediaTypeWeightRaw, weightMT) + } + + // 5. Verify the raw config is valid ModelPack JSON with correct fields. 
+ rawCfg, err := target.artifact.RawConfigFile() + if err != nil { + t.Fatalf("Failed to get raw config: %v", err) + } + var mp modelpack.Model + if err := json.Unmarshal(rawCfg, &mp); err != nil { + t.Fatalf("Failed to unmarshal CNCF config: %v", err) + } + if mp.Config.Format != "gguf" { + t.Errorf("Expected config.format %q, got %q", "gguf", mp.Config.Format) + } + if mp.ModelFS.Type != "layers" { + t.Errorf("Expected modelfs.type %q, got %q", "layers", mp.ModelFS.Type) + } + if len(mp.ModelFS.DiffIDs) == 0 { + t.Error("Expected at least one diffId in modelfs") + } + if mp.Descriptor.CreatedAt == nil { + t.Error("Expected descriptor.createdAt to be set") + } else if !mp.Descriptor.CreatedAt.Equal(fixedTime) { + t.Errorf("Expected descriptor.createdAt %v, got %v", fixedTime, *mp.Descriptor.CreatedAt) + } + + // 6. Verify the JSON tags are camelCase (spec-compliant). + var rawMap map[string]json.RawMessage + if err := json.Unmarshal(rawCfg, &rawMap); err != nil { + t.Fatalf("Failed to unmarshal config to map: %v", err) + } + // Must have "modelfs" (not "model_fs"). + if _, ok := rawMap["modelfs"]; !ok { + t.Error("Config JSON missing 'modelfs' key") + } + // Verify modelfs contains "diffIds" (camelCase, not "diff_ids"). + if modelfsRaw, ok := rawMap["modelfs"]; ok { + var modelfsMap map[string]json.RawMessage + if err := json.Unmarshal(modelfsRaw, &modelfsMap); err != nil { + t.Fatalf("Failed to unmarshal modelfs: %v", err) + } + if _, ok := modelfsMap["diffIds"]; !ok { + t.Error("modelfs JSON missing 'diffIds' key (expected camelCase)") + } + if _, ok := modelfsMap["diff_ids"]; ok { + t.Error("modelfs JSON has 'diff_ids' (snake_case) — should be 'diffIds' (camelCase)") + } + } + // Verify config does not use the snake_case key "param_size" (the spec key is "paramSize").
+ if configRaw, ok := rawMap["config"]; ok { + var configMap map[string]json.RawMessage + if err := json.Unmarshal(configRaw, &configMap); err != nil { + t.Fatalf("Failed to unmarshal config section: %v", err) + } + if _, ok := configMap["param_size"]; ok { + t.Error("config JSON has 'param_size' (snake_case) — should be 'paramSize' (camelCase)") + } + } +} + +// TestFromPathCNCFWithAdditionalLayers verifies that additional layers added +// to a CNCF builder get CNCF media types, not Docker media types. +func TestFromPathCNCFWithAdditionalLayers(t *testing.T) { + ggufPath := filepath.Join("..", "assets", "dummy.gguf") + + b, err := builder.FromPath(ggufPath, builder.WithFormat(builder.BuildFormatCNCF)) + if err != nil { + t.Fatalf("FromPath failed: %v", err) + } + + // Add license + b, err = b.WithLicense(filepath.Join("..", "assets", "license.txt")) + if err != nil { + t.Fatalf("Failed to add license: %v", err) + } + + // Add multimodal projector + b, err = b.WithMultimodalProjector(filepath.Join("..", "assets", "dummy.mmproj")) + if err != nil { + t.Fatalf("Failed to add multimodal projector: %v", err) + } + + // Add chat template + b, err = b.WithChatTemplateFile(filepath.Join("..", "assets", "template.jinja")) + if err != nil { + t.Fatalf("Failed to add chat template: %v", err) + } + + target := &fakeTarget{} + if err := b.Build(t.Context(), target, nil); err != nil { + t.Fatalf("Build failed: %v", err) + } + + manifest, err := target.artifact.Manifest() + if err != nil { + t.Fatalf("Failed to get manifest: %v", err) + } + + // Should have 4 layers: weight + license + mmproj + chat template + if len(manifest.Layers) != 4 { + t.Fatalf("Expected 4 layers, got %d", len(manifest.Layers)) + } + + // ALL layers must have CNCF media type prefix. 
+ for i, layer := range manifest.Layers { + mt := string(layer.MediaType) + if !strings.HasPrefix(mt, modelpack.MediaTypePrefix) { + t.Errorf("Layer %d has non-CNCF media type %q", i, mt) + } + } + + // No Docker media types should appear. + dockerMTs := []oci.MediaType{ + types.MediaTypeGGUF, + types.MediaTypeLicense, + types.MediaTypeMultimodalProjector, + types.MediaTypeChatTemplate, + } + for _, layer := range manifest.Layers { + for _, dmt := range dockerMTs { + if layer.MediaType == dmt { + t.Errorf("Found Docker media type %q in CNCF artifact", dmt) + } + } + } +} + +// TestFromPathCNCFContextSizeError verifies that WithContextSize returns an error +// when the output format is CNCF (context size is not in the CNCF spec). +func TestFromPathCNCFContextSizeError(t *testing.T) { + ggufPath := filepath.Join("..", "assets", "dummy.gguf") + + b, err := builder.FromPath(ggufPath, builder.WithFormat(builder.BuildFormatCNCF)) + if err != nil { + t.Fatalf("FromPath failed: %v", err) + } + + _, err = b.WithContextSize(4096) + if err == nil { + t.Fatal("Expected error when setting context size with CNCF format, got nil") + } + if !strings.Contains(err.Error(), "--context-size is not supported") { + t.Errorf("Expected error about context-size not supported, got: %v", err) + } +} + +// TestFromModelToCNCF verifies that FromModel with WithFormat(BuildFormatCNCF) correctly +// converts a Docker-format model to CNCF ModelPack format. +func TestFromModelToCNCF(t *testing.T) { + // Step 1: Create a Docker-format model with a license layer. 
+ dockerBuilder, err := builder.FromPath(filepath.Join("..", "assets", "dummy.gguf")) + if err != nil { + t.Fatalf("FromPath failed: %v", err) + } + dockerBuilder, err = dockerBuilder.WithLicense(filepath.Join("..", "assets", "license.txt")) + if err != nil { + t.Fatalf("WithLicense failed: %v", err) + } + + dockerTarget := &fakeTarget{} + if err := dockerBuilder.Build(t.Context(), dockerTarget, nil); err != nil { + t.Fatalf("Build Docker model failed: %v", err) + } + + // Verify the Docker model has Docker media types. + dockerManifest, err := dockerTarget.artifact.Manifest() + if err != nil { + t.Fatalf("Failed to get Docker manifest: %v", err) + } + for _, layer := range dockerManifest.Layers { + if strings.HasPrefix(string(layer.MediaType), modelpack.MediaTypePrefix) { + t.Fatalf("Docker model should not have CNCF media types, found %q", layer.MediaType) + } + } + + // Step 2: Convert Docker model to CNCF format. + cncfBuilder, err := builder.FromModel(dockerTarget.artifact, builder.WithFormat(builder.BuildFormatCNCF)) + if err != nil { + t.Fatalf("FromModel with CNCF format failed: %v", err) + } + + cncfTarget := &fakeTarget{} + if err := cncfBuilder.Build(t.Context(), cncfTarget, nil); err != nil { + t.Fatalf("Build CNCF model failed: %v", err) + } + + // Step 3: Verify the CNCF model. + cncfManifest, err := cncfTarget.artifact.Manifest() + if err != nil { + t.Fatalf("Failed to get CNCF manifest: %v", err) + } + + // Artifact type must be set. + if cncfManifest.ArtifactType != modelpack.ArtifactTypeModelManifest { + t.Errorf("Expected artifactType %q, got %q", + modelpack.ArtifactTypeModelManifest, cncfManifest.ArtifactType) + } + + // Config media type must be CNCF. + if cncfManifest.Config.MediaType != oci.MediaType(modelpack.MediaTypeModelConfigV1) { + t.Errorf("Expected config media type %q, got %q", + modelpack.MediaTypeModelConfigV1, cncfManifest.Config.MediaType) + } + + // Same number of layers must be preserved. 
+ if len(cncfManifest.Layers) != len(dockerManifest.Layers) { + t.Fatalf("Expected %d layers, got %d", len(dockerManifest.Layers), len(cncfManifest.Layers)) + } + + // All layers must have CNCF media types. + for i, layer := range cncfManifest.Layers { + mt := string(layer.MediaType) + if !strings.HasPrefix(mt, modelpack.MediaTypePrefix) { + t.Errorf("Layer %d has non-CNCF media type %q after conversion", i, mt) + } + } + + // Layer digests should be preserved (same content, different media type). + for i := range dockerManifest.Layers { + if dockerManifest.Layers[i].Digest != cncfManifest.Layers[i].Digest { + t.Errorf("Layer %d digest changed after conversion: %v → %v", + i, dockerManifest.Layers[i].Digest, cncfManifest.Layers[i].Digest) + } + } + + // Config should still report the GGUF format after conversion. + cfg, err := cncfTarget.artifact.Config() + if err != nil { + t.Fatalf("Failed to get config: %v", err) + } + if cfg.GetFormat() != types.FormatGGUF { + t.Errorf("Expected format %q, got %q", types.FormatGGUF, cfg.GetFormat()) + } +} + +// TestFromPathCNCFDeterministicDigest verifies that CNCF format builds +// with the same inputs produce the same digests.
+func TestFromPathCNCFDeterministicDigest(t *testing.T) { + ggufPath := filepath.Join("..", "assets", "dummy.gguf") + fixedTime := time.Date(2025, 6, 15, 12, 0, 0, 0, time.UTC) + + b1, err := builder.FromPath(ggufPath, + builder.WithFormat(builder.BuildFormatCNCF), + builder.WithCreated(fixedTime), + ) + if err != nil { + t.Fatalf("FromPath (first) failed: %v", err) + } + b2, err := builder.FromPath(ggufPath, + builder.WithFormat(builder.BuildFormatCNCF), + builder.WithCreated(fixedTime), + ) + if err != nil { + t.Fatalf("FromPath (second) failed: %v", err) + } + + target1 := &fakeTarget{} + target2 := &fakeTarget{} + if err := b1.Build(t.Context(), target1, nil); err != nil { + t.Fatalf("Build (first) failed: %v", err) + } + if err := b2.Build(t.Context(), target2, nil); err != nil { + t.Fatalf("Build (second) failed: %v", err) + } + + digest1, err := target1.artifact.Digest() + if err != nil { + t.Fatalf("Digest (first) failed: %v", err) + } + digest2, err := target2.artifact.Digest() + if err != nil { + t.Fatalf("Digest (second) failed: %v", err) + } + if digest1 != digest2 { + t.Errorf("Expected identical digests for CNCF format with same inputs, got %v and %v", digest1, digest2) + } +} + var _ builder.Target = &fakeTarget{} type fakeTarget struct { From 0843b41284f519669170c23f789cd469a5741ccf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= Date: Fri, 10 Apr 2026 09:46:17 +0200 Subject: [PATCH 6/7] test: add integration tests for CNCF format packaging and diff ID extraction --- cmd/cli/commands/integration_test.go | 28 +++ pkg/distribution/builder/from_directory.go | 2 + pkg/distribution/modelpack/convert.go | 9 +- .../oci/remote/extract_diffids_test.go | 195 ++++++++++++++++++ 4 files changed, 233 insertions(+), 1 deletion(-) create mode 100644 pkg/distribution/oci/remote/extract_diffids_test.go diff --git a/cmd/cli/commands/integration_test.go b/cmd/cli/commands/integration_test.go index b2f1c33d3..413be2fc5 100644 --- 
a/cmd/cli/commands/integration_test.go +++ b/cmd/cli/commands/integration_test.go @@ -1146,6 +1146,34 @@ func TestIntegration_PackageModel(t *testing.T) { require.NoError(t, err, "Failed to remove model") }) + // Test case 4: Package with CNCF format + t.Run("package GGUF with CNCF format", func(t *testing.T) { + targetTag := "ai/packaged-cncf:latest" + + // Create package options with CNCF format + opts := packageOptions{ + ggufPath: absPath, + tag: targetTag, + format: "cncf", + } + + // Execute the package command using the helper function with test client + t.Logf("Packaging GGUF file as CNCF format %s", targetTag) + err := packageModel(env.ctx, newPackagedCmd(), env.client, opts) + require.NoError(t, err, "Failed to package GGUF model with CNCF format") + + // Verify the model was loaded and tagged + model, err := env.client.Inspect(targetTag, false) + require.NoError(t, err, "Failed to inspect CNCF packaged model") + require.Contains(t, model.Tags, normalizeRef(t, targetTag), "Model should have the expected tag") + + t.Logf("✓ Successfully packaged model with CNCF format: %s (ID: %s)", targetTag, model.ID[7:19]) + + // Cleanup + err = removeModel(env.client, model.ID, true) + require.NoError(t, err, "Failed to remove model") + }) + // Verify all models are cleaned up models, err = listModels(false, env.client, true, false, "") require.NoError(t, err) diff --git a/pkg/distribution/builder/from_directory.go b/pkg/distribution/builder/from_directory.go index 037fa9316..d8ef68929 100644 --- a/pkg/distribution/builder/from_directory.go +++ b/pkg/distribution/builder/from_directory.go @@ -68,6 +68,8 @@ func WithCreatedTime(t time.Time) DirectoryOption { } // WithOutputFormat sets the output artifact format for the directory builder. +// Defaults to BuildFormatDocker if not specified. +// This is the DirectoryOption equivalent of WithFormat (BuildOption). 
func WithOutputFormat(f BuildFormat) DirectoryOption { return func(opts *DirectoryOptions) { opts.Format = f diff --git a/pkg/distribution/modelpack/convert.go b/pkg/distribution/modelpack/convert.go index 47d708c09..b4a8cdb1b 100644 --- a/pkg/distribution/modelpack/convert.go +++ b/pkg/distribution/modelpack/convert.go @@ -54,7 +54,14 @@ func ClassifyLayer(dockerMT oci.MediaType, path string) LayerKind { return classifyByPath(path) } - // Safe default: treat as weight config. + // Default: treat unknown media types (without filepath hints) as weight + // config. This is intentional for the directory-based packaging flow + // where ambiguous files (tokenizer.json, config.json, etc.) are common + // and typically carry configuration rather than model weights. All known + // weight media types — both Docker (MediaTypeGGUF, MediaTypeSafetensors, + // etc.) and CNCF (MediaTypeWeightRaw, etc.) — are handled explicitly in + // the switch above, so this fallback only triggers for truly unrecognized + // media types. return KindWeightConfig } diff --git a/pkg/distribution/oci/remote/extract_diffids_test.go b/pkg/distribution/oci/remote/extract_diffids_test.go new file mode 100644 index 000000000..bffc88d4b --- /dev/null +++ b/pkg/distribution/oci/remote/extract_diffids_test.go @@ -0,0 +1,195 @@ +package remote + +import ( + "encoding/json" + "testing" + + "github.com/docker/model-runner/pkg/distribution/oci" +) + +// Valid 64-char hex strings for SHA256 test hashes. 
+const ( + hexA = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + hexB = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" + hexC = "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc" + hex1 = "1111111111111111111111111111111111111111111111111111111111111111" + hex2 = "2222222222222222222222222222222222222222222222222222222222222222" +) + +func TestExtractDiffIDs_DockerFormat(t *testing.T) { + config := map[string]interface{}{ + "rootfs": map[string]interface{}{ + "type": "rootfs", + "diff_ids": []string{"sha256:" + hexA, "sha256:" + hexB, "sha256:" + hexC}, + }, + } + raw, err := json.Marshal(config) + if err != nil { + t.Fatalf("marshal config: %v", err) + } + + tests := []struct { + name string + index int + wantHex string + wantOk bool + }{ + {"first layer", 0, hexA, true}, + {"second layer", 1, hexB, true}, + {"last layer", 2, hexC, true}, + {"index out of bounds", 3, "", false}, + {"negative index", -1, "", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + h, err := extractDiffIDs(raw, tt.index) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if tt.wantOk { + if h == (oci.Hash{}) { + t.Fatal("expected non-zero hash, got zero") + } + if h.Hex != tt.wantHex { + t.Errorf("expected hex %q, got %q", tt.wantHex, h.Hex) + } + } else { + if h != (oci.Hash{}) { + t.Errorf("expected zero hash, got %v", h) + } + } + }) + } +} + +func TestExtractDiffIDs_CNCFModelPackFormat(t *testing.T) { + config := map[string]interface{}{ + "modelfs": map[string]interface{}{ + "type": "layers", + "diffIds": []string{"sha256:" + hex1, "sha256:" + hex2}, + }, + } + raw, err := json.Marshal(config) + if err != nil { + t.Fatalf("marshal config: %v", err) + } + + tests := []struct { + name string + index int + wantHex string + wantOk bool + }{ + {"first layer", 0, hex1, true}, + {"second layer", 1, hex2, true}, + {"index out of bounds", 2, "", false}, + } + + for _, tt := range tests { + 
t.Run(tt.name, func(t *testing.T) { + h, err := extractDiffIDs(raw, tt.index) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if tt.wantOk { + if h == (oci.Hash{}) { + t.Fatal("expected non-zero hash, got zero") + } + if h.Hex != tt.wantHex { + t.Errorf("expected hex %q, got %q", tt.wantHex, h.Hex) + } + } else { + if h != (oci.Hash{}) { + t.Errorf("expected zero hash, got %v", h) + } + } + }) + } +} + +func TestExtractDiffIDs_DockerTakesPrecedence(t *testing.T) { + // When both rootfs and modelfs are present, Docker format should win. + config := map[string]interface{}{ + "rootfs": map[string]interface{}{ + "type": "rootfs", + "diff_ids": []string{"sha256:" + hexA}, + }, + "modelfs": map[string]interface{}{ + "type": "layers", + "diffIds": []string{"sha256:" + hex1}, + }, + } + raw, err := json.Marshal(config) + if err != nil { + t.Fatalf("marshal config: %v", err) + } + + h, err := extractDiffIDs(raw, 0) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if h.Hex != hexA { + t.Errorf("expected Docker format to take precedence (hex %q), got %q", hexA, h.Hex) + } +} + +func TestExtractDiffIDs_EmptyConfig(t *testing.T) { + raw := []byte(`{}`) + h, err := extractDiffIDs(raw, 0) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if h != (oci.Hash{}) { + t.Errorf("expected zero hash for empty config, got %v", h) + } +} + +func TestExtractDiffIDs_InvalidJSON(t *testing.T) { + raw := []byte(`not valid json`) + _, err := extractDiffIDs(raw, 0) + if err == nil { + t.Fatal("expected error for invalid JSON, got nil") + } +} + +func TestExtractDiffIDs_MalformedRootFS(t *testing.T) { + // rootfs exists but is not an object — should fall through gracefully. 
+ config := map[string]interface{}{ + "rootfs": "not an object", + } + raw, err := json.Marshal(config) + if err != nil { + t.Fatalf("marshal config: %v", err) + } + + h, err := extractDiffIDs(raw, 0) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if h != (oci.Hash{}) { + t.Errorf("expected zero hash for malformed rootfs, got %v", h) + } +} + +func TestExtractDiffIDs_MalformedModelFS(t *testing.T) { + // modelfs exists but diffIds contains invalid hashes (not valid SHA256). + config := map[string]interface{}{ + "modelfs": map[string]interface{}{ + "type": "layers", + "diffIds": []string{"not-a-valid-hash"}, + }, + } + raw, err := json.Marshal(config) + if err != nil { + t.Fatalf("marshal config: %v", err) + } + + h, err := extractDiffIDs(raw, 0) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if h != (oci.Hash{}) { + t.Errorf("expected zero hash for malformed modelfs hash, got %v", h) + } +} From b62754dd068182707e875533d6a1f15207730c02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= Date: Fri, 10 Apr 2026 10:49:11 +0200 Subject: [PATCH 7/7] fix: update CNCF model detection to use artifactType field in manifest --- pkg/distribution/internal/bundle/unpack.go | 4 +++- pkg/distribution/internal/bundle/unpack_test.go | 7 ++++++- pkg/distribution/internal/partial/partial.go | 8 ++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/pkg/distribution/internal/bundle/unpack.go b/pkg/distribution/internal/bundle/unpack.go index f1bec8e61..0a436a9e6 100644 --- a/pkg/distribution/internal/bundle/unpack.go +++ b/pkg/distribution/internal/bundle/unpack.go @@ -40,6 +40,8 @@ func isV02Model(model types.ModelArtifact) bool { } // isCNCFModel checks if the model was packaged using the CNCF ModelPack format. +// Detection uses the manifest's artifactType field, which is required by the +// CNCF model-spec ("application/vnd.cncf.model.manifest.v1+json"). 
// CNCF ModelPack uses a layer-per-file approach with filepath annotations, // similar to V0.2, so it can be unpacked using UnpackFromLayers. func isCNCFModel(model types.ModelArtifact) bool { @@ -47,7 +49,7 @@ func isCNCFModel(model types.ModelArtifact) bool { if err != nil { return false } - return manifest.Config.MediaType == modelpack.MediaTypeModelConfigV1 + return manifest.ArtifactType == modelpack.ArtifactTypeModelManifest } // unpackLegacy is the original V0.1 unpacking logic that uses model.GGUFPaths(), model.SafetensorsPaths(), etc. diff --git a/pkg/distribution/internal/bundle/unpack_test.go b/pkg/distribution/internal/bundle/unpack_test.go index 61c49d32e..114b1c2d1 100644 --- a/pkg/distribution/internal/bundle/unpack_test.go +++ b/pkg/distribution/internal/bundle/unpack_test.go @@ -235,11 +235,13 @@ func TestIsCNCFModel(t *testing.T) { tests := []struct { name string configMediaType oci.MediaType + artifactType string expected bool }{ { name: "CNCF ModelPack config V1", configMediaType: modelpack.MediaTypeModelConfigV1, + artifactType: modelpack.ArtifactTypeModelManifest, expected: true, }, { @@ -256,9 +258,10 @@ func TestIsCNCFModel(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Create a minimal artifact with the given config media type + // Create a minimal artifact with the given config media type and artifact type artifact := &testArtifactWithConfigMediaType{ configMediaType: tt.configMediaType, + artifactType: tt.artifactType, } result := isCNCFModel(artifact) if result != tt.expected { @@ -271,10 +274,12 @@ func TestIsCNCFModel(t *testing.T) { // testArtifactWithConfigMediaType is a minimal ModelArtifact for testing isCNCFModel/isV02Model. 
type testArtifactWithConfigMediaType struct { configMediaType oci.MediaType + artifactType string } func (a *testArtifactWithConfigMediaType) Manifest() (*oci.Manifest, error) { return &oci.Manifest{ + ArtifactType: a.artifactType, Config: oci.Descriptor{ MediaType: a.configMediaType, }, diff --git a/pkg/distribution/internal/partial/partial.go b/pkg/distribution/internal/partial/partial.go index 397ba0327..ca73bfb48 100644 --- a/pkg/distribution/internal/partial/partial.go +++ b/pkg/distribution/internal/partial/partial.go @@ -357,6 +357,10 @@ func ManifestForLayers(i WithLayers) (*oci.Manifest, error) { return nil, fmt.Errorf("get layers: %w", err) } + type descriptorProvider interface { + GetDescriptor() oci.Descriptor + } + var layers []oci.Descriptor for _, l := range ls { // Check if this is our Layer type which embeds the full descriptor @@ -364,6 +368,10 @@ func ManifestForLayers(i WithLayers) (*oci.Manifest, error) { if layer, ok := l.(*Layer); ok { // Use the embedded descriptor directly to preserve annotations. layers = append(layers, layer.Descriptor) + } else if dp, ok := l.(descriptorProvider); ok { + // Use GetDescriptor() to preserve annotations from wrapper + // types like remappedLayer. + layers = append(layers, dp.GetDescriptor()) } else { // Fall back to computing descriptor for other layer types. mt, err := l.MediaType()