diff --git a/baseten/client/modelconfig/__init__.py b/baseten/client/modelconfig/__init__.py
new file mode 100644
index 0000000..621e1c0
--- /dev/null
+++ b/baseten/client/modelconfig/__init__.py
@@ -0,0 +1,134 @@
+# Code generated by apigen. DO NOT EDIT.
+
+"""Generated models for the Baseten model config (truss config.yaml).
+
+Types in this module are generated from the Truss config JSON schema and are
+NOT covered by any stability or compatibility guarantees. They may change
+without notice between versions.
+"""
+
+from ._models import (
+    AcceleratorSpec,
+    AdditionalAutoscalingConfig,
+    AutoscalingMetric,
+    BISLLM,
+    BaseImage,
+    Build,
+    CacheInternal,
+    CheckpointList,
+    CheckpointRepository,
+    CheckpointSource,
+    DockerAuthSettings,
+    DockerAuthType,
+    DockerServer,
+    ExternalData,
+    ExternalDataItem,
+    GRPCOptions,
+    HTTPOptions,
+    HealthChecks,
+    KvCacheHostMemoryBytes,
+    LookaheadNgramSize,
+    LookaheadVerificationSetSize,
+    LookaheadWindowsSize,
+    MaxSeqLen,
+    ModelCache,
+    ModelConfig,
+    ModelMetadata,
+    ModelRepo,
+    ModelRepoCacheInternal,
+    ModelRepoSourceKind,
+    ModelServer,
+    ModelSpecDecMode,
+    ModelSpeculatorConfiguration,
+    ModelTRTLLMBatchSchedulerPolicy,
+    ModelTRTLLMBuildConfiguration,
+    ModelTRTLLMLoraConfiguration,
+    ModelTRTLLMModel,
+    ModelTRTLLMPluginConfiguration,
+    ModelTRTLLMQuantizationType,
+    ModelTRTLLMRuntimeConfiguration,
+    ModelTRTQuantizationConfiguration,
+    NodeCount,
+    NumBuilderGpus,
+    NumDraftTokens,
+    RemoteSSH,
+    RequestDefaultMaxTokens,
+    Resources,
+    Runtime,
+    TRTLLMConfiguration,
+    TRTLLMConfigurationV1,
+    TRTLLMConfigurationV2,
+    TRTLLMRuntimeConfigurationV2,
+    TrainingArtifactReference,
+    VersionsOverrides,
+    VolumeFolder,
+    WebserverDefaultRoute,
+    WebsocketOptions,
+    Weights,
+    WeightsAuth,
+    WeightsAuthMethod,
+    WeightsSource,
+)
+
+__all__ = [
+    "AcceleratorSpec",
+    "AdditionalAutoscalingConfig",
+    "AutoscalingMetric",
+    "BISLLM",
+    "BaseImage",
+    "Build",
+    "CacheInternal",
+    "CheckpointList",
+    "CheckpointRepository",
+    "CheckpointSource",
+    "DockerAuthSettings",
+    "DockerAuthType",
+    "DockerServer",
+    "ExternalData",
+    "ExternalDataItem",
+    "GRPCOptions",
+    "HTTPOptions",
+    "HealthChecks",
+    "KvCacheHostMemoryBytes",
+    "LookaheadNgramSize",
+    "LookaheadVerificationSetSize",
+    "LookaheadWindowsSize",
+    "MaxSeqLen",
+    "ModelCache",
+    "ModelConfig",
+    "ModelMetadata",
+    "ModelRepo",
+    "ModelRepoCacheInternal",
+    "ModelRepoSourceKind",
+    "ModelServer",
+    "ModelSpecDecMode",
+    "ModelSpeculatorConfiguration",
+    "ModelTRTLLMBatchSchedulerPolicy",
+    "ModelTRTLLMBuildConfiguration",
+    "ModelTRTLLMLoraConfiguration",
+    "ModelTRTLLMModel",
+    "ModelTRTLLMPluginConfiguration",
+    "ModelTRTLLMQuantizationType",
+    "ModelTRTLLMRuntimeConfiguration",
+    "ModelTRTQuantizationConfiguration",
+    "NodeCount",
+    "NumBuilderGpus",
+    "NumDraftTokens",
+    "RemoteSSH",
+    "RequestDefaultMaxTokens",
+    "Resources",
+    "Runtime",
+    "TRTLLMConfiguration",
+    "TRTLLMConfigurationV1",
+    "TRTLLMConfigurationV2",
+    "TRTLLMRuntimeConfigurationV2",
+    "TrainingArtifactReference",
+    "VersionsOverrides",
+    "VolumeFolder",
+    "WebserverDefaultRoute",
+    "WebsocketOptions",
+    "Weights",
+    "WeightsAuth",
+    "WeightsAuthMethod",
+    "WeightsSource",
+]
diff --git a/baseten/client/modelconfig/_models.py b/baseten/client/modelconfig/_models.py
new file mode 100644
index 0000000..f85ab6c
--- /dev/null
+++ b/baseten/client/modelconfig/_models.py
@@ -0,0 +1,1211 @@
+# generated by datamodel-codegen:
+#   filename:
+
+from __future__ import annotations
+from typing import Annotated, Any, Literal
+from pydantic import BaseModel, ConfigDict, Field, RootModel
+from enum import Enum
+
+
+class ModelMetadata(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    example_model_input: Annotated[
+        Any | None,
+        Field(
+            description="Sample input that populates the Baseten playground.",
+            examples=[{"prompt": "What is the meaning of life?"}],
+        ),
+    ] = None
+
+
+class AcceleratorSpec(RootModel[str | None]):
+    root: str | None
+
+
+class AutoscalingMetric(BaseModel):
+    name: Annotated[str, Field(title="Name")]
+    target: Annotated[float, Field(title="Target")]
+
+
+class CheckpointSource(Enum):
+    HF = "HF"
+    GCS = "GCS"
+    S3 = "S3"
+    AZURE = "AZURE"
+    LOCAL = "LOCAL"
+    REMOTE_URL = "REMOTE_URL"
+    BASETEN_TRAINING = "BASETEN_TRAINING"
+
+
+class DockerAuthType(Enum):
+    GCP_SERVICE_ACCOUNT_JSON = "GCP_SERVICE_ACCOUNT_JSON"
+    AWS_IAM = "AWS_IAM"
+    AWS_OIDC = "AWS_OIDC"
+    GCP_OIDC = "GCP_OIDC"
+    REGISTRY_SECRET = "REGISTRY_SECRET"
+
+
+class DockerServer(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    start_command: Annotated[
+        str | None,
+        Field(
+            description="The command to start the server. Required when no_build is not true.",
+            title="Start Command",
+        ),
+    ] = None
+    server_port: Annotated[
+        int,
+        Field(
+            description="The port where the server runs. Port 8080 is reserved by Baseten's internal reverse proxy and cannot be used.",
+            title="Server Port",
+        ),
+    ]
+    predict_endpoint: Annotated[
+        str,
+        Field(
+            description="The endpoint for inference requests. This is mapped to Baseten's /predict route.",
+            title="Predict Endpoint",
+        ),
+    ]
+    readiness_endpoint: Annotated[
+        str,
+        Field(
+            description="The endpoint for readiness probes. Determines when the container can accept traffic.",
+            title="Readiness Endpoint",
+        ),
+    ]
+    liveness_endpoint: Annotated[
+        str,
+        Field(
+            description="The endpoint for liveness probes. Determines if the container needs to be restarted.",
+            title="Liveness Endpoint",
+        ),
+    ]
+    run_as_user_id: Annotated[
+        int | None,
+        Field(
+            description="The Linux UID to run the server process as inside the container. Use this when your base image expects a specific non-root user (for example, NVIDIA NIM containers).",
+            title="Run As User Id",
+        ),
+    ] = None
+    no_build: Annotated[
+        bool | None,
+        Field(
+            description="Skip the build step and deploy the base image as-is. Baseten copies the image to its container registry without running docker build or modifying the image in any way.",
+            title="No Build",
+        ),
+    ] = None
+
+
+class ExternalDataItem(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    url: Annotated[
+        str,
+        Field(
+            description="URL to download the data from. Currently only files are allowed.",
+            min_length=1,
+            title="Url",
+        ),
+    ]
+    local_data_path: Annotated[
+        str,
+        Field(
+            description="Path relative to the data directory where the remote file will be downloaded.",
+            min_length=1,
+            title="Local Data Path",
+        ),
+    ]
+    backend: Annotated[
+        str | None,
+        Field(
+            description="Download backend to use. Defaults to 'http_public'.",
+            title="Backend",
+        ),
+    ] = "http_public"
+    name: Annotated[
+        str | None,
+        Field(
+            description="Optional name for the download. Path relative to data directory.",
+            title="Name",
+        ),
+    ] = None
+
+
+class GRPCOptions(BaseModel):
+    kind: Annotated[Literal["grpc"], Field(title="Kind")] = "grpc"
+
+
+class HTTPOptions(BaseModel):
+    kind: Annotated[Literal["http"], Field(title="Kind")] = "http"
+
+
+class HealthChecks(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    restart_check_delay_seconds: Annotated[
+        int | None,
+        Field(
+            description="The delay in seconds before starting restart checks. Defaults to platform-determined value when not set.",
+            title="Restart Check Delay Seconds",
+        ),
+    ] = None
+    restart_threshold_seconds: Annotated[
+        int | None,
+        Field(
+            description="The time in seconds after which an unhealthy instance is restarted. Defaults to platform-determined value when not set.",
+            title="Restart Threshold Seconds",
+        ),
+    ] = None
+    stop_traffic_threshold_seconds: Annotated[
+        int | None,
+        Field(
+            description="The time in seconds after which traffic is stopped to an unhealthy instance. Defaults to platform-determined value when not set.",
+            title="Stop Traffic Threshold Seconds",
+        ),
+    ] = None
+    startup_threshold_seconds: Annotated[
+        int | None,
+        Field(
+            description="The time in seconds to wait for a model to start before marking it as unhealthy. Defaults to platform-determined value when not set.",
+            title="Startup Threshold Seconds",
+        ),
+    ] = None
+
+
+class VolumeFolder(RootModel[str | None]):  # root may be None
+    root: Annotated[str | None, Field(min_length=1, title="Volume Folder")] = None
+
+
+class ModelRepoSourceKind(Enum):
+    hf = "hf"
+    gcs = "gcs"
+    s3 = "s3"
+    azure = "azure"
+
+
+class ModelServer(Enum):
+    TrussServer = "TrussServer"
+    TRT_LLM = "TRT_LLM"
+
+
+class RemoteSSH(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    enabled: Annotated[
+        bool | None,
+        Field(
+            description="If true, enables SSH access to running model instances.",
+            title="Enabled",
+        ),
+    ] = False
+
+
+class NodeCount(RootModel[int | None]):  # root may be None
+    root: Annotated[
+        int | None,
+        Field(
+            description="Number of nodes for multi-node deployments.",
+            ge=1,
+            title="Node Count",
+        ),
+    ] = None
+
+
+class Resources(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    cpu: Annotated[
+        str | None,
+        Field(
+            description="CPU resources needed, expressed as either a raw number or millicpus. For example, 500m is half of a CPU core.",
+            examples=["1", "500m", "4"],
+            title="Cpu",
+        ),
+    ] = "1"
+    memory: Annotated[
+        str | None,
+        Field(
+            description="CPU RAM needed, expressed as a number with units. Units include Gi (Gibibytes), G (Gigabytes), Mi (Mebibytes), and M (Megabytes).",
+            examples=["2Gi", "512Mi"],
+            title="Memory",
+        ),
+    ] = "2Gi"
+    accelerator: Annotated[
+        AcceleratorSpec | None,
+        Field(
+            description="The GPU type for your instance. To request multiple GPUs, use the ':' operator (e.g. L4:4).",
+            examples=["A100", "T4:2", "H100:8"],
+        ),
+    ] = None
+    instance_type: Annotated[
+        str | None,
+        Field(
+            description="The full SKU name for the instance type. When specified, cpu, memory, and accelerator fields are ignored.",
+            examples=["L4:4x16"],
+            title="Instance Type",
+        ),
+    ] = None
+    node_count: Annotated[
+        NodeCount | None,
+        Field(
+            description="Number of nodes for multi-node deployments.",
+            title="Node Count",
+        ),
+    ] = None
+
+
+class MaxSeqLen(RootModel[int | None]):  # root may be None
+    root: Annotated[int | None, Field(ge=1, le=1048576, title="Max Seq Len")] = None
+
+
+class TRTLLMRuntimeConfigurationV2(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    max_seq_len: Annotated[MaxSeqLen | None, Field(title="Max Seq Len")] = None
+    max_batch_size: Annotated[
+        int | None, Field(ge=1, le=2048, title="Max Batch Size")
+    ] = 256
+    max_num_tokens: Annotated[
+        int | None, Field(gt=64, le=131072, title="Max Num Tokens")
+    ] = 8192
+    tensor_parallel_size: Annotated[
+        int | None, Field(ge=1, title="Tensor Parallel Size")
+    ] = 1
+    enable_chunked_prefill: Annotated[
+        bool | None, Field(title="Enable Chunked Prefill")
+    ] = True
+    served_model_name: Annotated[str | None, Field(title="Served Model Name")] = None
+    patch_kwargs: Annotated[
+        dict[str, str | int | float | dict[str, Any] | list[Any] | None] | None,
+        Field(title="Patch Kwargs"),
+    ] = None
+
+
+class TrainingArtifactReference(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    training_job_id: Annotated[
+        str,
+        Field(
+            description="The training job id that the artifact reference belongs to.",
+            title="Training Job Id",
+        ),
+    ]
+    paths: Annotated[
+        list[str] | None,
+        Field(
+            description="The paths of the files to download which can contain * or ?.",
+            title="Paths",
+        ),
+    ] = None
+
+
+class VersionsOverrides(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    engine_builder_version: Annotated[
+        str | None, Field(title="Engine Builder Version")
+    ] = None
+    briton_version: Annotated[str | None, Field(title="Briton Version")] = None
+    bei_version: Annotated[str | None, Field(title="Bei Version")] = None
+    bei_bert_version: Annotated[str | None, Field(title="Bei Bert Version")] = None
+    v2_llm_version: Annotated[str | None, Field(title="V2 Llm Version")] = None
+
+
+class WebsocketOptions(BaseModel):
+    kind: Annotated[Literal["websocket"], Field(title="Kind")] = "websocket"
+    ping_interval_seconds: Annotated[
+        float | None, Field(title="Ping Interval Seconds")
+    ] = None
+    ping_timeout_seconds: Annotated[
+        float | None, Field(title="Ping Timeout Seconds")
+    ] = None
+
+
+class WeightsAuthMethod(Enum):
+    CUSTOM_SECRET = "CUSTOM_SECRET"
+    AWS_OIDC = "AWS_OIDC"
+    GCP_OIDC = "GCP_OIDC"
+
+
+class ModelSpecDecMode(Enum):
+    DRAFT_TOKENS_EXTERNAL = "DRAFT_TOKENS_EXTERNAL"
+    LOOKAHEAD_DECODING = "LOOKAHEAD_DECODING"
+
+
+class NumDraftTokens(RootModel[int | None]):  # root may be None
+    root: Annotated[int | None, Field(ge=1, title="Num Draft Tokens")] = None
+
+
+class LookaheadWindowsSize(RootModel[int | None]):  # root may be None
+    root: Annotated[int | None, Field(ge=1, title="Lookahead Windows Size")] = None
+
+
+class LookaheadNgramSize(RootModel[int | None]):  # root may be None
+    root: Annotated[int | None, Field(ge=1, title="Lookahead Ngram Size")] = None
+
+
+class LookaheadVerificationSetSize(RootModel[int | None]):  # root may be None
+    root: Annotated[
+        int | None, Field(ge=1, title="Lookahead Verification Set Size")
+    ] = None
+
+
+class ModelTRTLLMBatchSchedulerPolicy(Enum):
+    max_utilization = "max_utilization"
+    guaranteed_no_evict = "guaranteed_no_evict"
+
+
+class NumBuilderGpus(RootModel[int | None]):  # root may be None
+    root: Annotated[int | None, Field(ge=1, title="Num Builder Gpus")] = None
+
+
+class ModelTRTLLMLoraConfiguration(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    max_lora_rank: Annotated[int | None, Field(title="Max Lora Rank")] = 64
+    lora_target_modules: Annotated[
+        list[str] | None, Field(title="Lora Target Modules")
+    ] = []
+
+
+class ModelTRTLLMModel(Enum):
+    encoder = "encoder"
+    encoder_bert = "encoder_bert"
+    decoder = "decoder"
+    palmyra = "palmyra"
+    qwen = "qwen"
+    llama = "llama"
+    mistral = "mistral"
+    deepseek = "deepseek"
+    whisper = "whisper"
+
+
+class ModelTRTLLMPluginConfiguration(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    paged_kv_cache: Annotated[bool | None, Field(title="Paged Kv Cache")] = True
+    use_paged_context_fmha: Annotated[
+        bool | None, Field(title="Use Paged Context Fmha")
+    ] = True
+    use_fp8_context_fmha: Annotated[
+        bool | None, Field(title="Use Fp8 Context Fmha")
+    ] = False
+
+
+class ModelTRTLLMQuantizationType(Enum):
+    no_quant = "no_quant"
+    weights_int8 = "weights_int8"
+    weights_kv_int8 = "weights_kv_int8"
+    weights_int4 = "weights_int4"
+    weights_int4_kv_int8 = "weights_int4_kv_int8"
+    smooth_quant = "smooth_quant"
+    fp8 = "fp8"
+    fp8_kv = "fp8_kv"
+    fp4 = "fp4"
+    fp4_kv = "fp4_kv"
+    fp4_mlp_only = "fp4_mlp_only"
+
+
+class KvCacheHostMemoryBytes(RootModel[int | None]):  # root may be None
+    root: Annotated[int | None, Field(ge=1, title="Kv Cache Host Memory Bytes")] = None
+
+
+class RequestDefaultMaxTokens(RootModel[int | None]):  # root may be None
+    root: Annotated[int | None, Field(ge=1, title="Request Default Max Tokens")] = None
+
+
+class WebserverDefaultRoute(Enum):
+    field_v1_embeddings = "/v1/embeddings"
+    field_rerank = "/rerank"
+    field_predict = "/predict"
+    field_predict_tokens = "/predict_tokens"
+
+
+class ModelTRTLLMRuntimeConfiguration(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    kv_cache_free_gpu_mem_fraction: Annotated[
+        float | None, Field(title="Kv Cache Free Gpu Mem Fraction")
+    ] = 0.9
+    kv_cache_host_memory_bytes: Annotated[
+        KvCacheHostMemoryBytes | None, Field(title="Kv Cache Host Memory Bytes")
+    ] = None
+    enable_chunked_context: Annotated[
+        bool | None, Field(title="Enable Chunked Context")
+    ] = True
+    batch_scheduler_policy: ModelTRTLLMBatchSchedulerPolicy | None = (
+        ModelTRTLLMBatchSchedulerPolicy.guaranteed_no_evict
+    )
+    request_default_max_tokens: Annotated[
+        RequestDefaultMaxTokens | None, Field(title="Request Default Max Tokens")
+    ] = None
+    served_model_name: Annotated[str | None, Field(title="Served Model Name")] = None
+    total_token_limit: Annotated[int | None, Field(title="Total Token Limit")] = 500000
+    webserver_default_route: Annotated[
+        WebserverDefaultRoute | None, Field(title="Webserver Default Route")
+    ] = None
+
+
+class ModelTRTQuantizationConfiguration(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    calib_size: Annotated[int | None, Field(title="Calib Size")] = 1024
+    calib_dataset: Annotated[str | None, Field(title="Calib Dataset")] = (
+        "abisee/cnn_dailymail"
+    )
+    calib_max_seq_length: Annotated[int | None, Field(title="Calib Max Seq Length")] = (
+        1536
+    )
+
+
+class AdditionalAutoscalingConfig(BaseModel):
+    metrics: Annotated[
+        list[AutoscalingMetric],
+        Field(description="List of metric targets for autoscaling.", title="Metrics"),
+    ]
+
+
+class BISLLM(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    config: Annotated[
+        dict[str, Any] | None,
+        Field(
+            description="Configuration options for BIS LLM deployments.", title="Config"
+        ),
+    ] = None
+    version: Annotated[
+        str | None,
+        Field(
+            description="The version of the BIS LLM deployment stack.", title="Version"
+        ),
+    ] = ""
+    additional_autoscaling_config: Annotated[
+        AdditionalAutoscalingConfig | None,
+        Field(description="Additional autoscaling configuration"),
+    ] = None
+
+
+class Build(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    model_server: ModelServer | None = ModelServer.TrussServer
+    arguments: Annotated[dict[str, Any] | None, Field(title="Arguments")] = None
+    secret_to_path_mapping: Annotated[
+        dict[str, str] | None,
+        Field(
+            description="Grants access to secrets during the build. Provide a mapping between a secret and a path on the image.",
+            title="Secret To Path Mapping",
+        ),
+    ] = None
+    no_cache: Annotated[bool | None, Field(title="No Cache")] = False
+
+
+class CheckpointList(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    download_folder: Annotated[
+        str | None,
+        Field(
+            description="The folder to download the checkpoints to.",
+            examples=["/tmp/training_checkpoints"],
+            title="Download Folder",
+        ),
+    ] = "/tmp/training_checkpoints"
+    artifact_references: Annotated[
+        list[TrainingArtifactReference] | None, Field(title="Artifact References")
+    ] = None
+
+
+class CheckpointRepository(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    source: CheckpointSource
+    repo: Annotated[str, Field(title="Repo")]
+    revision: Annotated[str | None, Field(title="Revision")] = None
+    runtime_secret_name: Annotated[str | None, Field(title="Runtime Secret Name")] = (
+        "hf_access_token"
+    )
+
+
+class DockerAuthSettings(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    aws_oidc_role_arn: Annotated[
+        str | None,
+        Field(
+            description="AWS IAM role ARN for OIDC authentication.",
+            title="Aws Oidc Role Arn",
+        ),
+    ] = None
+    aws_oidc_region: Annotated[
+        str | None,
+        Field(
+            description="AWS region for OIDC authentication.", title="Aws Oidc Region"
+        ),
+    ] = None
+    gcp_oidc_service_account: Annotated[
+        str | None,
+        Field(
+            description="GCP service account name for OIDC authentication.",
+            title="Gcp Oidc Service Account",
+        ),
+    ] = None
+    gcp_oidc_workload_id_provider: Annotated[
+        str | None,
+        Field(
+            description="GCP workload identity provider for OIDC authentication.",
+            title="Gcp Oidc Workload Id Provider",
+        ),
+    ] = None
+    auth_method: DockerAuthType
+    registry: Annotated[str | None, Field(title="Registry")] = ""
+    secret_name: Annotated[str | None, Field(title="Secret Name")] = None
+    aws_access_key_id_secret_name: Annotated[
+        str | None, Field(title="Aws Access Key Id Secret Name")
+    ] = "aws_access_key_id"
+    aws_secret_access_key_secret_name: Annotated[
+        str | None, Field(title="Aws Secret Access Key Secret Name")
+    ] = "aws_secret_access_key"
+
+
+class ExternalData(RootModel[list[ExternalDataItem]]):
+    root: Annotated[
+        list[ExternalDataItem],
+        Field(
+            description="[Experimental] External data is data that is not contained in the Truss folder.\n\nTypically, this will be data stored remotely. This data is guaranteed to be made\navailable under the data directory of the truss.",
+            title="ExternalData",
+        ),
+    ]
+
+
+class ModelRepo(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    repo_id: Annotated[str, Field(min_length=1, title="Repo Id")]
+    revision: Annotated[str | None, Field(title="Revision")] = ""
+    allow_patterns: Annotated[list[str] | None, Field(title="Allow Patterns")] = None
+    ignore_patterns: Annotated[list[str] | None, Field(title="Ignore Patterns")] = None
+    volume_folder: Annotated[VolumeFolder | None, Field(title="Volume Folder")] = None
+    use_volume: Annotated[bool, Field(title="Use Volume")]
+    kind: ModelRepoSourceKind | None = ModelRepoSourceKind.hf
+    runtime_secret_name: Annotated[str | None, Field(title="Runtime Secret Name")] = (
+        "hf_access_token"
+    )
+
+
+class ModelRepoCacheInternal(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    repo_id: Annotated[str, Field(min_length=1, title="Repo Id")]
+    revision: Annotated[str | None, Field(title="Revision")] = ""
+    allow_patterns: Annotated[list[str] | None, Field(title="Allow Patterns")] = None
+    ignore_patterns: Annotated[list[str] | None, Field(title="Ignore Patterns")] = None
+    volume_folder: Annotated[VolumeFolder | None, Field(title="Volume Folder")] = None
+    use_volume: Annotated[bool | None, Field(title="Use Volume")] = False
+    kind: ModelRepoSourceKind | None = ModelRepoSourceKind.hf
+    runtime_secret_name: Annotated[str | None, Field(title="Runtime Secret Name")] = (
+        "hf_access_token"
+    )
+
+
+class Runtime(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    predict_concurrency: Annotated[
+        int | None,
+        Field(
+            description="The number of concurrent requests that can run in your model's predict method. Increase this if your model supports parallelism.",
+            title="Predict Concurrency",
+        ),
+    ] = 1
+    streaming_read_timeout: Annotated[
+        int | None,
+        Field(
+            description="The timeout in seconds for streaming read operations.",
+            title="Streaming Read Timeout",
+        ),
+    ] = 60
+    enable_tracing_data: Annotated[
+        bool | None,
+        Field(
+            description="If true, enables trace data export with built-in OTEL instrumentation. May add performance overhead.",
+            title="Enable Tracing Data",
+        ),
+    ] = False
+    enable_debug_logs: Annotated[
+        bool | None,
+        Field(
+            description="If true, sets the Truss server log level to DEBUG instead of INFO.",
+            title="Enable Debug Logs",
+        ),
+    ] = False
+    transport: Annotated[
+        HTTPOptions | WebsocketOptions | GRPCOptions | None,
+        Field(
+            description="The transport protocol for your model. Supports http (default), websocket, and grpc.",
+            discriminator="kind",
+            title="Transport",
+        ),
+    ] = None
+    is_websocket_endpoint: Annotated[
+        bool | None,
+        Field(
+            description="DEPRECATED. Do not set manually. Automatically inferred from transport.kind == websocket.",
+            title="Is Websocket Endpoint",
+        ),
+    ] = None
+    health_checks: HealthChecks | None = None
+    remote_ssh: RemoteSSH | None = None
+    truss_server_version_override: Annotated[
+        str | None,
+        Field(
+            description="By default, truss servers are built from the same release as the CLI used to push. This field allows specifying a pinned/specific version instead.",
+            title="Truss Server Version Override",
+        ),
+    ] = None
+
+
+class WeightsAuth(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    aws_oidc_role_arn: Annotated[
+        str | None,
+        Field(
+            description="AWS IAM role ARN for OIDC authentication.",
+            title="Aws Oidc Role Arn",
+        ),
+    ] = None
+    aws_oidc_region: Annotated[
+        str | None,
+        Field(
+            description="AWS region for OIDC authentication.", title="Aws Oidc Region"
+        ),
+    ] = None
+    gcp_oidc_service_account: Annotated[
+        str | None,
+        Field(
+            description="GCP service account name for OIDC authentication.",
+            title="Gcp Oidc Service Account",
+        ),
+    ] = None
+    gcp_oidc_workload_id_provider: Annotated[
+        str | None,
+        Field(
+            description="GCP workload identity provider for OIDC authentication.",
+            title="Gcp Oidc Workload Id Provider",
+        ),
+    ] = None
+    auth_method: Annotated[
+        WeightsAuthMethod,
+        Field(
+            description="Authentication method for downloading weights from the source."
+        ),
+    ]
+    auth_secret_name: Annotated[
+        str | None,
+        Field(
+            description="Baseten secret name containing credentials for accessing the source.",
+            title="Auth Secret Name",
+        ),
+    ] = None
+
+
+class WeightsSource(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    source: Annotated[
+        str,
+        Field(
+            description="URI with scheme prefix. Use hf://, s3://, gs://, azure://, r2://, or https://. For HuggingFace, use @revision suffix (e.g., hf://owner/repo@main).",
+            min_length=1,
+            title="Source",
+        ),
+    ]
+    mount_location: Annotated[
+        str,
+        Field(
+            description="Absolute path where weights will be mounted at runtime.",
+            min_length=1,
+            title="Mount Location",
+        ),
+    ]
+    auth: Annotated[
+        WeightsAuth | None,
+        Field(
+            description="Authentication configuration for accessing the weights source."
+        ),
+    ] = None
+    auth_secret_name: Annotated[
+        str | None,
+        Field(
+            description="Baseten secret name containing credentials. Can also be specified in auth.auth_secret_name.",
+            title="Auth Secret Name",
+        ),
+    ] = None
+    allow_patterns: Annotated[
+        list[str] | None,
+        Field(
+            description="File patterns to include (e.g., ['*.safetensors']).",
+            title="Allow Patterns",
+        ),
+    ] = None
+    ignore_patterns: Annotated[
+        list[str] | None,
+        Field(
+            description="File patterns to exclude (e.g., ['*.md']).",
+            title="Ignore Patterns",
+        ),
+    ] = None
+
+
+class BaseImage(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    image: Annotated[
+        str | None,
+        Field(
+            description="The path to the Docker image.",
+            examples=["vllm/vllm-openai:v0.7.3", "nvcr.io/nvidia/nemo:23.03"],
+            title="Image",
+        ),
+    ] = ""
+    python_executable_path: Annotated[
+        str | None,
+        Field(
+            description="A path to the Python executable on the image.",
+            examples=["/usr/bin/python"],
+            title="Python Executable Path",
+        ),
+    ] = ""
+    docker_auth: Annotated[
+        DockerAuthSettings | None,
+        Field(
+            description="Authentication configuration for a private Docker registry."
+        ),
+    ] = None
+
+
+class CacheInternal(RootModel[list[ModelRepoCacheInternal]]):
+    root: Annotated[list[ModelRepoCacheInternal], Field(title="CacheInternal")]
+
+
+class ModelCache(RootModel[list[ModelRepo]]):
+    root: Annotated[list[ModelRepo], Field(title="ModelCache")]
+
+
+class Weights(RootModel[list[WeightsSource]]):
+    root: Annotated[
+        list[WeightsSource],
+        Field(
+            description="List of weights sources for the new weights API.",
+            title="Weights",
+        ),
+    ]
+
+
+class ModelConfig(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    model_name: Annotated[
+        str | None,
+        Field(
+            description="The name of your model. This is displayed in the model details page in the Baseten UI.",
+            title="Model Name",
+        ),
+    ] = None
+    model_metadata: Annotated[
+        ModelMetadata | None,
+        Field(
+            description="A flexible field for additional metadata. The entire config file is available to your model at runtime.",
+            title="Model Metadata",
+        ),
+    ] = None
+    description: Annotated[
+        str | None,
+        Field(description="A description of your model.", title="Description"),
+    ] = None
+    examples_filename: Annotated[
+        str | None,
+        Field(
+            description="Path to a file containing example model inputs.",
+            title="Examples Filename",
+        ),
+    ] = "examples.yaml"
+    data_dir: Annotated[
+        str | None,
+        Field(description="The folder for data files in your Truss.", title="Data Dir"),
+    ] = "data"
+    external_data: Annotated[
+        ExternalData | None,
+        Field(
+            description="External data to be downloaded and made available under the data directory at serving time."
+        ),
+    ] = None
+    external_package_dirs: Annotated[
+        list[str] | None,
+        Field(
+            description="Use external_package_dirs to access custom packages located outside your Truss. This lets multiple Trusses share the same package.",
+            title="External Package Dirs",
+        ),
+    ] = None
+    python_version: Annotated[
+        str | None,
+        Field(
+            description="The Python version to use.",
+            examples=["py313", "py312", "py311", "py310", "py39"],
+            title="Python Version",
+        ),
+    ] = "py313"
+    base_image: Annotated[
+        BaseImage | None,
+        Field(
+            description="Use a custom Docker base image instead of the default Truss image."
+        ),
+    ] = None
+    requirements_file: Annotated[
+        str | None,
+        Field(
+            description="Path to a dependency file. Supports requirements.txt, pyproject.toml, and uv.lock. Mutually exclusive with 'requirements'.",
+            title="Requirements File",
+        ),
+    ] = None
+    requirements: Annotated[
+        list[str] | None,
+        Field(
+            description="A list of Python dependencies in pip requirements file format. Mutually exclusive with 'requirements_file'.",
+            title="Requirements",
+        ),
+    ] = None
+    system_packages: Annotated[
+        list[str] | None,
+        Field(
+            description="System packages that you would typically install using apt on a Debian operating system.",
+            examples=[["ffmpeg", "libsm6", "libxext6"]],
+            title="System Packages",
+        ),
+    ] = None
+    environment_variables: Annotated[
+        dict[str, str] | None,
+        Field(
+            description="Key-value pairs exposed to the environment that the model executes in. Do not store secret values here.",
+            title="Environment Variables",
+        ),
+    ] = None
+    secrets: Annotated[
+        dict[str, str | None] | None,
+        Field(
+            description="Declare secrets your model needs at runtime, such as API keys or access tokens. Use null as a placeholder; store actual values in your organization settings.",
+            title="Secrets",
+        ),
+    ] = None
+    resources: Resources | None = None
+    runtime: Runtime | None = None
+    build: Build | None = None
+    build_commands: Annotated[
+        list[str] | None,
+        Field(
+            description="A list of shell commands to run during Docker build. These commands execute after system packages and Python requirements are installed.",
+            title="Build Commands",
+        ),
+    ] = None
+    docker_server: Annotated[
+        DockerServer | None,
+        Field(
+            description="Deploy a custom Docker image that has its own HTTP server, without writing a Model class."
+        ),
+    ] = None
+    model_cache: Annotated[
+        ModelCache | None,
+        Field(
+            description="Deprecated. Use 'weights' instead. Bundle model weights into your image at build time."
+        ),
+    ] = None
+    weights: Annotated[
+        Weights | None,
+        Field(
+            description="Configure Baseten Delivery Network (BDN) for model weight delivery with multi-tier caching."
+        ),
+    ] = None
+    trt_llm: Annotated[
+        TRTLLMConfiguration | None,
+        Field(description="TensorRT-LLM configuration for optimized LLM inference."),
+    ] = None
+    training_checkpoints: Annotated[
+        CheckpointList | None,
+        Field(description="Configuration for deploying from training checkpoints."),
+    ] = None
+    bis_llm: Annotated[
+        BISLLM | None,
+        Field(
+            description="Configuration options for BIS LLM deployments. This field may change in the future."
+        ),
+    ] = None
+    input_type: Annotated[str | None, Field(title="Input Type")] = "Any"
+    model_framework: Annotated[str | None, Field(title="Model Framework")] = "custom"
+    model_type: Annotated[str | None, Field(title="Model Type")] = "Model"
+    model_module_dir: Annotated[
+        str | None,
+        Field(
+            description="The folder containing your model class.",
+            title="Model Module Dir",
+        ),
+    ] = "model"
+    model_class_filename: Annotated[str | None, Field(title="Model Class Filename")] = (
+        "model.py"
+    )
+    model_class_name: Annotated[
+        str | None,
+        Field(
+            description="The name of the class that defines your Truss model. This class must implement at least a predict method.",
+            title="Model Class Name",
+        ),
+    ] = "Model"
+    bundled_packages_dir: Annotated[
+        str | None,
+        Field(
+            description="The folder for custom packages in your Truss.",
+            title="Bundled Packages Dir",
+        ),
+    ] = "packages"
+    use_local_src: Annotated[bool | None, Field(title="Use Local Src")] = False
+    cache_internal: CacheInternal | None = None
+    live_reload: Annotated[
+        bool | None,
+        Field(
+            description="If true, changes to your model code are automatically reloaded without restarting the server.",
+            title="Live Reload",
+        ),
+    ] = False
+    apply_library_patches: Annotated[
+        bool | None,
+        Field(
+            description="Whether to apply library patches for improved compatibility.",
+            title="Apply Library Patches",
+        ),
+    ] = True
+    spec_version: Annotated[str | None, Field(title="Spec Version")] = "2.0"
+
+
+class TRTLLMConfigurationV1(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    build: ModelTRTLLMBuildConfiguration
+    inference_stack: Annotated[Literal["v1"], Field(title="Inference Stack")] = "v1"
+    runtime: Annotated[
+        ModelTRTLLMRuntimeConfiguration | None,
+        Field(
+            default_factory=lambda: ModelTRTLLMRuntimeConfiguration.model_validate(
+                {
+                    "kv_cache_free_gpu_mem_fraction": 0.9,
+                    "kv_cache_host_memory_bytes": None,
+                    "enable_chunked_context": True,
+                    "batch_scheduler_policy": "guaranteed_no_evict",
+                    "request_default_max_tokens": None,
+                    "served_model_name": None,
+                    "total_token_limit": 500000,
+                    "webserver_default_route": None,
+                }
+            )
+        ),
+    ]
+    version_overrides: Annotated[
+        VersionsOverrides | None,
+        Field(
+            default_factory=lambda: VersionsOverrides.model_validate(
+                {
+                    "engine_builder_version": None,
+                    "briton_version": None,
+                    "bei_version": None,
+                    "bei_bert_version": None,
+                    "v2_llm_version": None,
+                }
+            )
+        ),
+    ]
+
+
+class TRTLLMConfigurationV2(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    inference_stack: Annotated[Literal["v2"], Field(title="Inference Stack")] = "v2"
+    build: ModelTRTLLMBuildConfiguration
+    runtime: TRTLLMRuntimeConfigurationV2
+    version_overrides: Annotated[
+        VersionsOverrides | None,
+        Field(
+            default_factory=lambda: VersionsOverrides.model_validate(
+                {
+                    "engine_builder_version": None,
+                    "briton_version": None,
+                    "bei_version": None,
+                    "bei_bert_version": None,
+                    "v2_llm_version": None,
+                }
+            )
+        ),
+    ]
+
+
+class ModelSpeculatorConfiguration(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    speculative_decoding_mode: ModelSpecDecMode | None = (
+        ModelSpecDecMode.DRAFT_TOKENS_EXTERNAL
+    )
+    num_draft_tokens: Annotated[
+        NumDraftTokens | None, Field(title="Num Draft Tokens")
+    ] = None
+    checkpoint_repository: CheckpointRepository | None = None
+    runtime: Annotated[
+        ModelTRTLLMRuntimeConfiguration | None,
+        Field(
+            default_factory=lambda: ModelTRTLLMRuntimeConfiguration.model_validate(
+                {
+                    "kv_cache_free_gpu_mem_fraction": 0.9,
+                    "kv_cache_host_memory_bytes": None,
+                    "enable_chunked_context": True,
+                    "batch_scheduler_policy": "guaranteed_no_evict",
+                    "request_default_max_tokens": None,
+                    "served_model_name": None,
+                    "total_token_limit": 500000,
+                    "webserver_default_route": None,
+                }
+            )
+        ),
+    ]
+    build: ModelTRTLLMBuildConfiguration | None = None
+    lookahead_windows_size: Annotated[
+        LookaheadWindowsSize | None, Field(title="Lookahead Windows Size")
+    ] = None
+    lookahead_ngram_size: Annotated[
+        LookaheadNgramSize | None, Field(title="Lookahead Ngram Size")
+    ] = None
+    lookahead_verification_set_size: Annotated[
+        LookaheadVerificationSetSize | None,
+        Field(title="Lookahead Verification Set Size"),
+    ] = None
+    enable_b10_lookahead: Annotated[
+        bool | None, Field(title="Enable B10 Lookahead")
+    ] = False
+
+
+class ModelTRTLLMBuildConfiguration(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    base_model: ModelTRTLLMModel | None = ModelTRTLLMModel.decoder
+    max_seq_len: Annotated[MaxSeqLen | None, Field(title="Max Seq Len")] = None
+    max_batch_size:
Annotated[ + int | None, Field(ge=1, le=2048, title="Max Batch Size") + ] = 256 + max_num_tokens: Annotated[ + int | None, Field(gt=64, le=1048576, title="Max Num Tokens") + ] = 8192 + max_beam_width: Annotated[int | None, Field(ge=1, le=1, title="Max Beam Width")] = 1 + max_prompt_embedding_table_size: Annotated[ + int | None, Field(title="Max Prompt Embedding Table Size") + ] = 0 + checkpoint_repository: CheckpointRepository | None = None + gather_all_token_logits: Annotated[ + bool | None, Field(title="Gather All Token Logits") + ] = False + strongly_typed: Annotated[bool | None, Field(title="Strongly Typed")] = False + quantization_type: ModelTRTLLMQuantizationType | None = ( + ModelTRTLLMQuantizationType.no_quant + ) + quantization_config: Annotated[ + ModelTRTQuantizationConfiguration | None, + Field( + default_factory=lambda: ModelTRTQuantizationConfiguration.model_validate( + { + "calib_size": 1024, + "calib_dataset": "abisee/cnn_dailymail", + "calib_max_seq_length": 1536, + } + ) + ), + ] + tensor_parallel_count: Annotated[ + int | None, Field(ge=1, title="Tensor Parallel Count") + ] = 1 + pipeline_parallel_count: Annotated[ + int | None, Field(title="Pipeline Parallel Count") + ] = 1 + moe_expert_parallel_option: Annotated[ + int | None, Field(title="Moe Expert Parallel Option") + ] = -1 + sequence_parallel_count: Annotated[ + int | None, Field(title="Sequence Parallel Count") + ] = 1 + plugin_configuration: Annotated[ + ModelTRTLLMPluginConfiguration | None, + Field( + default_factory=lambda: ModelTRTLLMPluginConfiguration.model_validate( + { + "paged_kv_cache": True, + "use_paged_context_fmha": True, + "use_fp8_context_fmha": False, + } + ) + ), + ] + num_builder_gpus: Annotated[ + NumBuilderGpus | None, Field(title="Num Builder Gpus") + ] = None + speculator: ModelSpeculatorConfiguration | None = None + lora_adapters: Annotated[ + dict[ + Annotated[str, Field(pattern=r"^[a-zA-Z0-9_\-\.:]+$")], CheckpointRepository + ] + | None, + Field(title="Lora 
Adapters"), + ] = None + lora_configuration: ModelTRTLLMLoraConfiguration | None = None + skip_build_result: Annotated[bool | None, Field(title="Skip Build Result")] = False + + +class TRTLLMConfiguration(RootModel[TRTLLMConfigurationV1 | TRTLLMConfigurationV2]): + root: Annotated[ + TRTLLMConfigurationV1 | TRTLLMConfigurationV2, + Field(title="TRTLLMConfiguration"), + ] + + +ModelConfig.model_rebuild() +TRTLLMConfigurationV1.model_rebuild() +TRTLLMConfigurationV2.model_rebuild() +ModelSpeculatorConfiguration.model_rebuild() diff --git a/pyproject.toml b/pyproject.toml index 3c42e13..4603260 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,8 +16,9 @@ dev = [ "poethepoet>=0.35", "pytest>=9.0.2", "pytest-asyncio>=1.3.0", + "pyyaml>=6.0", "ruff>=0.15.8", - "ty>=0.0.26", + "ty>=0.0.33", ] [project.urls] diff --git a/scripts/apigen/__main__.py b/scripts/apigen/__main__.py index 68718a8..97d98fd 100644 --- a/scripts/apigen/__main__.py +++ b/scripts/apigen/__main__.py @@ -13,10 +13,14 @@ from pathlib import Path from scripts.apigen.clientgen import generate_client -from scripts.apigen.preprocess import preprocess_spec +from scripts.apigen.postprocess import postprocess_models +from scripts.apigen.preprocess import preprocess_spec, preprocess_truss_config_schema MANAGEMENT_SPEC_URL = "https://api.baseten.co/v1/spec" INFERENCE_SPEC_URL = "https://api.baseten.co/inference-spec" +TRUSS_CONFIG_SCHEMA_URL = ( + "https://raw.githubusercontent.com/basetenlabs/truss/main/truss/config.schema.json" +) APIGEN_DIR = Path(__file__).parent SPECS_DIR = APIGEN_DIR / "specs" @@ -37,11 +41,13 @@ def main() -> None: print("Updating specs from remote URLs...") download_spec(MANAGEMENT_SPEC_URL, SPECS_DIR / "management.json") download_spec(INFERENCE_SPEC_URL, SPECS_DIR / "inference.json") + download_spec(TRUSS_CONFIG_SCHEMA_URL, SPECS_DIR / "config.schema.json") generate_api( SPECS_DIR / "management.json", CLIENT_DIR / "managementapi", "Management" ) generate_api(SPECS_DIR / 
"inference.json", CLIENT_DIR / "inferenceapi", "Inference") + generate_modelconfig(SPECS_DIR / "config.schema.json", CLIENT_DIR / "modelconfig") def download_spec(url: str, dest: Path) -> None: @@ -72,6 +78,48 @@ def generate_api(spec_file: Path, out_dir: Path, display_name: str) -> None: run_ruff(f) +def generate_modelconfig(spec_file: Path, out_dir: Path) -> None: + print(f"Generating {out_dir.name} from {spec_file}") + out_dir.mkdir(parents=True, exist_ok=True) + + preprocessed = preprocess_truss_config_schema(spec_file.read_bytes()) + + models_file = out_dir / "_models.py" + run_datamodel_codegen(preprocessed, models_file, input_file_type="jsonschema") + models_file.write_text(postprocess_models(models_file.read_text())) + print(f" -> {models_file}") + + init_file = out_dir / "__init__.py" + generate_modelconfig_init(models_file, init_file) + print(f" -> {init_file}") + + for f in (models_file, init_file): + run_ruff(f) + + +def generate_modelconfig_init(models_file: Path, out_file: Path) -> None: + model_names = sorted(_public_class_names(models_file)) + model_imports = ", ".join(model_names) + all_entries = "\n".join(f' "{name}",' for name in model_names) + + out_file.write_text(f'''\ +# Code generated by apigen. DO NOT EDIT. + +"""Generated models for the Baseten model config (truss config.yaml). + +Types in this module are generated from the Truss config JSON schema and are +NOT covered by any stability or compatibility guarantees. They may change +without notice between versions. 
+""" + +from ._models import {model_imports} + +__all__ = [ +{all_entries} +] +''') + + def generate_init( display_name: str, models_file: Path, client_file: Path, out_file: Path ) -> None: @@ -115,32 +163,31 @@ def _public_class_names(path: Path) -> list[str]: ] -def run_datamodel_codegen(spec_data: bytes, out_file: Path) -> None: - subprocess.run( - [ - sys.executable, - "-m", - "datamodel_code_generator", - "--input-file-type", - "openapi", - "--output", - str(out_file), - "--output-model-type", - "pydantic_v2.BaseModel", - "--target-python-version", - "3.10", - "--use-annotated", - "--set-default-enum-member", - "--disable-timestamp", - "--openapi-scopes", - "schemas", - "--formatters", - "ruff-format", - "ruff-check", - ], - input=spec_data, - check=True, - ) +def run_datamodel_codegen( + spec_data: bytes, out_file: Path, input_file_type: str = "openapi" +) -> None: + args = [ + sys.executable, + "-m", + "datamodel_code_generator", + "--input-file-type", + input_file_type, + "--output", + str(out_file), + "--output-model-type", + "pydantic_v2.BaseModel", + "--target-python-version", + "3.10", + "--use-annotated", + "--set-default-enum-member", + "--disable-timestamp", + "--formatters", + "ruff-format", + "ruff-check", + ] + if input_file_type == "openapi": + args += ["--openapi-scopes", "schemas"] + subprocess.run(args, input=spec_data, check=True) def run_ruff(file: Path) -> None: diff --git a/scripts/apigen/postprocess.py b/scripts/apigen/postprocess.py new file mode 100644 index 0000000..fff9f8b --- /dev/null +++ b/scripts/apigen/postprocess.py @@ -0,0 +1,65 @@ +"""Postprocesses generated Python model files.""" + +import re + +# Matches a class block of the form: +# class Name(RootModel[INNER]): +# root: Annotated[T, Field(...)] = None +# or: +# root: T = None +# where INNER does not contain "None". Captures the inner type expression +# of the `root` annotation so we can widen it to `T | None`. 
+# +# datamodel-code-generator (as of 0.55.0) emits `= None` defaults on +# constrained nullable RootModel scalars (e.g. an integer schema with +# `anyOf: [{type: integer, ge: 1}, {type: null}]`). The annotation is +# non-nullable, so the default does not match the type. The schema's +# intent is nullable, so we widen the annotation to `T | None`. Tracked at +# https://github.com/koxudaxi/datamodel-code-generator/issues/2027 (closed +# but the issue persists for this shape in 0.55.0). +_ROOT_MODEL_BLOCK = re.compile( + r"(class \w+\(RootModel\[(?P<wrapped>[^\]]+)\]\):\n" + r" root: )(?P<rest>.+?)(?P<eq> = None\n)", + re.DOTALL, +) + +# Matches `dict[constr(pattern=...), X]` and converts to +# `dict[Annotated[str, Field(pattern=...)], X]`. ty (and other strict +# checkers) reject function calls in type expressions. Tracked at +# https://github.com/koxudaxi/datamodel-code-generator/issues/1973 (closed +# but the issue persists for this shape in 0.55.0). +_DICT_CONSTR = re.compile( + r"dict\[constr\(pattern=(?P<pattern>r?\"[^\"]+\")\), (?P<value>[\w.]+)\]" +) + + +def postprocess_models(src: str) -> str: + src = _ROOT_MODEL_BLOCK.sub(_widen_root_annotation, src) + src = _DICT_CONSTR.sub( + r"dict[Annotated[str, Field(pattern=\g<pattern>)], \g<value>]", src + ) + if "constr(" not in src: + src = src.replace(", RootModel, constr", ", RootModel") + src = src.replace(", constr,", ",") + src = src.replace(", constr\n", "\n") + return src + + +def _widen_root_annotation(m: re.Match) -> str: + wrapped = m.group("wrapped").strip() + if "None" in wrapped: + return m.group(0) + rest = m.group("rest") + # Two forms: + # Annotated[T, Field(...)] + # T (bare type, possibly multiline) + if rest.lstrip().startswith("Annotated["): + widened = re.sub( + r"Annotated\[\s*([^,]+?)\s*,", + lambda mm: f"Annotated[{mm.group(1).strip()} | None,", + rest, + count=1, + ) + else: + widened = rest.rstrip() + " | None" + return m.group(1) + widened + m.group("eq") diff --git a/scripts/apigen/preprocess.py b/scripts/apigen/preprocess.py index 
c0eb6a5..c4f0518 100644 --- a/scripts/apigen/preprocess.py +++ b/scripts/apigen/preprocess.py @@ -4,6 +4,46 @@ import re +def preprocess_truss_config_schema(data: bytes) -> bytes: + doc = json.loads(data) + + # Rename Truss-prefixed definitions and the root title to Model-prefixed. + # Field names (e.g. truss_*) are property keys, not definition names, and + # are left untouched. + defs = doc.get("$defs", {}) + renames = { + name: "Model" + name[len("Truss") :] + for name in defs + if name.startswith("Truss") + } + if renames: + _rename_defs_refs(doc, renames) + for old, new in renames.items(): + defs[new] = defs.pop(old) + title = doc.get("title") + if isinstance(title, str) and title.startswith("Truss"): + doc["title"] = "Model" + title[len("Truss") :] + + return json.dumps(doc, indent=2).encode() + + +_DEFS_REF_PATTERN = re.compile(r"#/\$defs/(\w+)") + + +def _rename_defs_refs(node: object, renames: dict[str, str]) -> None: + if isinstance(node, dict): + ref = node.get("$ref") # ty: ignore[invalid-argument-type] + if isinstance(ref, str): + m = _DEFS_REF_PATTERN.fullmatch(ref) + if m and m.group(1) in renames: + node["$ref"] = f"#/$defs/{renames[m.group(1)]}" # ty: ignore[invalid-assignment] + for child in node.values(): + _rename_defs_refs(child, renames) + elif isinstance(node, list): + for child in node: + _rename_defs_refs(child, renames) + + def preprocess_spec(data: bytes) -> bytes: doc = json.loads(data) diff --git a/scripts/apigen/specs/config.schema.json b/scripts/apigen/specs/config.schema.json new file mode 100644 index 0000000..d5e3f67 --- /dev/null +++ b/scripts/apigen/specs/config.schema.json @@ -0,0 +1,2159 @@ +{ + "$defs": { + "AcceleratorSpec": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "AdditionalAutoscalingConfig": { + "description": "Additional autoscaling configuration for in-flight token metrics.", + "properties": { + "metrics": { + "description": "List of metric targets for autoscaling.", + "items": { + 
"$ref": "#/$defs/AutoscalingMetric" + }, + "title": "Metrics", + "type": "array" + } + }, + "required": [ + "metrics" + ], + "title": "AdditionalAutoscalingConfig", + "type": "object" + }, + "AutoscalingMetric": { + "properties": { + "name": { + "title": "Name", + "type": "string" + }, + "target": { + "title": "Target", + "type": "number" + } + }, + "required": [ + "name", + "target" + ], + "title": "AutoscalingMetric", + "type": "object" + }, + "BISLLM": { + "additionalProperties": true, + "description": "Configuration options for BIS LLM deployments.", + "properties": { + "config": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Configuration options for BIS LLM deployments.", + "title": "Config" + }, + "version": { + "default": "", + "description": "The version of the BIS LLM deployment stack.", + "title": "Version", + "type": "string" + }, + "additional_autoscaling_config": { + "anyOf": [ + { + "$ref": "#/$defs/AdditionalAutoscalingConfig" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Additional autoscaling configuration" + } + }, + "title": "BISLLM", + "type": "object" + }, + "BaseImage": { + "additionalProperties": true, + "description": "Use base_image to deploy a custom Docker image.", + "properties": { + "image": { + "default": "", + "description": "The path to the Docker image.", + "examples": [ + "vllm/vllm-openai:v0.7.3", + "nvcr.io/nvidia/nemo:23.03" + ], + "title": "Image", + "type": "string" + }, + "python_executable_path": { + "default": "", + "description": "A path to the Python executable on the image.", + "examples": [ + "/usr/bin/python" + ], + "title": "Python Executable Path", + "type": "string" + }, + "docker_auth": { + "anyOf": [ + { + "$ref": "#/$defs/DockerAuthSettings" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Authentication configuration for a private Docker registry." 
+ } + }, + "title": "BaseImage", + "type": "object" + }, + "Build": { + "additionalProperties": true, + "description": "Build-time configuration, including secret access during Docker builds.", + "properties": { + "model_server": { + "$ref": "#/$defs/ModelServer", + "default": "TrussServer" + }, + "arguments": { + "additionalProperties": true, + "title": "Arguments", + "type": "object" + }, + "secret_to_path_mapping": { + "additionalProperties": { + "type": "string" + }, + "description": "Grants access to secrets during the build. Provide a mapping between a secret and a path on the image.", + "title": "Secret To Path Mapping", + "type": "object" + }, + "no_cache": { + "default": false, + "title": "No Cache", + "type": "boolean" + } + }, + "title": "Build", + "type": "object" + }, + "CacheInternal": { + "items": { + "$ref": "#/$defs/ModelRepoCacheInternal" + }, + "title": "CacheInternal", + "type": "array" + }, + "CheckpointList": { + "additionalProperties": true, + "properties": { + "download_folder": { + "default": "/tmp/training_checkpoints", + "description": "The folder to download the checkpoints to.", + "examples": [ + "/tmp/training_checkpoints" + ], + "title": "Download Folder", + "type": "string" + }, + "artifact_references": { + "items": { + "$ref": "#/$defs/TrainingArtifactReference" + }, + "title": "Artifact References", + "type": "array" + } + }, + "title": "CheckpointList", + "type": "object" + }, + "CheckpointRepository": { + "additionalProperties": true, + "properties": { + "source": { + "$ref": "#/$defs/CheckpointSource" + }, + "repo": { + "title": "Repo", + "type": "string" + }, + "revision": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Revision" + }, + "runtime_secret_name": { + "default": "hf_access_token", + "title": "Runtime Secret Name", + "type": "string" + } + }, + "required": [ + "source", + "repo" + ], + "title": "CheckpointRepository", + "type": "object" + }, + 
"CheckpointSource": { + "enum": [ + "HF", + "GCS", + "S3", + "AZURE", + "LOCAL", + "REMOTE_URL", + "BASETEN_TRAINING" + ], + "title": "CheckpointSource", + "type": "string" + }, + "DockerAuthSettings": { + "additionalProperties": true, + "description": "Provides information about how to authenticate to the docker registry containing\nthe custom base image.", + "properties": { + "aws_oidc_role_arn": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "AWS IAM role ARN for OIDC authentication.", + "title": "Aws Oidc Role Arn" + }, + "aws_oidc_region": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "AWS region for OIDC authentication.", + "title": "Aws Oidc Region" + }, + "gcp_oidc_service_account": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "GCP service account name for OIDC authentication.", + "title": "Gcp Oidc Service Account" + }, + "gcp_oidc_workload_id_provider": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "GCP workload identity provider for OIDC authentication.", + "title": "Gcp Oidc Workload Id Provider" + }, + "auth_method": { + "$ref": "#/$defs/DockerAuthType" + }, + "registry": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": "", + "title": "Registry" + }, + "secret_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Secret Name" + }, + "aws_access_key_id_secret_name": { + "default": "aws_access_key_id", + "title": "Aws Access Key Id Secret Name", + "type": "string" + }, + "aws_secret_access_key_secret_name": { + "default": "aws_secret_access_key", + "title": "Aws Secret Access Key Secret Name", + "type": "string" + } + }, + "required": [ + "auth_method" + ], + "title": "DockerAuthSettings", + "type": "object" + }, + 
"DockerAuthType": { + "description": "This enum will express all the types of registry\nauthentication we support.", + "enum": [ + "GCP_SERVICE_ACCOUNT_JSON", + "AWS_IAM", + "AWS_OIDC", + "GCP_OIDC", + "REGISTRY_SECRET" + ], + "title": "DockerAuthType", + "type": "string" + }, + "DockerServer": { + "additionalProperties": true, + "description": "Deploy a custom Docker image that has its own HTTP server, without writing a Model class.", + "properties": { + "start_command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The command to start the server. Required when no_build is not true.", + "title": "Start Command" + }, + "server_port": { + "description": "The port where the server runs. Port 8080 is reserved by Baseten's internal reverse proxy and cannot be used.", + "title": "Server Port", + "type": "integer" + }, + "predict_endpoint": { + "description": "The endpoint for inference requests. This is mapped to Baseten's /predict route.", + "title": "Predict Endpoint", + "type": "string" + }, + "readiness_endpoint": { + "description": "The endpoint for readiness probes. Determines when the container can accept traffic.", + "title": "Readiness Endpoint", + "type": "string" + }, + "liveness_endpoint": { + "description": "The endpoint for liveness probes. Determines if the container needs to be restarted.", + "title": "Liveness Endpoint", + "type": "string" + }, + "run_as_user_id": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The Linux UID to run the server process as inside the container. Use this when your base image expects a specific non-root user (for example, NVIDIA NIM containers).", + "title": "Run As User Id" + }, + "no_build": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Skip the build step and deploy the base image as-is. 
Baseten copies the image to its container registry without running docker build or modifying the image in any way.", + "title": "No Build" + } + }, + "required": [ + "server_port", + "predict_endpoint", + "readiness_endpoint", + "liveness_endpoint" + ], + "title": "DockerServer", + "type": "object" + }, + "ExternalData": { + "description": "[Experimental] External data is data that is not contained in the Truss folder.\n\nTypically, this will be data stored remotely. This data is guaranteed to be made\navailable under the data directory of the truss.", + "items": { + "$ref": "#/$defs/ExternalDataItem" + }, + "title": "ExternalData", + "type": "array" + }, + "ExternalDataItem": { + "additionalProperties": true, + "description": "A piece of remote data, to be made available to the Truss at serving time.\n\nRemote data is downloaded and stored under Truss's data directory. Care should be taken\nto avoid conflicts. This will get precedence if there's overlap.", + "properties": { + "url": { + "description": "URL to download the data from. Currently only files are allowed.", + "minLength": 1, + "title": "Url", + "type": "string" + }, + "local_data_path": { + "description": "Path relative to the data directory where the remote file will be downloaded.", + "minLength": 1, + "title": "Local Data Path", + "type": "string" + }, + "backend": { + "default": "http_public", + "description": "Download backend to use. Defaults to 'http_public'.", + "title": "Backend", + "type": "string" + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Optional name for the download. 
Path relative to data directory.", + "title": "Name" + } + }, + "required": [ + "url", + "local_data_path" + ], + "title": "ExternalDataItem", + "type": "object" + }, + "GRPCOptions": { + "properties": { + "kind": { + "const": "grpc", + "default": "grpc", + "title": "Kind", + "type": "string" + } + }, + "title": "GRPCOptions", + "type": "object" + }, + "HTTPOptions": { + "properties": { + "kind": { + "const": "http", + "default": "http", + "title": "Kind", + "type": "string" + } + }, + "title": "HTTPOptions", + "type": "object" + }, + "HealthChecks": { + "additionalProperties": true, + "description": "Custom health check configuration for your deployments.", + "properties": { + "restart_check_delay_seconds": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The delay in seconds before starting restart checks. Defaults to platform-determined value when not set.", + "title": "Restart Check Delay Seconds" + }, + "restart_threshold_seconds": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The time in seconds after which an unhealthy instance is restarted. Defaults to platform-determined value when not set.", + "title": "Restart Threshold Seconds" + }, + "stop_traffic_threshold_seconds": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The time in seconds after which traffic is stopped to an unhealthy instance. Defaults to platform-determined value when not set.", + "title": "Stop Traffic Threshold Seconds" + }, + "startup_threshold_seconds": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The time in seconds to wait for a model to start before marking it as unhealthy. 
Defaults to platform-determined value when not set.", + "title": "Startup Threshold Seconds" + } + }, + "title": "HealthChecks", + "type": "object" + }, + "ModelCache": { + "items": { + "$ref": "#/$defs/ModelRepo" + }, + "title": "ModelCache", + "type": "array" + }, + "ModelRepo": { + "additionalProperties": true, + "properties": { + "repo_id": { + "minLength": 1, + "title": "Repo Id", + "type": "string" + }, + "revision": { + "default": "", + "title": "Revision", + "type": "string" + }, + "allow_patterns": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Allow Patterns" + }, + "ignore_patterns": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Ignore Patterns" + }, + "volume_folder": { + "anyOf": [ + { + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Volume Folder" + }, + "use_volume": { + "title": "Use Volume", + "type": "boolean" + }, + "kind": { + "$ref": "#/$defs/ModelRepoSourceKind", + "default": "hf" + }, + "runtime_secret_name": { + "default": "hf_access_token", + "title": "Runtime Secret Name", + "type": "string" + } + }, + "required": [ + "repo_id", + "use_volume" + ], + "title": "ModelRepo", + "type": "object" + }, + "ModelRepoCacheInternal": { + "additionalProperties": true, + "properties": { + "repo_id": { + "minLength": 1, + "title": "Repo Id", + "type": "string" + }, + "revision": { + "default": "", + "title": "Revision", + "type": "string" + }, + "allow_patterns": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Allow Patterns" + }, + "ignore_patterns": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Ignore Patterns" + }, + 
"volume_folder": { + "anyOf": [ + { + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Volume Folder" + }, + "use_volume": { + "default": false, + "title": "Use Volume", + "type": "boolean" + }, + "kind": { + "$ref": "#/$defs/ModelRepoSourceKind", + "default": "hf" + }, + "runtime_secret_name": { + "default": "hf_access_token", + "title": "Runtime Secret Name", + "type": "string" + } + }, + "required": [ + "repo_id" + ], + "title": "ModelRepoCacheInternal", + "type": "object" + }, + "ModelRepoSourceKind": { + "description": "syned with `pub enum ResolutionType` in truss-transfer", + "enum": [ + "hf", + "gcs", + "s3", + "azure" + ], + "title": "ModelRepoSourceKind", + "type": "string" + }, + "ModelServer": { + "description": "To determine the image builder path for trusses built from alternative server backends.\nThis enum is also used to gate development deployments to BasetenRemote\nhttps://github.com/basetenlabs/truss/blob/7505c17a2ddd4a6fa626b9126772999dc8f3fa86/truss/remote/baseten/remote.py#L56-L57", + "enum": [ + "TrussServer", + "TRT_LLM" + ], + "title": "ModelServer", + "type": "string" + }, + "RemoteSSH": { + "additionalProperties": true, + "description": "Configuration for SSH access to running model instances.", + "properties": { + "enabled": { + "default": false, + "description": "If true, enables SSH access to running model instances.", + "title": "Enabled", + "type": "boolean" + } + }, + "title": "RemoteSSH", + "type": "object" + }, + "Resources": { + "additionalProperties": true, + "description": "Compute resources that your model needs, including CPU, memory, and GPU resources.", + "properties": { + "cpu": { + "default": "1", + "description": "CPU resources needed, expressed as either a raw number or millicpus. 
For example, 500m is half of a CPU core.", + "examples": [ + "1", + "500m", + "4" + ], + "title": "Cpu", + "type": "string" + }, + "memory": { + "default": "2Gi", + "description": "CPU RAM needed, expressed as a number with units. Units include Gi (Gibibytes), G (Gigabytes), Mi (Mebibytes), and M (Megabytes).", + "examples": [ + "2Gi", + "512Mi" + ], + "title": "Memory", + "type": "string" + }, + "accelerator": { + "$ref": "#/$defs/AcceleratorSpec", + "description": "The GPU type for your instance. To request multiple GPUs, use the ':' operator (e.g. L4:4).", + "examples": [ + "A100", + "T4:2", + "H100:8" + ] + }, + "instance_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The full SKU name for the instance type. When specified, cpu, memory, and accelerator fields are ignored.", + "examples": [ + "L4:4x16" + ], + "title": "Instance Type" + }, + "node_count": { + "anyOf": [ + { + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of nodes for multi-node deployments.", + "title": "Node Count" + } + }, + "title": "Resources", + "type": "object" + }, + "Runtime": { + "additionalProperties": true, + "description": "Runtime settings for your model instance.", + "properties": { + "predict_concurrency": { + "default": 1, + "description": "The number of concurrent requests that can run in your model's predict method. Increase this if your model supports parallelism.", + "title": "Predict Concurrency", + "type": "integer" + }, + "streaming_read_timeout": { + "default": 60, + "description": "The timeout in seconds for streaming read operations.", + "title": "Streaming Read Timeout", + "type": "integer" + }, + "enable_tracing_data": { + "default": false, + "description": "If true, enables trace data export with built-in OTEL instrumentation. 
May add performance overhead.", + "title": "Enable Tracing Data", + "type": "boolean" + }, + "enable_debug_logs": { + "default": false, + "description": "If true, sets the Truss server log level to DEBUG instead of INFO.", + "title": "Enable Debug Logs", + "type": "boolean" + }, + "transport": { + "description": "The transport protocol for your model. Supports http (default), websocket, and grpc.", + "discriminator": { + "mapping": { + "grpc": "#/$defs/GRPCOptions", + "http": "#/$defs/HTTPOptions", + "websocket": "#/$defs/WebsocketOptions" + }, + "propertyName": "kind" + }, + "oneOf": [ + { + "$ref": "#/$defs/HTTPOptions" + }, + { + "$ref": "#/$defs/WebsocketOptions" + }, + { + "$ref": "#/$defs/GRPCOptions" + } + ], + "title": "Transport" + }, + "is_websocket_endpoint": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "description": "DEPRECATED. Do not set manually. Automatically inferred from transport.kind == websocket.", + "title": "Is Websocket Endpoint" + }, + "health_checks": { + "$ref": "#/$defs/HealthChecks" + }, + "remote_ssh": { + "$ref": "#/$defs/RemoteSSH" + }, + "truss_server_version_override": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "By default, truss servers are built from the same release as the CLI used to push. 
This field allows specifying a pinned/specific version instead.", + "title": "Truss Server Version Override" + } + }, + "title": "Runtime", + "type": "object" + }, + "TRTLLMConfiguration": { + "oneOf": [ + { + "$ref": "#/$defs/TRTLLMConfigurationV1" + }, + { + "$ref": "#/$defs/TRTLLMConfigurationV2" + } + ], + "title": "TRTLLMConfiguration" + }, + "TRTLLMConfigurationV1": { + "additionalProperties": true, + "properties": { + "build": { + "$ref": "#/$defs/TrussTRTLLMBuildConfiguration" + }, + "inference_stack": { + "const": "v1", + "default": "v1", + "title": "Inference Stack", + "type": "string" + }, + "runtime": { + "$ref": "#/$defs/TrussTRTLLMRuntimeConfiguration", + "default": { + "kv_cache_free_gpu_mem_fraction": 0.9, + "kv_cache_host_memory_bytes": null, + "enable_chunked_context": true, + "batch_scheduler_policy": "guaranteed_no_evict", + "request_default_max_tokens": null, + "served_model_name": null, + "total_token_limit": 500000, + "webserver_default_route": null + } + }, + "version_overrides": { + "$ref": "#/$defs/VersionsOverrides", + "default": { + "engine_builder_version": null, + "briton_version": null, + "bei_version": null, + "bei_bert_version": null, + "v2_llm_version": null + } + } + }, + "required": [ + "build" + ], + "title": "TRTLLMConfigurationV1", + "type": "object" + }, + "TRTLLMConfigurationV2": { + "additionalProperties": true, + "properties": { + "inference_stack": { + "const": "v2", + "default": "v2", + "title": "Inference Stack", + "type": "string" + }, + "build": { + "$ref": "#/$defs/TrussTRTLLMBuildConfiguration" + }, + "runtime": { + "$ref": "#/$defs/TRTLLMRuntimeConfigurationV2" + }, + "version_overrides": { + "$ref": "#/$defs/VersionsOverrides", + "default": { + "engine_builder_version": null, + "briton_version": null, + "bei_version": null, + "bei_bert_version": null, + "v2_llm_version": null + } + } + }, + "required": [ + "build", + "runtime" + ], + "title": "TRTLLMConfigurationV2", + "type": "object" + }, + 
"TRTLLMRuntimeConfigurationV2": { + "additionalProperties": true, + "properties": { + "max_seq_len": { + "anyOf": [ + { + "maximum": 1048576, + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Seq Len" + }, + "max_batch_size": { + "default": 256, + "maximum": 2048, + "minimum": 1, + "title": "Max Batch Size", + "type": "integer" + }, + "max_num_tokens": { + "default": 8192, + "exclusiveMinimum": 64, + "maximum": 131072, + "title": "Max Num Tokens", + "type": "integer" + }, + "tensor_parallel_size": { + "default": 1, + "minimum": 1, + "title": "Tensor Parallel Size", + "type": "integer" + }, + "enable_chunked_prefill": { + "default": true, + "title": "Enable Chunked Prefill", + "type": "boolean" + }, + "served_model_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Served Model Name" + }, + "patch_kwargs": { + "additionalProperties": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "additionalProperties": true, + "type": "object" + }, + { + "items": {}, + "type": "array" + }, + { + "type": "null" + } + ] + }, + "title": "Patch Kwargs", + "type": "object" + } + }, + "title": "TRTLLMRuntimeConfigurationV2", + "type": "object" + }, + "TrainingArtifactReference": { + "additionalProperties": true, + "properties": { + "training_job_id": { + "description": "The training job id that the artifact reference belongs to.", + "title": "Training Job Id", + "type": "string" + }, + "paths": { + "description": "The paths of the files to download which can contain * or ?.", + "items": { + "type": "string" + }, + "title": "Paths", + "type": "array" + } + }, + "required": [ + "training_job_id" + ], + "title": "TrainingArtifactReference", + "type": "object" + }, + "TrussSpecDecMode": { + "enum": [ + "DRAFT_TOKENS_EXTERNAL", + "LOOKAHEAD_DECODING" + ], + "title": "TrussSpecDecMode", + "type": "string" + }, + 
"TrussSpeculatorConfiguration": { + "additionalProperties": true, + "properties": { + "speculative_decoding_mode": { + "$ref": "#/$defs/TrussSpecDecMode", + "default": "DRAFT_TOKENS_EXTERNAL" + }, + "num_draft_tokens": { + "anyOf": [ + { + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Num Draft Tokens" + }, + "checkpoint_repository": { + "anyOf": [ + { + "$ref": "#/$defs/CheckpointRepository" + }, + { + "type": "null" + } + ], + "default": null + }, + "runtime": { + "$ref": "#/$defs/TrussTRTLLMRuntimeConfiguration", + "default": { + "kv_cache_free_gpu_mem_fraction": 0.9, + "kv_cache_host_memory_bytes": null, + "enable_chunked_context": true, + "batch_scheduler_policy": "guaranteed_no_evict", + "request_default_max_tokens": null, + "served_model_name": null, + "total_token_limit": 500000, + "webserver_default_route": null + } + }, + "build": { + "anyOf": [ + { + "$ref": "#/$defs/TrussTRTLLMBuildConfiguration" + }, + { + "type": "null" + } + ], + "default": null + }, + "lookahead_windows_size": { + "anyOf": [ + { + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Lookahead Windows Size" + }, + "lookahead_ngram_size": { + "anyOf": [ + { + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Lookahead Ngram Size" + }, + "lookahead_verification_set_size": { + "anyOf": [ + { + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Lookahead Verification Set Size" + }, + "enable_b10_lookahead": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": false, + "title": "Enable B10 Lookahead" + } + }, + "title": "TrussSpeculatorConfiguration", + "type": "object" + }, + "TrussTRTLLMBatchSchedulerPolicy": { + "enum": [ + "max_utilization", + "guaranteed_no_evict" + ], + "title": "TrussTRTLLMBatchSchedulerPolicy", + "type": "string" + }, + 
"TrussTRTLLMBuildConfiguration": { + "additionalProperties": true, + "properties": { + "base_model": { + "$ref": "#/$defs/TrussTRTLLMModel", + "default": "decoder" + }, + "max_seq_len": { + "anyOf": [ + { + "maximum": 1048576, + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Seq Len" + }, + "max_batch_size": { + "default": 256, + "maximum": 2048, + "minimum": 1, + "title": "Max Batch Size", + "type": "integer" + }, + "max_num_tokens": { + "default": 8192, + "exclusiveMinimum": 64, + "maximum": 1048576, + "title": "Max Num Tokens", + "type": "integer" + }, + "max_beam_width": { + "default": 1, + "maximum": 1, + "minimum": 1, + "title": "Max Beam Width", + "type": "integer" + }, + "max_prompt_embedding_table_size": { + "default": 0, + "title": "Max Prompt Embedding Table Size", + "type": "integer" + }, + "checkpoint_repository": { + "anyOf": [ + { + "$ref": "#/$defs/CheckpointRepository" + }, + { + "type": "null" + } + ], + "default": null + }, + "gather_all_token_logits": { + "default": false, + "title": "Gather All Token Logits", + "type": "boolean" + }, + "strongly_typed": { + "default": false, + "title": "Strongly Typed", + "type": "boolean" + }, + "quantization_type": { + "$ref": "#/$defs/TrussTRTLLMQuantizationType", + "default": "no_quant" + }, + "quantization_config": { + "$ref": "#/$defs/TrussTRTQuantizationConfiguration", + "default": { + "calib_size": 1024, + "calib_dataset": "abisee/cnn_dailymail", + "calib_max_seq_length": 1536 + } + }, + "tensor_parallel_count": { + "default": 1, + "minimum": 1, + "title": "Tensor Parallel Count", + "type": "integer" + }, + "pipeline_parallel_count": { + "default": 1, + "title": "Pipeline Parallel Count", + "type": "integer" + }, + "moe_expert_parallel_option": { + "default": -1, + "title": "Moe Expert Parallel Option", + "type": "integer" + }, + "sequence_parallel_count": { + "default": 1, + "title": "Sequence Parallel Count", + "type": "integer" + }, + 
"plugin_configuration": { + "$ref": "#/$defs/TrussTRTLLMPluginConfiguration", + "default": { + "paged_kv_cache": true, + "use_paged_context_fmha": true, + "use_fp8_context_fmha": false + } + }, + "num_builder_gpus": { + "anyOf": [ + { + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Num Builder Gpus" + }, + "speculator": { + "anyOf": [ + { + "$ref": "#/$defs/TrussSpeculatorConfiguration" + }, + { + "type": "null" + } + ], + "default": null + }, + "lora_adapters": { + "anyOf": [ + { + "patternProperties": { + "^[a-zA-Z0-9_\\-\\.:]+$": { + "$ref": "#/$defs/CheckpointRepository" + } + }, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Lora Adapters" + }, + "lora_configuration": { + "anyOf": [ + { + "$ref": "#/$defs/TrussTRTLLMLoraConfiguration" + }, + { + "type": "null" + } + ], + "default": null + }, + "skip_build_result": { + "default": false, + "title": "Skip Build Result", + "type": "boolean" + } + }, + "title": "TrussTRTLLMBuildConfiguration", + "type": "object" + }, + "TrussTRTLLMLoraConfiguration": { + "additionalProperties": true, + "properties": { + "max_lora_rank": { + "default": 64, + "title": "Max Lora Rank", + "type": "integer" + }, + "lora_target_modules": { + "default": [], + "items": { + "type": "string" + }, + "title": "Lora Target Modules", + "type": "array" + } + }, + "title": "TrussTRTLLMLoraConfiguration", + "type": "object" + }, + "TrussTRTLLMModel": { + "enum": [ + "encoder", + "encoder_bert", + "decoder", + "palmyra", + "qwen", + "llama", + "mistral", + "deepseek", + "whisper" + ], + "title": "TrussTRTLLMModel", + "type": "string" + }, + "TrussTRTLLMPluginConfiguration": { + "additionalProperties": true, + "properties": { + "paged_kv_cache": { + "default": true, + "title": "Paged Kv Cache", + "type": "boolean" + }, + "use_paged_context_fmha": { + "default": true, + "title": "Use Paged Context Fmha", + "type": "boolean" + }, + "use_fp8_context_fmha": { + 
"default": false, + "title": "Use Fp8 Context Fmha", + "type": "boolean" + } + }, + "title": "TrussTRTLLMPluginConfiguration", + "type": "object" + }, + "TrussTRTLLMQuantizationType": { + "enum": [ + "no_quant", + "weights_int8", + "weights_kv_int8", + "weights_int4", + "weights_int4_kv_int8", + "smooth_quant", + "fp8", + "fp8_kv", + "fp4", + "fp4_kv", + "fp4_mlp_only" + ], + "title": "TrussTRTLLMQuantizationType", + "type": "string" + }, + "TrussTRTLLMRuntimeConfiguration": { + "additionalProperties": true, + "properties": { + "kv_cache_free_gpu_mem_fraction": { + "default": 0.9, + "title": "Kv Cache Free Gpu Mem Fraction", + "type": "number" + }, + "kv_cache_host_memory_bytes": { + "anyOf": [ + { + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Kv Cache Host Memory Bytes" + }, + "enable_chunked_context": { + "default": true, + "title": "Enable Chunked Context", + "type": "boolean" + }, + "batch_scheduler_policy": { + "$ref": "#/$defs/TrussTRTLLMBatchSchedulerPolicy", + "default": "guaranteed_no_evict" + }, + "request_default_max_tokens": { + "anyOf": [ + { + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Request Default Max Tokens" + }, + "served_model_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Served Model Name" + }, + "total_token_limit": { + "default": 500000, + "title": "Total Token Limit", + "type": "integer" + }, + "webserver_default_route": { + "anyOf": [ + { + "enum": [ + "/v1/embeddings", + "/rerank", + "/predict", + "/predict_tokens" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Webserver Default Route" + } + }, + "title": "TrussTRTLLMRuntimeConfiguration", + "type": "object" + }, + "TrussTRTQuantizationConfiguration": { + "additionalProperties": true, + "description": "Configuration for quantization of TRT models\n\nArgs:\n calib_size 
(int, optional): Size of calibration dataset. Defaults to 1024.\n recommended to increase for production runs (e.g. 1536), or decrease e.g. to 256 for quick testing.\n calib_dataset (str, optional): Hugging Face dataset to use for calibration. Defaults to 'abisee/cnn_dailymail'.\n uses split='train' and quantized based on 'text' column.\n calib_max_seq_length (int, optional): Maximum sequence length for calibration. Defaults to 1536.", + "properties": { + "calib_size": { + "default": 1024, + "title": "Calib Size", + "type": "integer" + }, + "calib_dataset": { + "default": "abisee/cnn_dailymail", + "title": "Calib Dataset", + "type": "string" + }, + "calib_max_seq_length": { + "default": 1536, + "title": "Calib Max Seq Length", + "type": "integer" + } + }, + "title": "TrussTRTQuantizationConfiguration", + "type": "object" + }, + "VersionsOverrides": { + "additionalProperties": true, + "properties": { + "engine_builder_version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Engine Builder Version" + }, + "briton_version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Briton Version" + }, + "bei_version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Bei Version" + }, + "bei_bert_version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Bei Bert Version" + }, + "v2_llm_version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "V2 Llm Version" + } + }, + "title": "VersionsOverrides", + "type": "object" + }, + "WebsocketOptions": { + "properties": { + "kind": { + "const": "websocket", + "default": "websocket", + "title": "Kind", + "type": "string" + }, + "ping_interval_seconds": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Ping Interval
Seconds" + }, + "ping_timeout_seconds": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Ping Timeout Seconds" + } + }, + "title": "WebsocketOptions", + "type": "object" + }, + "Weights": { + "description": "List of weights sources for the new weights API.", + "items": { + "$ref": "#/$defs/WeightsSource" + }, + "title": "Weights", + "type": "array" + }, + "WeightsAuth": { + "additionalProperties": true, + "description": "Authentication configuration for a weights source.\n\nThis can be used to specify OIDC-based authentication for cloud storage sources,\nor a Baseten secret name for access key authentication.", + "properties": { + "aws_oidc_role_arn": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "AWS IAM role ARN for OIDC authentication.", + "title": "Aws Oidc Role Arn" + }, + "aws_oidc_region": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "AWS region for OIDC authentication.", + "title": "Aws Oidc Region" + }, + "gcp_oidc_service_account": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "GCP service account name for OIDC authentication.", + "title": "Gcp Oidc Service Account" + }, + "gcp_oidc_workload_id_provider": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "GCP workload identity provider for OIDC authentication.", + "title": "Gcp Oidc Workload Id Provider" + }, + "auth_method": { + "$ref": "#/$defs/WeightsAuthMethod", + "description": "Authentication method for downloading weights from the source." 
+ }, + "auth_secret_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Baseten secret name containing credentials for accessing the source.", + "title": "Auth Secret Name" + } + }, + "required": [ + "auth_method" + ], + "title": "WeightsAuth", + "type": "object" + }, + "WeightsAuthMethod": { + "description": "Authentication methods for weights sources.", + "enum": [ + "CUSTOM_SECRET", + "AWS_OIDC", + "GCP_OIDC" + ], + "title": "WeightsAuthMethod", + "type": "string" + }, + "WeightsSource": { + "additionalProperties": true, + "description": "Configuration for a weights source in the new weights API.\n\nUses a URI-based `source` field with a required scheme prefix:\n- hf:// -> HuggingFace (e.g., \"hf://meta-llama/Llama-2-7b\" or \"hf://meta-llama/Llama-2-7b@main\")\n- s3:// -> AWS S3 (e.g., \"s3://bucket/path\")\n- gs:// -> Google Cloud Storage (e.g., \"gs://bucket/path\")\n- azure:// -> Azure Blob Storage (e.g., \"azure://account/container/path\")\n- r2:// -> CloudFlare R2 Storage (e.g., \"r2://account_id.bucket/path\")\n- https:// -> Direct URL download (e.g., \"https://example.com/model.bin\")\n\nFor HuggingFace sources, you can specify a revision (branch, tag, or commit SHA)\nusing the @{rev} suffix: \"hf://owner/repo@revision\"\n\nAuthentication can be specified either:\n- Using the `auth` section (required for OIDC):\n auth:\n auth_method: AWS_OIDC\n aws_oidc_role_arn: \n aws_oidc_region: \n- Using `auth_secret_name` at the top level (or in the `auth` section)", + "properties": { + "source": { + "description": "URI with scheme prefix. Use hf://, s3://, gs://, azure://, r2://, or https://. 
For HuggingFace, use @revision suffix (e.g., hf://owner/repo@main).", + "minLength": 1, + "title": "Source", + "type": "string" + }, + "mount_location": { + "description": "Absolute path where weights will be mounted at runtime.", + "minLength": 1, + "title": "Mount Location", + "type": "string" + }, + "auth": { + "anyOf": [ + { + "$ref": "#/$defs/WeightsAuth" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Authentication configuration for accessing the weights source." + }, + "auth_secret_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Baseten secret name containing credentials. Can also be specified in auth.auth_secret_name.", + "title": "Auth Secret Name" + }, + "allow_patterns": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "File patterns to include (e.g., ['*.safetensors']).", + "title": "Allow Patterns" + }, + "ignore_patterns": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "File patterns to exclude (e.g., ['*.md']).", + "title": "Ignore Patterns" + } + }, + "required": [ + "source", + "mount_location" + ], + "title": "WeightsSource", + "type": "object" + } + }, + "additionalProperties": true, + "description": "Configuration for a Truss model deployment.", + "properties": { + "model_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The name of your model. This is displayed in the model details page in the Baseten UI.", + "title": "Model Name" + }, + "model_metadata": { + "additionalProperties": true, + "description": "A flexible field for additional metadata. 
The entire config file is available to your model at runtime.", + "properties": { + "example_model_input": { + "description": "Sample input that populates the Baseten playground.", + "examples": [ + { + "prompt": "What is the meaning of life?" + } + ] + } + }, + "title": "Model Metadata", + "type": "object" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A description of your model.", + "title": "Description" + }, + "examples_filename": { + "default": "examples.yaml", + "description": "Path to a file containing example model inputs.", + "title": "Examples Filename", + "type": "string" + }, + "data_dir": { + "default": "data", + "description": "The folder for data files in your Truss.", + "title": "Data Dir", + "type": "string" + }, + "external_data": { + "anyOf": [ + { + "$ref": "#/$defs/ExternalData" + }, + { + "type": "null" + } + ], + "default": null, + "description": "External data to be downloaded and made available under the data directory at serving time." + }, + "external_package_dirs": { + "description": "Use external_package_dirs to access custom packages located outside your Truss. This lets multiple Trusses share the same package.", + "items": { + "type": "string" + }, + "title": "External Package Dirs", + "type": "array" + }, + "python_version": { + "default": "py313", + "description": "The Python version to use.", + "examples": [ + "py313", + "py312", + "py311", + "py310", + "py39" + ], + "title": "Python Version", + "type": "string" + }, + "base_image": { + "anyOf": [ + { + "$ref": "#/$defs/BaseImage" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Use a custom Docker base image instead of the default Truss image." + }, + "requirements_file": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Path to a dependency file. Supports requirements.txt, pyproject.toml, and uv.lock. 
Mutually exclusive with 'requirements'.", + "title": "Requirements File" + }, + "requirements": { + "description": "A list of Python dependencies in pip requirements file format. Mutually exclusive with 'requirements_file'.", + "items": { + "type": "string" + }, + "title": "Requirements", + "type": "array" + }, + "system_packages": { + "description": "System packages that you would typically install using apt on a Debian operating system.", + "examples": [ + [ + "ffmpeg", + "libsm6", + "libxext6" + ] + ], + "items": { + "type": "string" + }, + "title": "System Packages", + "type": "array" + }, + "environment_variables": { + "additionalProperties": { + "type": "string" + }, + "description": "Key-value pairs exposed to the environment that the model executes in. Do not store secret values here.", + "title": "Environment Variables", + "type": "object" + }, + "secrets": { + "additionalProperties": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "description": "Declare secrets your model needs at runtime, such as API keys or access tokens. Use null as a placeholder; store actual values in your organization settings.", + "title": "Secrets", + "type": "object" + }, + "resources": { + "$ref": "#/$defs/Resources" + }, + "runtime": { + "$ref": "#/$defs/Runtime" + }, + "build": { + "$ref": "#/$defs/Build" + }, + "build_commands": { + "description": "A list of shell commands to run during Docker build. These commands execute after system packages and Python requirements are installed.", + "items": { + "type": "string" + }, + "title": "Build Commands", + "type": "array" + }, + "docker_server": { + "anyOf": [ + { + "$ref": "#/$defs/DockerServer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Deploy a custom Docker image that has its own HTTP server, without writing a Model class." + }, + "model_cache": { + "$ref": "#/$defs/ModelCache", + "description": "Deprecated. Use 'weights' instead. 
Bundle model weights into your image at build time." + }, + "weights": { + "$ref": "#/$defs/Weights", + "description": "Configure Baseten Delivery Network (BDN) for model weight delivery with multi-tier caching." + }, + "trt_llm": { + "anyOf": [ + { + "$ref": "#/$defs/TRTLLMConfiguration" + }, + { + "type": "null" + } + ], + "default": null, + "description": "TensorRT-LLM configuration for optimized LLM inference." + }, + "training_checkpoints": { + "anyOf": [ + { + "$ref": "#/$defs/CheckpointList" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Configuration for deploying from training checkpoints." + }, + "bis_llm": { + "anyOf": [ + { + "$ref": "#/$defs/BISLLM" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Configuration options for BIS LLM deployments. This field may change in the future." + }, + "input_type": { + "default": "Any", + "title": "Input Type", + "type": "string" + }, + "model_framework": { + "default": "custom", + "title": "Model Framework", + "type": "string" + }, + "model_type": { + "default": "Model", + "title": "Model Type", + "type": "string" + }, + "model_module_dir": { + "default": "model", + "description": "The folder containing your model class.", + "title": "Model Module Dir", + "type": "string" + }, + "model_class_filename": { + "default": "model.py", + "title": "Model Class Filename", + "type": "string" + }, + "model_class_name": { + "default": "Model", + "description": "The name of the class that defines your Truss model. 
This class must implement at least a predict method.", + "title": "Model Class Name", + "type": "string" + }, + "bundled_packages_dir": { + "default": "packages", + "description": "The folder for custom packages in your Truss.", + "title": "Bundled Packages Dir", + "type": "string" + }, + "use_local_src": { + "default": false, + "title": "Use Local Src", + "type": "boolean" + }, + "cache_internal": { + "$ref": "#/$defs/CacheInternal" + }, + "live_reload": { + "default": false, + "description": "If true, changes to your model code are automatically reloaded without restarting the server.", + "title": "Live Reload", + "type": "boolean" + }, + "apply_library_patches": { + "default": true, + "description": "Whether to apply library patches for improved compatibility.", + "title": "Apply Library Patches", + "type": "boolean" + }, + "spec_version": { + "default": "2.0", + "title": "Spec Version", + "type": "string" + } + }, + "title": "TrussConfig", + "type": "object" +} diff --git a/tests/client/test_modelconfig.py b/tests/client/test_modelconfig.py new file mode 100644 index 0000000..186153e --- /dev/null +++ b/tests/client/test_modelconfig.py @@ -0,0 +1,113 @@ +from textwrap import dedent + +import yaml + +from baseten.client.modelconfig import ModelConfig + + +def test_vllm_config(): + # From truss-examples/vllm/config.yaml + config = ModelConfig.model_validate( + yaml.safe_load( + dedent(""" + model_name: "Llama 3.1 8B Instruct VLLM openai compatible" + python_version: py311 + model_metadata: + example_model_input: {"prompt": "what is the meaning of life"} + repo_id: meta-llama/Llama-3.1-8B-Instruct + openai_compatible: true + requirements: + - vllm==0.5.4 + resources: + accelerator: A100 + use_gpu: true + runtime: + predict_concurrency: 128 + secrets: + hf_access_token: null + """) + ) + ) + assert config.model_name == "Llama 3.1 8B Instruct VLLM openai compatible" + assert config.python_version == "py311" + assert config.requirements == ["vllm==0.5.4"] + assert 
config.resources is not None + assert config.resources.accelerator is not None + assert config.resources.accelerator.root == "A100" + assert config.resources.model_extra == {"use_gpu": True} + assert config.runtime is not None + assert config.runtime.predict_concurrency == 128 + + +def test_whisper_config(): + # From truss-examples/07-high-performance-dynamic-batching/config.yaml + config = ModelConfig.model_validate( + yaml.safe_load( + dedent(""" + base_image: + image: baseten/trtllm-server:r23.12_baseten_v0.9.0.dev2024022000 + python_executable_path: /usr/bin/python3 + model_name: TRT Whisper - Dynamic Batching + python_version: py311 + model_cache: + - repo_id: baseten/trtllm-whisper-a10g-large-v2-1 + revision: main + use_volume: true + volume_folder: trtllm-whisper-a10g-large-v2-1 + resources: + accelerator: A10G + runtime: + predict_concurrency: 256 + external_data: + - local_data_path: assets/multilingual.tiktoken + url: https://raw.githubusercontent.com/openai/whisper/main/whisper/assets/multilingual.tiktoken + """) + ) + ) + assert config.model_name == "TRT Whisper - Dynamic Batching" + assert config.base_image is not None + assert ( + config.base_image.image + == "baseten/trtllm-server:r23.12_baseten_v0.9.0.dev2024022000" + ) + assert config.model_cache is not None + assert len(config.model_cache.root) == 1 + assert ( + config.model_cache.root[0].repo_id == "baseten/trtllm-whisper-a10g-large-v2-1" + ) + assert config.model_cache.root[0].use_volume is True + assert config.external_data is not None + assert len(config.external_data.root) == 1 + assert ( + config.external_data.root[0].local_data_path == "assets/multilingual.tiktoken" + ) + + +def test_chatterbox_config(): + # From truss-examples/chatterbox-tts/config.yaml + config = ModelConfig.model_validate( + yaml.safe_load( + dedent(""" + model_name: Chatterbox TTS + base_image: + image: jojobaseten/truss-numpy-1.26.0-gpu:0.4 + python_executable_path: /usr/bin/python3 + python_version: py312 + 
requirements: + - chatterbox-tts + resources: + accelerator: H100 + cpu: '1' + memory: 40Gi + use_gpu: true + """) + ) + ) + assert config.model_name == "Chatterbox TTS" + assert config.python_version == "py312" + assert config.resources is not None + assert config.resources.accelerator is not None + assert config.resources.accelerator.root == "H100" + assert config.resources.cpu == "1" + assert config.resources.memory == "40Gi" + assert config.resources.model_extra == {"use_gpu": True} diff --git a/uv.lock b/uv.lock index dab5265..8755312 100644 --- a/uv.lock +++ b/uv.lock @@ -62,6 +62,7 @@ dev = [ { name = "poethepoet" }, { name = "pytest" }, { name = "pytest-asyncio" }, + { name = "pyyaml" }, { name = "ruff" }, { name = "ty" }, ] @@ -78,8 +79,9 @@ dev = [ { name = "poethepoet", specifier = ">=0.35" }, { name = "pytest", specifier = ">=9.0.2" }, { name = "pytest-asyncio", specifier = ">=1.3.0" }, + { name = "pyyaml", specifier = ">=6.0" }, { name = "ruff", specifier = ">=0.15.8" }, - { name = "ty", specifier = ">=0.0.26" }, + { name = "ty", specifier = ">=0.0.33" }, ] [[package]] @@ -806,26 +808,26 @@ wheels = [ [[package]] name = "ty" -version = "0.0.26" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/18/94/4879b81f8681117ccaf31544579304f6dc2ddcc0c67f872afb35869643a2/ty-0.0.26.tar.gz", hash = "sha256:0496b62405d62de7b954d6d677dc1cc5d3046197215d7a0a7fef37745d7b6d29", size = 5393643, upload-time = "2026-03-26T16:27:11.067Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/83/24/99fe33ecd7e16d23c53b0d4244778c6d1b6eb1663b091236dcba22882d67/ty-0.0.26-py3-none-linux_armv6l.whl", hash = "sha256:35beaa56cf59725fd59ab35d8445bbd40b97fe76db39b052b1fcb31f9bf8adf7", size = 10521856, upload-time = "2026-03-26T16:27:06.335Z" }, - { url = "https://files.pythonhosted.org/packages/55/97/1b5e939e2ff69b9bb279ab680bfa8f677d886309a1ac8d9588fd6ce58146/ty-0.0.26-py3-none-macosx_10_12_x86_64.whl", hash = 
"sha256:487a0be58ab0eb02e31ba71eb6953812a0f88e50633469b0c0ce3fb795fe0fa1", size = 10320958, upload-time = "2026-03-26T16:27:13.849Z" }, - { url = "https://files.pythonhosted.org/packages/71/25/37081461e13d38a190e5646948d7bc42084f7bd1c6b44f12550be3923e7e/ty-0.0.26-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a01b7de5693379646d423b68f119719a1338a20017ba48a93eefaff1ee56f97b", size = 9799905, upload-time = "2026-03-26T16:26:55.805Z" }, - { url = "https://files.pythonhosted.org/packages/a1/1c/295d8f55a7b0e037dfc3a5ec4bdda3ab3cbca6f492f725bf269f96a4d841/ty-0.0.26-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:628c3ee869d113dd2bd249925662fd39d9d0305a6cb38f640ddaa7436b74a1ef", size = 10317507, upload-time = "2026-03-26T16:27:31.887Z" }, - { url = "https://files.pythonhosted.org/packages/1d/62/48b3875c5d2f48fe017468d4bbdde1164c76a8184374f1d5e6162cf7d9b8/ty-0.0.26-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:63d04f35f5370cbc91c0b9675dc83e0c53678125a7b629c9c95769e86f123e65", size = 10319821, upload-time = "2026-03-26T16:27:29.647Z" }, - { url = "https://files.pythonhosted.org/packages/ff/28/cfb2d495046d5bf42d532325cea7412fa1189912d549dbfae417a24fd794/ty-0.0.26-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a53c4e6f6a91927f8b90e584a4b12bcde05b0c1870ddff8d17462168ad7947a", size = 10831757, upload-time = "2026-03-26T16:27:37.441Z" }, - { url = "https://files.pythonhosted.org/packages/26/bf/dbc3e42f448a2d862651de070b4108028c543ca18cab096b38d7de449915/ty-0.0.26-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:caf2ced0e58d898d5e3ba5cb843e0ebd377c8a461464748586049afbd9321f51", size = 11369556, upload-time = "2026-03-26T16:26:58.655Z" }, - { url = "https://files.pythonhosted.org/packages/92/4c/6d2f8f34bc6d502ab778c9345a4a936a72ae113de11329c1764bb1f204f6/ty-0.0.26-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:384807bbcb7d7ce9b97ee5aaa6417a8ae03ccfb426c52b08018ca62cf60f5430", size = 11085679, upload-time = "2026-03-26T16:27:21.746Z" }, - { url = "https://files.pythonhosted.org/packages/cc/f4/f3f61c203bc980dd9bba0ba7ed3c6e81ddfd36b286330f9487c2c7d041aa/ty-0.0.26-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a2c766a94d79b4f82995d41229702caf2d76e5c440ec7e543d05c70e98bf8ab", size = 10900581, upload-time = "2026-03-26T16:27:24.39Z" }, - { url = "https://files.pythonhosted.org/packages/3d/fd/3ca1b4e4bdd129829e9ce78677e0f8e0f1038a7702dccecfa52f037c6046/ty-0.0.26-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:f41ac45a0f8e3e8e181508d863a0a62156341db0f624ffd004b97ee550a9de80", size = 10294401, upload-time = "2026-03-26T16:27:03.999Z" }, - { url = "https://files.pythonhosted.org/packages/de/20/4ee3d8c3f90e008843795c765cb8bb245f188c23e5e5cc612c7697406fba/ty-0.0.26-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:73eb8327a34d529438dfe4db46796946c4e825167cbee434dc148569892e435f", size = 10351469, upload-time = "2026-03-26T16:27:19.003Z" }, - { url = "https://files.pythonhosted.org/packages/3d/b1/9fb154ade65906d4148f0b999c4a8257c2a34253cb72e15d84c1f04a064e/ty-0.0.26-py3-none-musllinux_1_2_i686.whl", hash = "sha256:4bb53a79259516535a1b55f613ba1619e9c666854946474ca8418c35a5c4fd60", size = 10529488, upload-time = "2026-03-26T16:27:01.378Z" }, - { url = "https://files.pythonhosted.org/packages/a5/70/9b02b03b1862e27b64143db65946d68b138160a5b6bfea193bee0b8bbc34/ty-0.0.26-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:2f0e75edc1aeb1b4b84af516c7891f631254a4ca3dcd15e848fa1e061e1fe9da", size = 10999015, upload-time = "2026-03-26T16:27:34.636Z" }, - { url = "https://files.pythonhosted.org/packages/21/16/0a56b8667296e2989b9d48095472d98ebf57a0006c71f2a101bbc62a142d/ty-0.0.26-py3-none-win32.whl", hash = "sha256:943c998c5523ed6b519c899c0c39b26b4c751a9759e460fb964765a44cde226f", size = 9912378, upload-time = "2026-03-26T16:27:08.999Z" }, - { url = 
"https://files.pythonhosted.org/packages/60/c2/fef0d4bba9cd89a82d725b3b1a66efb1b36629ecf0fb1d8e916cb75b8829/ty-0.0.26-py3-none-win_amd64.whl", hash = "sha256:19c856d343efeb1ecad8ee220848f5d2c424daf7b2feda357763ad3036e2172f", size = 10863737, upload-time = "2026-03-26T16:27:27.06Z" }, - { url = "https://files.pythonhosted.org/packages/4d/05/888ebcb3c4d3b6b72d5d3241fddd299142caa3c516e6d26a9cd887dfed3b/ty-0.0.26-py3-none-win_arm64.whl", hash = "sha256:2cde58ccffa046db1223dc28f3e7d4f2c7da8267e97cc5cd186af6fe85f1758a", size = 10285408, upload-time = "2026-03-26T16:27:16.432Z" }, +version = "0.0.33" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/84/44/9478c50c266826c1bf30d1692e589755bffa8f1c0a3eb7af8a346c255991/ty-0.0.33.tar.gz", hash = "sha256:46d63bda07403322cb6c28ccfdd5536be916e13df725c29f7ccd0a21f06bd9e8", size = 5559373, upload-time = "2026-04-28T10:45:13.18Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/24/e287388c63a19191be26b32ff4dbd06029834068150ebe2532939bc4c851/ty-0.0.33-py3-none-linux_armv6l.whl", hash = "sha256:94d0a9d2234261a8911396d59e506b5923fe0971dbda43b9dcea287936887fcc", size = 11021308, upload-time = "2026-04-28T10:45:43.34Z" }, + { url = "https://files.pythonhosted.org/packages/00/ca/ba1eed819895bd239fba8ee35dfcd5fcb266c203b0914a17a59579096bb5/ty-0.0.33-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:e4a2b5ba078f90de342f56b5f7979bb77c9b9b1d8625a041352ffc6ee93c4073", size = 10777272, upload-time = "2026-04-28T10:45:32.905Z" }, + { url = "https://files.pythonhosted.org/packages/25/a8/c3131d37b44b3fea1d6654a1c929a0cd0873822f77a90482b8ec28f6fbbd/ty-0.0.33-py3-none-macosx_11_0_arm64.whl", hash = "sha256:84ff5707825e9af9668d2bcf66975f93e520a63b524ab494e3a8265735be2563", size = 10201078, upload-time = "2026-04-28T10:45:23.374Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/db/d8e37ff0045810cc65e1ff36aa0da0a2253c05659787ac987df8a16c7897/ty-0.0.33-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e375285736f57886868e7af0b11c7b0ec5b6543fa15e7ad2a714fed9f077d4e0", size = 10732347, upload-time = "2026-04-28T10:45:21.444Z" }, + { url = "https://files.pythonhosted.org/packages/e0/1a/20e83a412506a918e4684fc67b567cf7cc13b105470b3428cb23c3d5aa13/ty-0.0.33-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5680f6350c3b4e46b8bff6d7bb132366ea239463d6cad4892725d06046e65464", size = 10808238, upload-time = "2026-04-28T10:45:38.565Z" }, + { url = "https://files.pythonhosted.org/packages/5d/4b/d0a39f4464dc6cb4cc2c159473ce216bd1846bfb684c0323a3cb36dce5c6/ty-0.0.33-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c5535538bad8d0f7e62bcdff02197cdb30e41451d80b35d27e17d128f2e1dc5d", size = 11288348, upload-time = "2026-04-28T10:45:08.419Z" }, + { url = "https://files.pythonhosted.org/packages/35/7e/f1745e0f9583363d7a83d9a4990fc244f76ecc30840ddad83dc16a33c52d/ty-0.0.33-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:da196c42bbbc069e1e21e3e52107c061aa9660352dae57a41930690b56e2c02d", size = 11789907, upload-time = "2026-04-28T10:45:19.064Z" }, + { url = "https://files.pythonhosted.org/packages/a5/71/25f39f46a12d662859d45bc648555d0661044eb43db6b5648c9947487da9/ty-0.0.33-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9281672921ef6d4460e03146b5e6c18cb1a3e3a3b8a1a88f6f33226d05a469b7", size = 11500774, upload-time = "2026-04-28T10:45:48.012Z" }, + { url = "https://files.pythonhosted.org/packages/94/ec/136959ecbb7c71cb90537f5aea441c73f4ab24612868a6ecdc9d7444d32d/ty-0.0.33-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82c1b8f303f82da64e878108e764be3ecbcd7c9903ac0a7f7031614ed00b97ab", size = 11360314, upload-time = "2026-04-28T10:45:05.402Z" }, + { url = 
"https://files.pythonhosted.org/packages/cf/95/32809575c222f00beed498cb728e9290a0f5009f930025381bb7253b2206/ty-0.0.33-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:efe3af412c9ff67bce5fa37d0a2b0d8555c24072b145a5bac6c79637f1c83abe", size = 10707785, upload-time = "2026-04-28T10:45:10.836Z" }, + { url = "https://files.pythonhosted.org/packages/13/89/c8e9531f7aa4a093359e15fa32c8e1277fbbe90d16894d7c6032d29f4b34/ty-0.0.33-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:aeec29c91ea768601747da546c3efc20b72c2fb1bd52bcc786a5c6eeff51d27b", size = 10834987, upload-time = "2026-04-28T10:45:40.738Z" }, + { url = "https://files.pythonhosted.org/packages/31/16/9835fbcf5338af1a1917bd28fdb8a7193c210b83f243aa286fa9f79cb3ad/ty-0.0.33-py3-none-musllinux_1_2_i686.whl", hash = "sha256:a535977c52bbb5f7e96b8b70a6ad375ad077f4a9ff2492508ea3816a2b403819", size = 10968968, upload-time = "2026-04-28T10:45:30.26Z" }, + { url = "https://files.pythonhosted.org/packages/36/69/64c76aabc1bc70c7f24b686cd93c3407f8ea430905e395f59bf9603ef571/ty-0.0.33-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1d732facf39fcb221ba279d469c5040d37883e964f123b1563888efd34818180", size = 11458077, upload-time = "2026-04-28T10:45:45.971Z" }, + { url = "https://files.pythonhosted.org/packages/91/84/fae27b0c4718776a298690d31ca4cc1995f2e3e1c63a7b59e84c41498e9a/ty-0.0.33-py3-none-win32.whl", hash = "sha256:d90960b574428dc252f85e8598ec5fcb7f619794196b2fc95a90da075ed4681c", size = 10345364, upload-time = "2026-04-28T10:45:16.836Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a0/a2938b23ae3e1a09a2d7c189e2ac5f7113676bae4e0e23948b568e18e5f8/ty-0.0.33-py3-none-win_amd64.whl", hash = "sha256:c1c3aec62c44de610c6e95f0a4e97ac3dbc07934bfdbf1fd90d758c9ff72f48e", size = 11342470, upload-time = "2026-04-28T10:45:26.455Z" }, + { url = "https://files.pythonhosted.org/packages/ab/62/7fb948aace38d2f6329261bb33c035a8484549c74f1db28649c7a4c6fed9/ty-0.0.33-py3-none-win_arm64.whl", hash = 
"sha256:0d44f99ba1b441e55e2aa301b2ac0a21112784931b46a5f66f4ea9efe5620d97", size = 10742673, upload-time = "2026-04-28T10:45:35.555Z" }, ] [[package]]