@@ -3,16 +3,14 @@ import { document, knowledgeBase, workspaceFile } from '@sim/db/schema'
33import { createLogger } from '@sim/logger'
44import { and , asc , eq , isNull , like , or } from 'drizzle-orm'
55import { NextResponse } from 'next/server'
6+ import { getEnv } from '@/lib/core/config/env'
7+ import { getBaseUrl , getInternalApiBaseUrl , parseOriginList } from '@/lib/core/utils/urls'
68import { getFileMetadata } from '@/lib/uploads'
79import type { StorageContext } from '@/lib/uploads/config'
810import { BLOB_CHAT_CONFIG , S3_CHAT_CONFIG } from '@/lib/uploads/config'
911import type { StorageConfig } from '@/lib/uploads/core/storage-client'
1012import { getFileMetadataByKey } from '@/lib/uploads/server/metadata'
11- import {
12- extractStorageKey ,
13- inferContextFromKey ,
14- isInternalFileUrl ,
15- } from '@/lib/uploads/utils/file-utils'
13+ import { extractStorageKey , inferContextFromKey } from '@/lib/uploads/utils/file-utils'
1614import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils'
1715import { isUuid } from '@/executor/constants'
1816
@@ -374,25 +372,59 @@ async function verifyCopilotFileAccess(
374372 }
375373}
376374
375+ /**
376+ * Origins this app legitimately serves files from: the public base URL, the
377+ * internal API base URL, and any configured `TRUSTED_ORIGINS`. A document
378+ * `fileUrl` only authorizes storage access when it is relative (same-origin) or
379+ * absolute with one of these origins.
380+ */
381+ function getInternalServeOrigins ( ) : Set < string > {
382+ const origins = new Set < string > ( )
383+ for ( const resolve of [ getBaseUrl , getInternalApiBaseUrl ] ) {
384+ try {
385+ origins . add ( new URL ( resolve ( ) ) . origin )
386+ } catch {
387+ // NEXT_PUBLIC_APP_URL unset/invalid — skip; relative fileUrls still resolve.
388+ }
389+ }
390+ for ( const origin of parseOriginList ( getEnv ( 'TRUSTED_ORIGINS' ) ) ) {
391+ origins . add ( origin )
392+ }
393+ return origins
394+ }
395+
377396/**
378397 * Resolve a knowledge-base document's stored `fileUrl` to the canonical storage
379- * key it points at, but only when the URL is an internal Sim file-serve URL .
398+ * key it points at, but only for internal Sim file-serve URLs .
380399 *
381- * External `http(s)://` URLs and `data:` URIs are accepted for ingestion but
382- * intentionally return `null` here so they can never authorize access to an
383- * internal storage object. Returns the storage key (e.g. `kb/<ts>-name.txt`)
384- * only for internal knowledge-base URLs, `null` otherwise .
400+ * Relative paths are same-origin by definition; absolute URLs must match an
401+ * internal serve origin. A foreign host that embeds `/api/files/serve/` in its
402+ * path, and `data:` URIs, return `null` so an attacker-planted document can
403+ * never authorize access to another tenant's storage object .
385404 */
386- function resolveInternalKbKey ( fileUrl : string | null ) : string | null {
387- if ( ! fileUrl || ! isInternalFileUrl ( fileUrl ) ) {
405+ function resolveInternalKbKey ( fileUrl : string | null , allowedOrigins : Set < string > ) : string | null {
406+ if ( ! fileUrl ) {
388407 return null
389408 }
390- try {
391- const key = extractStorageKey ( fileUrl )
392- return key . startsWith ( 'kb/' ) ? key : null
393- } catch {
409+ let pathname : string
410+ if ( fileUrl . startsWith ( '/' ) ) {
411+ pathname = fileUrl
412+ } else {
413+ try {
414+ const parsed = new URL ( fileUrl )
415+ if ( ! allowedOrigins . has ( parsed . origin ) ) {
416+ return null
417+ }
418+ pathname = parsed . pathname
419+ } catch {
420+ return null
421+ }
422+ }
423+ if ( ! pathname . startsWith ( '/api/files/serve/' ) ) {
394424 return null
395425 }
426+ const key = extractStorageKey ( pathname )
427+ return key . startsWith ( 'kb/' ) ? key : null
396428}
397429
398430/**
@@ -437,8 +469,9 @@ async function verifyKBFileAccess(
437469
438470 // Owner is the earliest document whose fileUrl resolves to EXACTLY this key; substring
439471 // matches and cross-workspace references never establish ownership.
472+ const allowedOrigins = getInternalServeOrigins ( )
440473 const owningDocument = candidateDocuments . find (
441- ( doc ) => resolveInternalKbKey ( doc . fileUrl ) === cloudKey
474+ ( doc ) => resolveInternalKbKey ( doc . fileUrl , allowedOrigins ) === cloudKey
442475 )
443476
444477 if ( owningDocument ) {
0 commit comments