@@ -508,6 +508,78 @@ export class WorkspaceVFS {
508508 return findWorkspaceFileRecord ( files , `files/${ canonicalMatch [ 1 ] } ` )
509509 }
510510
/**
 * Turns a renderable document record (pptx/docx/pdf) into a contact-sheet JPEG
 * and hands it back as an image attachment the model can read. Both the
 * `/render` and `/compiled` reads go through here so that a non-PDF binary is
 * never attached as a raw `document` block.
 *
 * Flow: size-gate the input, fetch the bytes, use them as-is when they are
 * already a binary document, otherwise compile the source (E2B doc sandbox
 * when enabled for this file name, else the per-extension sandbox task), then
 * render the resulting binary to a grid image.
 *
 * @param record - Workspace file record to render.
 * @param ext - File extension used to pick the binary check, the sandbox task
 *   and the renderer.
 * @param buildMessage - Produces the textual `content` from the rendered page
 *   count.
 * @returns A one-line result carrying the JPEG attachment, or a one-line JSON
 *   `{ ok: false, error }` payload when the input is too large, its source is
 *   too large, or no compile task exists for `ext`.
 * @throws Whatever the fetch, compile or render step rejects with; the caller
 *   is expected to catch and report it.
 */
private async renderDocRecordResult(
  record: WorkspaceFileRecord,
  ext: string,
  buildMessage: (pageCount: number) => string
): Promise<FileReadResult> {
  // Every refusal shares the same single-line JSON envelope.
  const refuse = (error: string): FileReadResult => ({
    content: JSON.stringify({ ok: false, error }),
    totalLines: 1,
  })
  const oversizeError = 'File is too large to render'

  // Cheap gate on the recorded size first, so an oversized file is rejected
  // before any bytes are fetched.
  if (typeof record.size === 'number' && record.size > MAX_DOC_READ_INPUT_BYTES) {
    return refuse(oversizeError)
  }

  // Re-check against the real byte count: `record.size` may be absent.
  const raw = await fetchWorkspaceFileBuffer(record)
  if (raw.length > MAX_DOC_READ_INPUT_BYTES) {
    return refuse(oversizeError)
  }

  const workspaceId = this._workspaceId

  // Uploaded binaries are rendered directly; anything else is treated as
  // source code that has to be compiled into a binary document first.
  let binary: Buffer
  if (isBinaryDocBuffer(raw, ext)) {
    binary = raw
  } else {
    const source = raw.toString('utf-8')
    if (Buffer.byteLength(source, 'utf-8') > MAX_DOCUMENT_PREVIEW_CODE_BYTES) {
      return refuse('File source exceeds maximum size')
    }

    if (isE2BDocEnabled && getE2BDocFormat(record.name)) {
      const artifact = await compileDoc({ source, fileName: record.name, workspaceId })
      binary = artifact.buffer
    } else {
      const sandboxTask = BINARY_DOC_TASKS[ext]
      if (!sandboxTask) {
        return refuse('Cannot render this file')
      }
      binary = await runSandboxTask(sandboxTask, { code: source, workspaceId })
    }
  }

  const rendered = await renderDocToGrid({ binary, ext, workspaceId })

  return {
    content: buildMessage(rendered.pageCount),
    totalLines: 1,
    attachment: {
      type: 'file',
      name: `${record.name}.render.jpg`,
      source: {
        type: 'base64',
        media_type: 'image/jpeg',
        data: rendered.grid.toString('base64'),
      },
    },
  }
}
582+
511583 /**
512584 * Attempt to read dynamic workspace file content from storage.
513585 * Handles explicit /content reads for images, PDFs, documents, and text files.
@@ -529,6 +601,29 @@ export class WorkspaceVFS {
529601 const e2bFmt = isE2BDocEnabled ? getE2BDocFormat ( record . name ) : null
530602 const taskId = BINARY_DOC_TASKS [ ext ]
531603 if ( ! e2bFmt && ! taskId ) return null
604+
605+ // Only PDF can be attached as a model-readable `document` block —
606+ // Bedrock/Anthropic document blocks accept application/pdf ONLY. Attaching
607+ // a raw pptx/docx/xlsx binary is rejected by the provider (400). So for
608+ // pptx/docx, render to page images (which the model CAN read) and return
609+ // those directly — /compiled can never emit an invalid document block for
610+ // these formats. xlsx isn't renderable; direct to /extract for its content.
611+ if ( ext !== 'pdf' ) {
612+ if ( isRenderableDocExt ( ext ) ) {
613+ const compiledName = record . name
614+ return await this . renderDocRecordResult (
615+ record ,
616+ ext ,
617+ ( pageCount ) =>
618+ `${ compiledName } : the raw ${ ext . toUpperCase ( ) } binary isn't model-readable, so it was rendered to ${ pageCount } page image(s) for inspection.`
619+ )
620+ }
621+ return {
622+ content : `${ record . name } is a spreadsheet — read "files/by-id/${ record . id } /extract" for its contents.` ,
623+ totalLines : 1 ,
624+ }
625+ }
626+
532627 const buffer = await fetchWorkspaceFileBuffer ( record )
533628 const code = buffer . toString ( 'utf-8' )
534629 if ( Buffer . byteLength ( code , 'utf-8' ) > MAX_DOCUMENT_PREVIEW_CODE_BYTES ) {
@@ -537,41 +632,30 @@ export class WorkspaceVFS {
537632 totalLines : 1 ,
538633 }
539634 }
540- let compiled : Buffer
541- let contentType : string
542- if ( e2bFmt ) {
543- // E2B: load the compile-once S3 artifact (or build it on first read).
544- const out = await compileDoc ( {
545- source : code ,
546- fileName : record . name ,
547- workspaceId : this . _workspaceId ,
548- } )
549- compiled = out . buffer
550- contentType = out . contentType
551- } else {
552- compiled = await runSandboxTask ( taskId , { code, workspaceId : this . _workspaceId } )
553- contentType =
554- ext === 'pdf'
555- ? 'application/pdf'
556- : ext === 'docx'
557- ? 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
558- : 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
559- }
635+ const compiled = e2bFmt
636+ ? (
637+ await compileDoc ( {
638+ source : code ,
639+ fileName : record . name ,
640+ workspaceId : this . _workspaceId ,
641+ } )
642+ ) . buffer
643+ : await runSandboxTask ( taskId , { code, workspaceId : this . _workspaceId } )
560644 if ( compiled . length > MAX_COMPILED_ATTACHMENT_BYTES ) {
561645 return {
562646 content : `[Compiled artifact too large: ${ record . name } (${ compiled . length } bytes, limit ${ MAX_COMPILED_ATTACHMENT_BYTES } )]` ,
563647 totalLines : 1 ,
564648 }
565649 }
566650 return {
567- content : `Compiled file: ${ record . name } (${ compiled . length } bytes, ${ contentType } )` ,
651+ content : `Compiled file: ${ record . name } (${ compiled . length } bytes, application/pdf )` ,
568652 totalLines : 1 ,
569653 attachment : {
570654 type : 'file' ,
571655 name : record . name ,
572656 source : {
573657 type : 'base64' ,
574- media_type : contentType ,
658+ media_type : 'application/pdf' ,
575659 data : compiled . toString ( 'base64' ) ,
576660 } ,
577661 } ,
@@ -611,61 +695,13 @@ export class WorkspaceVFS {
611695 totalLines : 1 ,
612696 }
613697 }
614- if ( typeof record . size === 'number' && record . size > MAX_DOC_READ_INPUT_BYTES ) {
615- return {
616- content : JSON . stringify ( { ok : false , error : 'File is too large to render' } ) ,
617- totalLines : 1 ,
618- }
619- }
620- const buffer = await fetchWorkspaceFileBuffer ( record )
621- if ( buffer . length > MAX_DOC_READ_INPUT_BYTES ) {
622- return {
623- content : JSON . stringify ( { ok : false , error : 'File is too large to render' } ) ,
624- totalLines : 1 ,
625- }
626- }
627- // Already-binary uploads render directly; source files are compiled first
628- // (E2B regime -> doc sandbox: Node pptx/docx, Python pdf; otherwise
629- // isolated-vm pptxgenjs/docx-js/pdf-lib).
630- let bin : Buffer
631- if ( isBinaryDocBuffer ( buffer , ext ) ) {
632- bin = buffer
633- } else {
634- const code = buffer . toString ( 'utf-8' )
635- if ( Buffer . byteLength ( code , 'utf-8' ) > MAX_DOCUMENT_PREVIEW_CODE_BYTES ) {
636- return {
637- content : JSON . stringify ( { ok : false , error : 'File source exceeds maximum size' } ) ,
638- totalLines : 1 ,
639- }
640- }
641- if ( isE2BDocEnabled && getE2BDocFormat ( record . name ) ) {
642- bin = (
643- await compileDoc ( {
644- source : code ,
645- fileName : record . name ,
646- workspaceId : this . _workspaceId ,
647- } )
648- ) . buffer
649- } else {
650- const taskId = BINARY_DOC_TASKS [ ext ]
651- if ( ! taskId ) return null
652- bin = await runSandboxTask ( taskId , { code, workspaceId : this . _workspaceId } )
653- }
654- }
655- const { grid, pageCount } = await renderDocToGrid ( {
656- binary : bin ,
698+ const renderName = record . name
699+ return await this . renderDocRecordResult (
700+ record ,
657701 ext ,
658- workspaceId : this . _workspaceId ,
659- } )
660- return {
661- content : `Rendered ${ pageCount } page(s) of ${ record . name } as a contact-sheet grid for visual QA. Inspect each page for text overflow/cutoff, overlapping elements, low contrast, misalignment, and leftover placeholder text; fix and re-render until clean.` ,
662- totalLines : 1 ,
663- attachment : {
664- type : 'file' ,
665- name : `${ record . name } .render.jpg` ,
666- source : { type : 'base64' , media_type : 'image/jpeg' , data : grid . toString ( 'base64' ) } ,
667- } ,
668- }
702+ ( pageCount ) =>
703+ `Rendered ${ pageCount } page(s) of ${ renderName } as a contact-sheet grid for visual QA. Inspect each page for text overflow/cutoff, overlapping elements, low contrast, misalignment, and leftover placeholder text; fix and re-render until clean.`
704+ )
669705 } catch ( err ) {
670706 logger . warn ( 'Render read failed via VFS' , {
671707 workspaceId : this . _workspaceId ,
0 commit comments