From 92f0c8d2767831054306902826ddec1162d5c714 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Tue, 2 Jun 2026 12:21:50 -0700 Subject: [PATCH 1/2] feat(apify): add run task, get dataset items, and get run tools --- apps/docs/content/docs/en/tools/apify.mdx | 73 +++++++++- .../integrations/data/integrations.json | 16 ++- apps/sim/blocks/blocks/apify.ts | 132 +++++++++++++----- apps/sim/tools/apify/get_dataset_items.ts | 98 +++++++++++++ apps/sim/tools/apify/get_run.ts | 92 ++++++++++++ apps/sim/tools/apify/index.ts | 4 + apps/sim/tools/apify/run_actor_async.ts | 38 ++--- apps/sim/tools/apify/run_actor_sync.ts | 7 +- apps/sim/tools/apify/run_task.ts | 123 ++++++++++++++++ apps/sim/tools/apify/types.ts | 102 ++++++++++---- apps/sim/tools/registry.ts | 11 +- 11 files changed, 600 insertions(+), 96 deletions(-) create mode 100644 apps/sim/tools/apify/get_dataset_items.ts create mode 100644 apps/sim/tools/apify/get_run.ts create mode 100644 apps/sim/tools/apify/run_task.ts diff --git a/apps/docs/content/docs/en/tools/apify.mdx b/apps/docs/content/docs/en/tools/apify.mdx index 66bfd18aa79..40c30ec70bd 100644 --- a/apps/docs/content/docs/en/tools/apify.mdx +++ b/apps/docs/content/docs/en/tools/apify.mdx @@ -30,7 +30,7 @@ These operations equip your agents to automate, scrape, and orchestrate data col ## Usage Instructions -Integrate Apify into your workflow. Run any Apify actor with custom input and retrieve results. Supports both synchronous and asynchronous execution with automatic dataset fetching. +Integrate Apify into your workflow. Run any Apify actor or saved task with custom input, fetch dataset items, and check run status. Supports both synchronous and asynchronous execution with automatic dataset fetching. @@ -87,4 +87,75 @@ Run an APIFY actor asynchronously with polling for long-running tasks | `datasetId` | string | Dataset ID containing results | | `items` | array | Dataset items \(if completed\) | +### `apify_run_task` + +Run a saved APIFY actor task synchronously and get dataset items (max 5 minutes) + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `apiKey` | string | Yes | APIFY API token from console.apify.com/account#/integrations | +| `taskId` | string | Yes | Task ID or username/task-name. Examples: "janedoe/my-task", "moJRLRc85AitArpNN" | +| `input` | string | No | JSON string that overrides the task\'s saved input. Example: \{"startUrls": \[\{"url": "https://example.com"\}\]\} | +| `itemLimit` | number | No | Max dataset items to return \(1-250000\). Example: 500 | +| `memory` | number | No | Memory in megabytes allocated for the run \(128-32768\). Example: 1024 for 1GB | +| `timeout` | number | No | Timeout in seconds for the run. Example: 300 for 5 minutes | +| `build` | string | No | Actor build to run. Examples: "latest", "beta", "1.2.3" | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `success` | boolean | Whether the task run succeeded | +| `status` | string | Run status \(SUCCEEDED, FAILED, etc.\) | +| `items` | array | Dataset items produced by the run | + +### `apify_get_dataset_items` + +Retrieve items stored in an APIFY dataset + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `apiKey` | string | Yes | APIFY API token from console.apify.com/account#/integrations | +| `datasetId` | string | Yes | Dataset ID to read items from. Example: "9RnD3Pql2vGZkc5H5" | +| `itemLimit` | number | No | Max items to return \(1-250000\). Default: all items. Example: 500 | +| `offset` | number | No | Number of items to skip at the start. Default: 0 | +| `fields` | string | No | Comma-separated list of fields to include. Example: "title,url,price" | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `success` | boolean | Whether the items were retrieved | +| `datasetId` | string | Dataset ID the items were read from | +| `items` | array | Items stored in the dataset | +| `count` | number | Number of items returned | + +### `apify_get_run` + +Get the status and details of an APIFY actor run + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `apiKey` | string | Yes | APIFY API token from console.apify.com/account#/integrations | +| `runId` | string | Yes | Actor run ID to fetch. Example: "HG7ML7M8z78YcAPEB" | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `success` | boolean | Whether the run was found | +| `runId` | string | APIFY run ID | +| `status` | string | Run status \(READY, RUNNING, SUCCEEDED, FAILED, etc.\) | +| `startedAt` | string | When the run started \(ISO timestamp\) | +| `finishedAt` | string | When the run finished \(ISO timestamp\) | +| `datasetId` | string | Default dataset ID for the run | +| `keyValueStoreId` | string | Default key-value store ID for the run | +| `stats` | json | Run statistics \(memory, CPU, duration\) | + diff --git a/apps/sim/app/(landing)/integrations/data/integrations.json b/apps/sim/app/(landing)/integrations/data/integrations.json index 9aeefb235fe..f371f03ebe5 100644 --- a/apps/sim/app/(landing)/integrations/data/integrations.json +++ b/apps/sim/app/(landing)/integrations/data/integrations.json @@ -772,7 +772,7 @@ "slug": "apify", "name": "Apify", "description": "Run Apify actors and retrieve results", - "longDescription": "Integrate Apify into your workflow. Run any Apify actor with custom input and retrieve results. Supports both synchronous and asynchronous execution with automatic dataset fetching.", + "longDescription": "Integrate Apify into your workflow. Run any Apify actor or saved task with custom input, fetch dataset items, and check run status. Supports both synchronous and asynchronous execution with automatic dataset fetching.", "bgColor": "#E0E0E0", "iconName": "ApifyIcon", "docsUrl": "https://docs.sim.ai/tools/apify", @@ -784,9 +784,21 @@ { "name": "Run Actor (Async)", "description": "Run an APIFY actor asynchronously with polling for long-running tasks" + }, + { + "name": "Run Task", + "description": "Run a saved APIFY actor task synchronously and get dataset items (max 5 minutes)" + }, + { + "name": "Get Dataset Items", + "description": "Retrieve items stored in an APIFY dataset" + }, + { + "name": "Get Run", + "description": "Get the status and details of an APIFY actor run" } ], - "operationCount": 2, + "operationCount": 5, "triggers": [], "triggerCount": 0, "authType": "api-key", diff --git a/apps/sim/blocks/blocks/apify.ts b/apps/sim/blocks/blocks/apify.ts index 4db48fcc194..66da993a7a3 100644 --- a/apps/sim/blocks/blocks/apify.ts +++ b/apps/sim/blocks/blocks/apify.ts @@ -1,14 +1,18 @@ import { ApifyIcon } from '@/components/icons' import type { BlockConfig } from '@/blocks/types' -import { IntegrationType } from '@/blocks/types' +import { AuthMode, IntegrationType } from '@/blocks/types' import type { RunActorResult } from '@/tools/apify/types' +const RUN_OPERATIONS = ['apify_run_actor_sync', 'apify_run_actor_async'] +const RUN_OR_TASK_OPERATIONS = [...RUN_OPERATIONS, 'apify_run_task'] + export const ApifyBlock: BlockConfig = { type: 'apify', name: 'Apify', description: 'Run Apify actors and retrieve results', + authMode: AuthMode.ApiKey, longDescription: - 'Integrate Apify into your workflow. Run any Apify actor with custom input and retrieve results. Supports both synchronous and asynchronous execution with automatic dataset fetching.', + 'Integrate Apify into your workflow. Run any Apify actor or saved task with custom input, fetch dataset items, and check run status. Supports both synchronous and asynchronous execution with automatic dataset fetching.', docsLink: 'https://docs.sim.ai/tools/apify', category: 'tools', integrationType: IntegrationType.Search, @@ -24,6 +28,9 @@ export const ApifyBlock: BlockConfig = { options: [ { label: 'Run Actor', id: 'apify_run_actor_sync' }, { label: 'Run Actor (Async)', id: 'apify_run_actor_async' }, + { label: 'Run Task', id: 'apify_run_task' }, + { label: 'Get Dataset Items', id: 'apify_get_dataset_items' }, + { label: 'Get Run', id: 'apify_get_run' }, ], value: () => 'apify_run_actor_sync', }, @@ -40,7 +47,32 @@ export const ApifyBlock: BlockConfig = { title: 'Actor ID', type: 'short-input', placeholder: 'e.g., janedoe/my-actor or actor ID', - required: true, + condition: { field: 'operation', value: RUN_OPERATIONS }, + required: { field: 'operation', value: RUN_OPERATIONS }, + }, + { + id: 'taskId', + title: 'Task ID', + type: 'short-input', + placeholder: 'e.g., janedoe/my-task or task ID', + condition: { field: 'operation', value: 'apify_run_task' }, + required: { field: 'operation', value: 'apify_run_task' }, + }, + { + id: 'datasetId', + title: 'Dataset ID', + type: 'short-input', + placeholder: 'e.g., 9RnD3Pql2vGZkc5H5', + condition: { field: 'operation', value: 'apify_get_dataset_items' }, + required: { field: 'operation', value: 'apify_get_dataset_items' }, + }, + { + id: 'runId', + title: 'Run ID', + type: 'short-input', + placeholder: 'e.g., HG7ML7M8z78YcAPEB', + condition: { field: 'operation', value: 'apify_get_run' }, + required: { field: 'operation', value: 'apify_get_run' }, }, { id: 'input', @@ -49,6 +81,7 @@ export const ApifyBlock: BlockConfig = { language: 'json', placeholder: '{\n "startUrl": "https://example.com",\n "maxPages": 10\n}', required: false, + condition: { field: 'operation', value: RUN_OR_TASK_OPERATIONS }, wandConfig: { enabled: true, prompt: `Generate a JSON configuration object for an Apify actor based on the user's description. @@ -82,6 +115,8 @@ Return ONLY the valid JSON object - no explanations, no markdown.`, type: 'short-input', placeholder: 'Memory in MB (e.g., 1024 for 1GB, 2048 for 2GB)', required: false, + mode: 'advanced', + condition: { field: 'operation', value: RUN_OR_TASK_OPERATIONS }, }, { id: 'timeout', @@ -89,6 +124,8 @@ Return ONLY the valid JSON object - no explanations, no markdown.`, type: 'short-input', placeholder: 'Timeout in seconds (e.g., 300 for 5 min)', required: false, + mode: 'advanced', + condition: { field: 'operation', value: RUN_OR_TASK_OPERATIONS }, }, { id: 'build', @@ -96,6 +133,8 @@ Return ONLY the valid JSON object - no explanations, no markdown.`, type: 'short-input', placeholder: 'Build version (e.g., "latest", "beta", "1.2.3")', required: false, + mode: 'advanced', + condition: { field: 'operation', value: RUN_OR_TASK_OPERATIONS }, }, { id: 'waitForFinish', @@ -103,10 +142,8 @@ Return ONLY the valid JSON object - no explanations, no markdown.`, type: 'short-input', placeholder: 'Initial wait time in seconds (0-60)', required: false, - condition: { - field: 'operation', - value: 'apify_run_actor_async', - }, + mode: 'advanced', + condition: { field: 'operation', value: 'apify_run_actor_async' }, }, { id: 'itemLimit', @@ -114,47 +151,58 @@ Return ONLY the valid JSON object - no explanations, no markdown.`, type: 'short-input', placeholder: 'Max dataset items to fetch (1-250000)', required: false, + mode: 'advanced', condition: { field: 'operation', - value: 'apify_run_actor_async', + value: ['apify_run_actor_async', 'apify_run_task', 'apify_get_dataset_items'], }, }, + { + id: 'offset', + title: 'Offset', + type: 'short-input', + placeholder: 'Number of items to skip (default 0)', + required: false, + mode: 'advanced', + condition: { field: 'operation', value: 'apify_get_dataset_items' }, + }, + { + id: 'fields', + title: 'Fields', + type: 'short-input', + placeholder: 'Comma-separated fields (e.g., title,url,price)', + required: false, + mode: 'advanced', + condition: { field: 'operation', value: 'apify_get_dataset_items' }, + }, ], tools: { - access: ['apify_run_actor_sync', 'apify_run_actor_async'], + access: [ + 'apify_run_actor_sync', + 'apify_run_actor_async', + 'apify_run_task', + 'apify_get_dataset_items', + 'apify_get_run', + ], config: { tool: (params) => params.operation, params: (params: Record) => { const { operation, ...rest } = params - const result: Record = { - apiKey: rest.apiKey, - actorId: rest.actorId, - } - - if (rest.input) { - result.input = rest.input - } - - if (rest.memory) { - result.memory = Number(rest.memory) - } - - if (rest.timeout) { - result.timeout = Number(rest.timeout) - } - - if (rest.build) { - result.build = rest.build - } - - if (rest.waitForFinish) { - result.waitForFinish = Number(rest.waitForFinish) - } - - if (rest.itemLimit) { - result.itemLimit = Number(rest.itemLimit) - } + const result: Record = { apiKey: rest.apiKey } + + if (rest.actorId) result.actorId = rest.actorId + if (rest.taskId) result.taskId = rest.taskId + if (rest.datasetId) result.datasetId = rest.datasetId + if (rest.runId) result.runId = rest.runId + if (rest.input) result.input = rest.input + if (rest.build) result.build = rest.build + if (rest.fields) result.fields = rest.fields + if (rest.memory) result.memory = Number(rest.memory) + if (rest.timeout) result.timeout = Number(rest.timeout) + if (rest.waitForFinish) result.waitForFinish = Number(rest.waitForFinish) + if (rest.itemLimit) result.itemLimit = Number(rest.itemLimit) + if (rest.offset) result.offset = Number(rest.offset) return result }, @@ -165,19 +213,27 @@ Return ONLY the valid JSON object - no explanations, no markdown.`, operation: { type: 'string', description: 'Operation to perform' }, apiKey: { type: 'string', description: 'Apify API token' }, actorId: { type: 'string', description: 'Actor ID or username/actor-name' }, + taskId: { type: 'string', description: 'Task ID or username/task-name' }, + datasetId: { type: 'string', description: 'Dataset ID to read items from' }, + runId: { type: 'string', description: 'Actor run ID to fetch' }, input: { type: 'string', description: 'Actor input as JSON string' }, memory: { type: 'number', description: 'Memory in MB (128-32768)' }, timeout: { type: 'number', description: 'Timeout in seconds' }, build: { type: 'string', description: 'Actor build version' }, waitForFinish: { type: 'number', description: 'Initial wait time in seconds' }, itemLimit: { type: 'number', description: 'Max dataset items to fetch' }, + offset: { type: 'number', description: 'Number of items to skip' }, + fields: { type: 'string', description: 'Comma-separated fields to include' }, }, outputs: { - success: { type: 'boolean', description: 'Whether the actor run succeeded' }, + success: { type: 'boolean', description: 'Whether the operation succeeded' }, runId: { type: 'string', description: 'Apify run ID' }, status: { type: 'string', description: 'Run status (SUCCEEDED, FAILED, etc.)' }, datasetId: { type: 'string', description: 'Dataset ID containing results' }, items: { type: 'json', description: 'Dataset items (if completed)' }, + count: { type: 'number', description: 'Number of items returned (Get Dataset Items)' }, + startedAt: { type: 'string', description: 'When the run started (Get Run)' }, + finishedAt: { type: 'string', description: 'When the run finished (Get Run)' }, }, } diff --git a/apps/sim/tools/apify/get_dataset_items.ts b/apps/sim/tools/apify/get_dataset_items.ts new file mode 100644 index 00000000000..51a323c35e7 --- /dev/null +++ b/apps/sim/tools/apify/get_dataset_items.ts @@ -0,0 +1,98 @@ +import type { GetDatasetItemsParams, GetDatasetItemsResult } from '@/tools/apify/types' +import type { ToolConfig } from '@/tools/types' + +export const apifyGetDatasetItemsTool: ToolConfig = { + id: 'apify_get_dataset_items', + name: 'APIFY Get Dataset Items', + description: 'Retrieve items stored in an APIFY dataset', + version: '1.0.0', + + params: { + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'APIFY API token from console.apify.com/account#/integrations', + }, + datasetId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset ID to read items from. Example: "9RnD3Pql2vGZkc5H5"', + }, + itemLimit: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Max items to return (1-250000). Default: all items. Example: 500', + }, + offset: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Number of items to skip at the start. Default: 0', + }, + fields: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Comma-separated list of fields to include. Example: "title,url,price"', + }, + }, + + request: { + url: (params) => { + const encodedDatasetId = encodeURIComponent(params.datasetId.trim()) + const baseUrl = `https://api.apify.com/v2/datasets/${encodedDatasetId}/items` + const queryParams = new URLSearchParams() + + queryParams.set('clean', 'true') + if (params.itemLimit) { + const limit = Math.max(1, Math.min(params.itemLimit, 250000)) + queryParams.set('limit', limit.toString()) + } + if (params.offset) { + queryParams.set('offset', params.offset.toString()) + } + if (params.fields) { + queryParams.set('fields', params.fields) + } + + return `${baseUrl}?${queryParams.toString()}` + }, + method: 'GET', + headers: (params) => ({ + Authorization: `Bearer ${params.apiKey}`, + }), + }, + + transformResponse: async (response, params) => { + if (!response.ok) { + const errorText = await response.text() + return { + success: false, + output: { success: false, datasetId: params?.datasetId ?? '', items: [], count: 0 }, + error: `APIFY API error: ${errorText}`, + } + } + + const items = await response.json() + const list = Array.isArray(items) ? items : [] + return { + success: true, + output: { + success: true, + datasetId: params?.datasetId ?? '', + items: list, + count: list.length, + }, + } + }, + + outputs: { + success: { type: 'boolean', description: 'Whether the items were retrieved' }, + datasetId: { type: 'string', description: 'Dataset ID the items were read from' }, + items: { type: 'array', description: 'Items stored in the dataset' }, + count: { type: 'number', description: 'Number of items returned' }, + }, +} diff --git a/apps/sim/tools/apify/get_run.ts b/apps/sim/tools/apify/get_run.ts new file mode 100644 index 00000000000..f0c90105266 --- /dev/null +++ b/apps/sim/tools/apify/get_run.ts @@ -0,0 +1,92 @@ +import type { ApifyRun, GetRunParams, GetRunResult } from '@/tools/apify/types' +import type { ToolConfig } from '@/tools/types' + +export const apifyGetRunTool: ToolConfig = { + id: 'apify_get_run', + name: 'APIFY Get Run', + description: 'Get the status and details of an APIFY actor run', + version: '1.0.0', + + params: { + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'APIFY API token from console.apify.com/account#/integrations', + }, + runId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Actor run ID to fetch. Example: "HG7ML7M8z78YcAPEB"', + }, + }, + + request: { + url: (params) => + `https://api.apify.com/v2/actor-runs/${encodeURIComponent(params.runId.trim())}`, + method: 'GET', + headers: (params) => ({ + Authorization: `Bearer ${params.apiKey}`, + }), + }, + + transformResponse: async (response, params) => { + if (!response.ok) { + const errorText = await response.text() + return { + success: false, + output: { + success: false, + runId: params?.runId ?? '', + status: 'ERROR', + startedAt: null, + finishedAt: null, + datasetId: null, + keyValueStoreId: null, + stats: null, + }, + error: `APIFY API error: ${errorText}`, + } + } + + const data = await response.json() + const run = data.data as ApifyRun + return { + success: true, + output: { + success: true, + runId: run.id, + status: run.status, + startedAt: run.startedAt ?? null, + finishedAt: run.finishedAt ?? null, + datasetId: run.defaultDatasetId ?? null, + keyValueStoreId: run.defaultKeyValueStoreId ?? null, + stats: run.stats ?? null, + }, + } + }, + + outputs: { + success: { type: 'boolean', description: 'Whether the run was found' }, + runId: { type: 'string', description: 'APIFY run ID' }, + status: { type: 'string', description: 'Run status (READY, RUNNING, SUCCEEDED, FAILED, etc.)' }, + startedAt: { + type: 'string', + description: 'When the run started (ISO timestamp)', + optional: true, + }, + finishedAt: { + type: 'string', + description: 'When the run finished (ISO timestamp)', + optional: true, + }, + datasetId: { type: 'string', description: 'Default dataset ID for the run', optional: true }, + keyValueStoreId: { + type: 'string', + description: 'Default key-value store ID for the run', + optional: true, + }, + stats: { type: 'json', description: 'Run statistics (memory, CPU, duration)', optional: true }, + }, +} diff --git a/apps/sim/tools/apify/index.ts b/apps/sim/tools/apify/index.ts index a53f9d36e95..d772a7d825e 100644 --- a/apps/sim/tools/apify/index.ts +++ b/apps/sim/tools/apify/index.ts @@ -1,2 +1,6 @@ +export { apifyGetDatasetItemsTool } from './get_dataset_items' +export { apifyGetRunTool } from './get_run' export { apifyRunActorAsyncTool } from './run_actor_async' export { apifyRunActorSyncTool } from './run_actor_sync' +export { apifyRunTaskTool } from './run_task' +export * from './types' diff --git a/apps/sim/tools/apify/run_actor_async.ts b/apps/sim/tools/apify/run_actor_async.ts index 2a88539ecda..77d5f885b16 100644 --- a/apps/sim/tools/apify/run_actor_async.ts +++ b/apps/sim/tools/apify/run_actor_async.ts @@ -1,6 +1,6 @@ import { sleep } from '@sim/utils/helpers' import { DEFAULT_EXECUTION_TIMEOUT_MS } from '@/lib/core/execution-limits' -import type { RunActorParams, RunActorResult } from '@/tools/apify/types' +import type { ApifyRun, RunActorParams, RunActorResult } from '@/tools/apify/types' import type { ToolConfig } from '@/tools/types' const POLL_INTERVAL_MS = 5000 @@ -70,12 +70,10 @@ export const apifyRunActorAsyncTool: ToolConfig request: { url: (params) => { - const encodedActorId = encodeURIComponent(params.actorId) + const encodedActorId = encodeURIComponent(params.actorId.trim()) const baseUrl = `https://api.apify.com/v2/acts/${encodedActorId}/runs` const queryParams = new URLSearchParams() - queryParams.set('token', params.apiKey) - if (params.waitForFinish !== undefined) { const waitTime = Math.max(0, Math.min(params.waitForFinish, 60)) queryParams.set('waitForFinish', waitTime.toString()) @@ -90,7 +88,8 @@ export const apifyRunActorAsyncTool: ToolConfig queryParams.set('build', params.build) } - return `${baseUrl}?${queryParams.toString()}` + const query = queryParams.toString() + return query ? `${baseUrl}?${query}` : baseUrl }, method: 'POST', headers: (params) => ({ @@ -121,9 +120,15 @@ export const apifyRunActorAsyncTool: ToolConfig } const data = await response.json() + const run = data.data as ApifyRun return { success: true, - output: data.data, + output: { + success: true, + runId: run.id, + status: run.status, + datasetId: run.defaultDatasetId, + }, } }, @@ -132,8 +137,7 @@ export const apifyRunActorAsyncTool: ToolConfig return result } - const runData = result.output as any - const runId = runData.id + const runId = result.output.runId let elapsedTime = 0 @@ -141,15 +145,11 @@ export const apifyRunActorAsyncTool: ToolConfig await sleep(POLL_INTERVAL_MS) elapsedTime += POLL_INTERVAL_MS - const encodedActorId = encodeURIComponent(params.actorId) - const statusResponse = await fetch( - `https://api.apify.com/v2/acts/${encodedActorId}/runs/${runId}?token=${params.apiKey}`, - { - headers: { - Authorization: `Bearer ${params.apiKey}`, - }, - } - ) + const statusResponse = await fetch(`https://api.apify.com/v2/actor-runs/${runId}`, { + headers: { + Authorization: `Bearer ${params.apiKey}`, + }, + }) if (!statusResponse.ok) { return { @@ -160,7 +160,7 @@ export const apifyRunActorAsyncTool: ToolConfig } const statusData = await statusResponse.json() - const run = statusData.data + const run = statusData.data as ApifyRun if ( run.status === 'SUCCEEDED' || @@ -171,7 +171,7 @@ export const apifyRunActorAsyncTool: ToolConfig if (run.status === 'SUCCEEDED') { const limit = Math.max(1, Math.min(params.itemLimit || 100, 250000)) const itemsResponse = await fetch( - `https://api.apify.com/v2/datasets/${run.defaultDatasetId}/items?token=${params.apiKey}&limit=${limit}`, + `https://api.apify.com/v2/datasets/${run.defaultDatasetId}/items?limit=${limit}`, { headers: { Authorization: `Bearer ${params.apiKey}`, diff --git a/apps/sim/tools/apify/run_actor_sync.ts b/apps/sim/tools/apify/run_actor_sync.ts index c36c7668e2f..261fd4a4527 100644 --- a/apps/sim/tools/apify/run_actor_sync.ts +++ b/apps/sim/tools/apify/run_actor_sync.ts @@ -52,12 +52,10 @@ export const apifyRunActorSyncTool: ToolConfig = request: { url: (params) => { - const encodedActorId = encodeURIComponent(params.actorId) + const encodedActorId = encodeURIComponent(params.actorId.trim()) const baseUrl = `https://api.apify.com/v2/acts/${encodedActorId}/run-sync-get-dataset-items` const queryParams = new URLSearchParams() - queryParams.set('token', params.apiKey) - if (params.memory) { queryParams.set('memory', params.memory.toString()) } @@ -68,7 +66,8 @@ export const apifyRunActorSyncTool: ToolConfig = queryParams.set('build', params.build) } - return `${baseUrl}?${queryParams.toString()}` + const query = queryParams.toString() + return query ? `${baseUrl}?${query}` : baseUrl }, method: 'POST', headers: (params) => ({ diff --git a/apps/sim/tools/apify/run_task.ts b/apps/sim/tools/apify/run_task.ts new file mode 100644 index 00000000000..3e4f5530e2e --- /dev/null +++ b/apps/sim/tools/apify/run_task.ts @@ -0,0 +1,123 @@ +import type { RunTaskParams, RunTaskResult } from '@/tools/apify/types' +import type { ToolConfig } from '@/tools/types' + +export const apifyRunTaskTool: ToolConfig = { + id: 'apify_run_task', + name: 'APIFY Run Task', + description: 'Run a saved APIFY actor task synchronously and get dataset items (max 5 minutes)', + version: '1.0.0', + + params: { + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'APIFY API token from console.apify.com/account#/integrations', + }, + taskId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: + 'Task ID or username/task-name. Examples: "janedoe/my-task", "moJRLRc85AitArpNN"', + }, + input: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: + 'JSON string that overrides the task\'s saved input. Example: {"startUrls": [{"url": "https://example.com"}]}', + }, + itemLimit: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Max dataset items to return (1-250000). Example: 500', + }, + memory: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Memory in megabytes allocated for the run (128-32768). Example: 1024 for 1GB', + }, + timeout: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Timeout in seconds for the run. Example: 300 for 5 minutes', + }, + build: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Actor build to run. Examples: "latest", "beta", "1.2.3"', + }, + }, + + request: { + url: (params) => { + const encodedTaskId = encodeURIComponent(params.taskId.trim()) + const baseUrl = `https://api.apify.com/v2/actor-tasks/${encodedTaskId}/run-sync-get-dataset-items` + const queryParams = new URLSearchParams() + + if (params.itemLimit) { + const limit = Math.max(1, Math.min(params.itemLimit, 250000)) + queryParams.set('limit', limit.toString()) + } + if (params.memory) { + queryParams.set('memory', params.memory.toString()) + } + if (params.timeout) { + queryParams.set('timeout', params.timeout.toString()) + } + if (params.build) { + queryParams.set('build', params.build) + } + + const query = queryParams.toString() + return query ? `${baseUrl}?${query}` : baseUrl + }, + method: 'POST', + headers: (params) => ({ + Authorization: `Bearer ${params.apiKey}`, + 'Content-Type': 'application/json', + }), + body: (params) => { + if (params.input) { + try { + return JSON.parse(params.input) + } catch { + throw new Error('Invalid JSON in input parameter') + } + } + return {} + }, + }, + + transformResponse: async (response) => { + if (!response.ok) { + const errorText = await response.text() + return { + success: false, + output: { success: false, status: 'ERROR', items: [] }, + error: `APIFY API error: ${errorText}`, + } + } + + const items = await response.json() + return { + success: true, + output: { + success: true, + status: 'SUCCEEDED', + items: Array.isArray(items) ? items : [], + }, + } + }, + + outputs: { + success: { type: 'boolean', description: 'Whether the task run succeeded' }, + status: { type: 'string', description: 'Run status (SUCCEEDED, FAILED, etc.)' }, + items: { type: 'array', description: 'Dataset items produced by the run' }, + }, +} diff --git a/apps/sim/tools/apify/types.ts b/apps/sim/tools/apify/types.ts index 4e6f11e4c34..06616a0a369 100644 --- a/apps/sim/tools/apify/types.ts +++ b/apps/sim/tools/apify/types.ts @@ -1,27 +1,7 @@ import type { ToolResponse } from '@/tools/types' -interface ApifyActor { - id: string - name: string - username: string - description?: string - stats?: { - lastRunStartedAt?: string - } -} - -export interface RunActorParams { - apiKey: string - actorId: string - input?: string - waitForFinish?: number // For async tool: 0-60 seconds initial wait - itemLimit?: number // For async tool: 1-250000 items, default 100 - memory?: number // Memory in MB (128-32768) - timeout?: number - build?: string // Actor build to run (e.g., "latest", "beta", build tag/number) -} - -interface ApifyRun { +/** Apify actor run object returned by the run/status endpoints. */ +export interface ApifyRun { id: string actId: string status: @@ -33,10 +13,22 @@ interface ApifyRun { | 'TIMED-OUT' | 'ABORTING' | 'TIMING-OUT' - startedAt: string + startedAt?: string finishedAt?: string - defaultDatasetId: string - defaultKeyValueStoreId: string + defaultDatasetId?: string + defaultKeyValueStoreId?: string + stats?: Record +} + +export interface RunActorParams { + apiKey: string + actorId: string + input?: string + waitForFinish?: number // For async tool: 0-60 seconds initial wait + itemLimit?: number // For async tool: 1-250000 items, default 100 + memory?: number // Memory in MB (128-32768) + timeout?: number // Timeout in seconds + build?: string // Actor build to run (e.g., "latest", "beta", build tag/number) } export interface RunActorResult extends ToolResponse { @@ -45,11 +37,59 @@ export interface RunActorResult extends ToolResponse { runId: string status: string datasetId?: string - items?: any[] - stats?: { - inputRecords?: number - outputRecords?: number - duration?: number - } + items?: unknown[] + } +} + +export interface RunTaskParams { + apiKey: string + taskId: string + input?: string + memory?: number + timeout?: number + build?: string + itemLimit?: number +} + +export interface RunTaskResult extends ToolResponse { + output: { + success: boolean + status: string + items: unknown[] + } +} + +export interface GetDatasetItemsParams { + apiKey: string + datasetId: string + itemLimit?: number + offset?: number + fields?: string +} + +export interface GetDatasetItemsResult extends ToolResponse { + output: { + success: boolean + datasetId: string + items: unknown[] + count: number + } +} + +export interface GetRunParams { + apiKey: string + runId: string +} + +export interface GetRunResult extends ToolResponse { + output: { + success: boolean + runId: string + status: string + startedAt: string | null + finishedAt: string | null + datasetId: string | null + keyValueStoreId: string | null + stats: Record | null } } diff --git a/apps/sim/tools/registry.ts b/apps/sim/tools/registry.ts index 596d394ddb6..ff6a5ce0b86 100644 --- a/apps/sim/tools/registry.ts +++ b/apps/sim/tools/registry.ts @@ -121,7 +121,13 @@ import { amplitudeUserProfileTool, amplitudeUserSearchTool, } from '@/tools/amplitude' -import { apifyRunActorAsyncTool, apifyRunActorSyncTool } from '@/tools/apify' +import { + apifyGetDatasetItemsTool, + apifyGetRunTool, + apifyRunActorAsyncTool, + apifyRunActorSyncTool, + apifyRunTaskTool, +} from '@/tools/apify' import { apolloAccountBulkCreateTool, apolloAccountBulkUpdateTool, @@ -5062,6 +5068,9 @@ export const tools: Record = { ahrefs_broken_backlinks: ahrefsBrokenBacklinksTool, apify_run_actor_sync: apifyRunActorSyncTool, apify_run_actor_async: apifyRunActorAsyncTool, + apify_run_task: apifyRunTaskTool, + apify_get_dataset_items: apifyGetDatasetItemsTool, + apify_get_run: apifyGetRunTool, apollo_people_search: apolloPeopleSearchTool, apollo_people_enrich: apolloPeopleEnrichTool, apollo_people_bulk_enrich: apolloPeopleBulkEnrichTool, From 6282a90a29a5426c07e884dfb9889a3031dd817c Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Tue, 2 Jun 2026 12:28:50 -0700 Subject: [PATCH 2/2] fix(apify): guard undefined dataset id and forward explicit offset=0 --- apps/sim/blocks/blocks/apify.ts | 3 ++- apps/sim/tools/apify/get_dataset_items.ts | 2 +- apps/sim/tools/apify/run_actor_async.ts | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/apps/sim/blocks/blocks/apify.ts b/apps/sim/blocks/blocks/apify.ts index 66da993a7a3..844be210d1e 100644 --- a/apps/sim/blocks/blocks/apify.ts +++ b/apps/sim/blocks/blocks/apify.ts @@ -202,7 +202,8 @@ Return ONLY the valid JSON object - no explanations, no markdown.`, if (rest.timeout) result.timeout = Number(rest.timeout) if (rest.waitForFinish) result.waitForFinish = Number(rest.waitForFinish) if (rest.itemLimit) result.itemLimit = Number(rest.itemLimit) - if (rest.offset) result.offset = Number(rest.offset) + if (rest.offset !== undefined && rest.offset !== null && rest.offset !== '') + result.offset = Number(rest.offset) return result }, diff --git a/apps/sim/tools/apify/get_dataset_items.ts b/apps/sim/tools/apify/get_dataset_items.ts index 51a323c35e7..5c4ee9b3201 100644 --- a/apps/sim/tools/apify/get_dataset_items.ts +++ b/apps/sim/tools/apify/get_dataset_items.ts @@ -51,7 +51,7 @@ export const apifyGetDatasetItemsTool: ToolConfig run.status === 'ABORTED' || run.status === 'TIMED-OUT' ) { - if (run.status === 'SUCCEEDED') { + if (run.status === 'SUCCEEDED' && run.defaultDatasetId) { const limit = Math.max(1, Math.min(params.itemLimit || 100, 250000)) const itemsResponse = await fetch( `https://api.apify.com/v2/datasets/${run.defaultDatasetId}/items?limit=${limit}`,