Skip to content

Commit e2c2d9a

Browse files
authored
feat(apify): add run task, get dataset items, and get run tools (#4851)
* feat(apify): add run task, get dataset items, and get run tools
* fix(apify): guard undefined dataset id and forward explicit offset=0
1 parent 34ee7f9 commit e2c2d9a

11 files changed

Lines changed: 602 additions & 97 deletions

File tree

apps/docs/content/docs/en/tools/apify.mdx

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ These operations equip your agents to automate, scrape, and orchestrate data col
3030

3131
## Usage Instructions
3232

33-
Integrate Apify into your workflow. Run any Apify actor with custom input and retrieve results. Supports both synchronous and asynchronous execution with automatic dataset fetching.
33+
Integrate Apify into your workflow. Run any Apify actor or saved task with custom input, fetch dataset items, and check run status. Supports both synchronous and asynchronous execution with automatic dataset fetching.
3434

3535

3636

@@ -87,4 +87,75 @@ Run an APIFY actor asynchronously with polling for long-running tasks
8787
| `datasetId` | string | Dataset ID containing results |
8888
| `items` | array | Dataset items \(if completed\) |
8989

90+
### `apify_run_task`
91+
92+
Run a saved APIFY actor task synchronously and get dataset items (max 5 minutes)
93+
94+
#### Input
95+
96+
| Parameter | Type | Required | Description |
97+
| --------- | ---- | -------- | ----------- |
98+
| `apiKey` | string | Yes | APIFY API token from console.apify.com/account#/integrations |
99+
| `taskId` | string | Yes | Task ID or username/task-name. Examples: "janedoe/my-task", "moJRLRc85AitArpNN" |
100+
| `input` | string | No | JSON string that overrides the task\'s saved input. Example: \{"startUrls": \[\{"url": "https://example.com"\}\]\} |
101+
| `itemLimit` | number | No | Max dataset items to return \(1-250000\). Example: 500 |
102+
| `memory` | number | No | Memory in megabytes allocated for the run \(128-32768\). Example: 1024 for 1GB |
103+
| `timeout` | number | No | Timeout in seconds for the run. Example: 300 for 5 minutes |
104+
| `build` | string | No | Actor build to run. Examples: "latest", "beta", "1.2.3" |
105+
106+
#### Output
107+
108+
| Parameter | Type | Description |
109+
| --------- | ---- | ----------- |
110+
| `success` | boolean | Whether the task run succeeded |
111+
| `status` | string | Run status \(SUCCEEDED, FAILED, etc.\) |
112+
| `items` | array | Dataset items produced by the run |
113+
114+
### `apify_get_dataset_items`
115+
116+
Retrieve items stored in an APIFY dataset
117+
118+
#### Input
119+
120+
| Parameter | Type | Required | Description |
121+
| --------- | ---- | -------- | ----------- |
122+
| `apiKey` | string | Yes | APIFY API token from console.apify.com/account#/integrations |
123+
| `datasetId` | string | Yes | Dataset ID to read items from. Example: "9RnD3Pql2vGZkc5H5" |
124+
| `itemLimit` | number | No | Max items to return \(1-250000\). Default: all items. Example: 500 |
125+
| `offset` | number | No | Number of items to skip at the start. Default: 0 |
126+
| `fields` | string | No | Comma-separated list of fields to include. Example: "title,url,price" |
127+
128+
#### Output
129+
130+
| Parameter | Type | Description |
131+
| --------- | ---- | ----------- |
132+
| `success` | boolean | Whether the items were retrieved |
133+
| `datasetId` | string | Dataset ID the items were read from |
134+
| `items` | array | Items stored in the dataset |
135+
| `count` | number | Number of items returned |
136+
137+
### `apify_get_run`
138+
139+
Get the status and details of an APIFY actor run
140+
141+
#### Input
142+
143+
| Parameter | Type | Required | Description |
144+
| --------- | ---- | -------- | ----------- |
145+
| `apiKey` | string | Yes | APIFY API token from console.apify.com/account#/integrations |
146+
| `runId` | string | Yes | Actor run ID to fetch. Example: "HG7ML7M8z78YcAPEB" |
147+
148+
#### Output
149+
150+
| Parameter | Type | Description |
151+
| --------- | ---- | ----------- |
152+
| `success` | boolean | Whether the run was found |
153+
| `runId` | string | APIFY run ID |
154+
| `status` | string | Run status \(READY, RUNNING, SUCCEEDED, FAILED, etc.\) |
155+
| `startedAt` | string | When the run started \(ISO timestamp\) |
156+
| `finishedAt` | string | When the run finished \(ISO timestamp\) |
157+
| `datasetId` | string | Default dataset ID for the run |
158+
| `keyValueStoreId` | string | Default key-value store ID for the run |
159+
| `stats` | json | Run statistics \(memory, CPU, duration\) |
160+
90161

apps/sim/app/(landing)/integrations/data/integrations.json

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -772,7 +772,7 @@
772772
"slug": "apify",
773773
"name": "Apify",
774774
"description": "Run Apify actors and retrieve results",
775-
"longDescription": "Integrate Apify into your workflow. Run any Apify actor with custom input and retrieve results. Supports both synchronous and asynchronous execution with automatic dataset fetching.",
775+
"longDescription": "Integrate Apify into your workflow. Run any Apify actor or saved task with custom input, fetch dataset items, and check run status. Supports both synchronous and asynchronous execution with automatic dataset fetching.",
776776
"bgColor": "#E0E0E0",
777777
"iconName": "ApifyIcon",
778778
"docsUrl": "https://docs.sim.ai/tools/apify",
@@ -784,9 +784,21 @@
784784
{
785785
"name": "Run Actor (Async)",
786786
"description": "Run an APIFY actor asynchronously with polling for long-running tasks"
787+
},
788+
{
789+
"name": "Run Task",
790+
"description": "Run a saved APIFY actor task synchronously and get dataset items (max 5 minutes)"
791+
},
792+
{
793+
"name": "Get Dataset Items",
794+
"description": "Retrieve items stored in an APIFY dataset"
795+
},
796+
{
797+
"name": "Get Run",
798+
"description": "Get the status and details of an APIFY actor run"
787799
}
788800
],
789-
"operationCount": 2,
801+
"operationCount": 5,
790802
"triggers": [],
791803
"triggerCount": 0,
792804
"authType": "api-key",

apps/sim/blocks/blocks/apify.ts

Lines changed: 95 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
11
import { ApifyIcon } from '@/components/icons'
22
import type { BlockConfig } from '@/blocks/types'
3-
import { IntegrationType } from '@/blocks/types'
3+
import { AuthMode, IntegrationType } from '@/blocks/types'
44
import type { RunActorResult } from '@/tools/apify/types'
55

6+
const RUN_OPERATIONS = ['apify_run_actor_sync', 'apify_run_actor_async']
7+
const RUN_OR_TASK_OPERATIONS = [...RUN_OPERATIONS, 'apify_run_task']
8+
69
export const ApifyBlock: BlockConfig<RunActorResult> = {
710
type: 'apify',
811
name: 'Apify',
912
description: 'Run Apify actors and retrieve results',
13+
authMode: AuthMode.ApiKey,
1014
longDescription:
11-
'Integrate Apify into your workflow. Run any Apify actor with custom input and retrieve results. Supports both synchronous and asynchronous execution with automatic dataset fetching.',
15+
'Integrate Apify into your workflow. Run any Apify actor or saved task with custom input, fetch dataset items, and check run status. Supports both synchronous and asynchronous execution with automatic dataset fetching.',
1216
docsLink: 'https://docs.sim.ai/tools/apify',
1317
category: 'tools',
1418
integrationType: IntegrationType.Search,
@@ -24,6 +28,9 @@ export const ApifyBlock: BlockConfig<RunActorResult> = {
2428
options: [
2529
{ label: 'Run Actor', id: 'apify_run_actor_sync' },
2630
{ label: 'Run Actor (Async)', id: 'apify_run_actor_async' },
31+
{ label: 'Run Task', id: 'apify_run_task' },
32+
{ label: 'Get Dataset Items', id: 'apify_get_dataset_items' },
33+
{ label: 'Get Run', id: 'apify_get_run' },
2734
],
2835
value: () => 'apify_run_actor_sync',
2936
},
@@ -40,7 +47,32 @@ export const ApifyBlock: BlockConfig<RunActorResult> = {
4047
title: 'Actor ID',
4148
type: 'short-input',
4249
placeholder: 'e.g., janedoe/my-actor or actor ID',
43-
required: true,
50+
condition: { field: 'operation', value: RUN_OPERATIONS },
51+
required: { field: 'operation', value: RUN_OPERATIONS },
52+
},
53+
{
54+
id: 'taskId',
55+
title: 'Task ID',
56+
type: 'short-input',
57+
placeholder: 'e.g., janedoe/my-task or task ID',
58+
condition: { field: 'operation', value: 'apify_run_task' },
59+
required: { field: 'operation', value: 'apify_run_task' },
60+
},
61+
{
62+
id: 'datasetId',
63+
title: 'Dataset ID',
64+
type: 'short-input',
65+
placeholder: 'e.g., 9RnD3Pql2vGZkc5H5',
66+
condition: { field: 'operation', value: 'apify_get_dataset_items' },
67+
required: { field: 'operation', value: 'apify_get_dataset_items' },
68+
},
69+
{
70+
id: 'runId',
71+
title: 'Run ID',
72+
type: 'short-input',
73+
placeholder: 'e.g., HG7ML7M8z78YcAPEB',
74+
condition: { field: 'operation', value: 'apify_get_run' },
75+
required: { field: 'operation', value: 'apify_get_run' },
4476
},
4577
{
4678
id: 'input',
@@ -49,6 +81,7 @@ export const ApifyBlock: BlockConfig<RunActorResult> = {
4981
language: 'json',
5082
placeholder: '{\n "startUrl": "https://example.com",\n "maxPages": 10\n}',
5183
required: false,
84+
condition: { field: 'operation', value: RUN_OR_TASK_OPERATIONS },
5285
wandConfig: {
5386
enabled: true,
5487
prompt: `Generate a JSON configuration object for an Apify actor based on the user's description.
@@ -82,79 +115,95 @@ Return ONLY the valid JSON object - no explanations, no markdown.`,
82115
type: 'short-input',
83116
placeholder: 'Memory in MB (e.g., 1024 for 1GB, 2048 for 2GB)',
84117
required: false,
118+
mode: 'advanced',
119+
condition: { field: 'operation', value: RUN_OR_TASK_OPERATIONS },
85120
},
86121
{
87122
id: 'timeout',
88123
title: 'Timeout',
89124
type: 'short-input',
90125
placeholder: 'Timeout in seconds (e.g., 300 for 5 min)',
91126
required: false,
127+
mode: 'advanced',
128+
condition: { field: 'operation', value: RUN_OR_TASK_OPERATIONS },
92129
},
93130
{
94131
id: 'build',
95132
title: 'Build',
96133
type: 'short-input',
97134
placeholder: 'Build version (e.g., "latest", "beta", "1.2.3")',
98135
required: false,
136+
mode: 'advanced',
137+
condition: { field: 'operation', value: RUN_OR_TASK_OPERATIONS },
99138
},
100139
{
101140
id: 'waitForFinish',
102141
title: 'Wait For Finish',
103142
type: 'short-input',
104143
placeholder: 'Initial wait time in seconds (0-60)',
105144
required: false,
106-
condition: {
107-
field: 'operation',
108-
value: 'apify_run_actor_async',
109-
},
145+
mode: 'advanced',
146+
condition: { field: 'operation', value: 'apify_run_actor_async' },
110147
},
111148
{
112149
id: 'itemLimit',
113150
title: 'Item Limit',
114151
type: 'short-input',
115152
placeholder: 'Max dataset items to fetch (1-250000)',
116153
required: false,
154+
mode: 'advanced',
117155
condition: {
118156
field: 'operation',
119-
value: 'apify_run_actor_async',
157+
value: ['apify_run_actor_async', 'apify_run_task', 'apify_get_dataset_items'],
120158
},
121159
},
160+
{
161+
id: 'offset',
162+
title: 'Offset',
163+
type: 'short-input',
164+
placeholder: 'Number of items to skip (default 0)',
165+
required: false,
166+
mode: 'advanced',
167+
condition: { field: 'operation', value: 'apify_get_dataset_items' },
168+
},
169+
{
170+
id: 'fields',
171+
title: 'Fields',
172+
type: 'short-input',
173+
placeholder: 'Comma-separated fields (e.g., title,url,price)',
174+
required: false,
175+
mode: 'advanced',
176+
condition: { field: 'operation', value: 'apify_get_dataset_items' },
177+
},
122178
],
123179

124180
tools: {
125-
access: ['apify_run_actor_sync', 'apify_run_actor_async'],
181+
access: [
182+
'apify_run_actor_sync',
183+
'apify_run_actor_async',
184+
'apify_run_task',
185+
'apify_get_dataset_items',
186+
'apify_get_run',
187+
],
126188
config: {
127189
tool: (params) => params.operation,
128190
params: (params: Record<string, any>) => {
129191
const { operation, ...rest } = params
130-
const result: Record<string, any> = {
131-
apiKey: rest.apiKey,
132-
actorId: rest.actorId,
133-
}
134-
135-
if (rest.input) {
136-
result.input = rest.input
137-
}
138-
139-
if (rest.memory) {
140-
result.memory = Number(rest.memory)
141-
}
142-
143-
if (rest.timeout) {
144-
result.timeout = Number(rest.timeout)
145-
}
146-
147-
if (rest.build) {
148-
result.build = rest.build
149-
}
150-
151-
if (rest.waitForFinish) {
152-
result.waitForFinish = Number(rest.waitForFinish)
153-
}
154-
155-
if (rest.itemLimit) {
156-
result.itemLimit = Number(rest.itemLimit)
157-
}
192+
const result: Record<string, any> = { apiKey: rest.apiKey }
193+
194+
if (rest.actorId) result.actorId = rest.actorId
195+
if (rest.taskId) result.taskId = rest.taskId
196+
if (rest.datasetId) result.datasetId = rest.datasetId
197+
if (rest.runId) result.runId = rest.runId
198+
if (rest.input) result.input = rest.input
199+
if (rest.build) result.build = rest.build
200+
if (rest.fields) result.fields = rest.fields
201+
if (rest.memory) result.memory = Number(rest.memory)
202+
if (rest.timeout) result.timeout = Number(rest.timeout)
203+
if (rest.waitForFinish) result.waitForFinish = Number(rest.waitForFinish)
204+
if (rest.itemLimit) result.itemLimit = Number(rest.itemLimit)
205+
if (rest.offset !== undefined && rest.offset !== null && rest.offset !== '')
206+
result.offset = Number(rest.offset)
158207

159208
return result
160209
},
@@ -165,19 +214,27 @@ Return ONLY the valid JSON object - no explanations, no markdown.`,
165214
operation: { type: 'string', description: 'Operation to perform' },
166215
apiKey: { type: 'string', description: 'Apify API token' },
167216
actorId: { type: 'string', description: 'Actor ID or username/actor-name' },
217+
taskId: { type: 'string', description: 'Task ID or username/task-name' },
218+
datasetId: { type: 'string', description: 'Dataset ID to read items from' },
219+
runId: { type: 'string', description: 'Actor run ID to fetch' },
168220
input: { type: 'string', description: 'Actor input as JSON string' },
169221
memory: { type: 'number', description: 'Memory in MB (128-32768)' },
170222
timeout: { type: 'number', description: 'Timeout in seconds' },
171223
build: { type: 'string', description: 'Actor build version' },
172224
waitForFinish: { type: 'number', description: 'Initial wait time in seconds' },
173225
itemLimit: { type: 'number', description: 'Max dataset items to fetch' },
226+
offset: { type: 'number', description: 'Number of items to skip' },
227+
fields: { type: 'string', description: 'Comma-separated fields to include' },
174228
},
175229

176230
outputs: {
177-
success: { type: 'boolean', description: 'Whether the actor run succeeded' },
231+
success: { type: 'boolean', description: 'Whether the operation succeeded' },
178232
runId: { type: 'string', description: 'Apify run ID' },
179233
status: { type: 'string', description: 'Run status (SUCCEEDED, FAILED, etc.)' },
180234
datasetId: { type: 'string', description: 'Dataset ID containing results' },
181235
items: { type: 'json', description: 'Dataset items (if completed)' },
236+
count: { type: 'number', description: 'Number of items returned (Get Dataset Items)' },
237+
startedAt: { type: 'string', description: 'When the run started (Get Run)' },
238+
finishedAt: { type: 'string', description: 'When the run finished (Get Run)' },
182239
},
183240
}

0 commit comments

Comments
 (0)