Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
0b9019d
v0.6.23: MCP fixes, remove local state in favor of server state, moth…
waleedlatif1 Apr 4, 2026
a54dcbe
v0.6.24: copilot feedback wiring, captcha fixes
waleedlatif1 Apr 4, 2026
28af223
v0.6.25: cloudwatch, cloudformation, live kb sync, linear fixes, post…
waleedlatif1 Apr 5, 2026
d889f32
v0.6.26: ui improvements, multiple response blocks, docx previews, ol…
waleedlatif1 Apr 5, 2026
316bc8c
v0.6.27: new triggers, mothership improvements, files archive, queuei…
waleedlatif1 Apr 7, 2026
3f508e4
v0.6.28: new docs, delete confirmation standardization, dagster integ…
waleedlatif1 Apr 7, 2026
d6ec115
v0.6.29: login improvements, posthog telemetry (#4026)
TheodoreSpeaks Apr 7, 2026
d7da35b
v0.6.30: slack trigger enhancements, connectors performance improveme…
waleedlatif1 Apr 8, 2026
cf233bb
v0.6.31: elevenlabs voice, trigger.dev fixes, cloud whitelabeling for…
waleedlatif1 Apr 8, 2026
f8f3758
v0.6.32: BYOK fixes, ui improvements, cloudwatch tools, jsm tools ext…
waleedlatif1 Apr 9, 2026
3c8bb40
v0.6.33: polling improvements, jsm forms tools, credentials reactquer…
waleedlatif1 Apr 9, 2026
d33acf4
v0.6.34: trigger.dev fixes, CI speedup, atlassian error extractor
waleedlatif1 Apr 9, 2026
4f40c4c
v0.6.35: additional jira fields, HITL docs, logs cleanup efficiency
waleedlatif1 Apr 10, 2026
cbfab1c
v0.6.36: new chunkers, sockets state machine, google sheets/drive/cal…
waleedlatif1 Apr 11, 2026
4309d06
v0.6.37: audit logs page, isolated-vm worker rotation, permission gro…
waleedlatif1 Apr 12, 2026
8b57476
v0.6.38: models page
waleedlatif1 Apr 12, 2026
e3d0e74
v0.6.39: billing fixes, tools audit, landing fix
waleedlatif1 Apr 13, 2026
0ac0539
v0.6.40: mothership tool loop, new skills, agiloft, STS, IAM integrat…
waleedlatif1 Apr 14, 2026
3838b6e
v0.6.41: webhooks fix, workers removal
waleedlatif1 Apr 14, 2026
fc07922
v0.6.42: mothership nested file reads, search modal improvements
waleedlatif1 Apr 14, 2026
3a1b1a8
v0.6.43: mothership billing idempotency, env var resolution fixes
waleedlatif1 Apr 14, 2026
46ffc49
v0.6.44: streamdown, mothership intelligence, excel extension
waleedlatif1 Apr 15, 2026
010435c
v0.6.45: superagent, csp, brightdata integration, gemini response for…
Sg312 Apr 15, 2026
c0bc62c
Merge pull request #4190 from simstudioai/staging
icecrasher321 Apr 16, 2026
387cc97
v0.6.46: mothership queueing, web vitals
waleedlatif1 Apr 16, 2026
2dbc7fd
v0.6.47: files focusing, documentation, opus 4.7
waleedlatif1 Apr 16, 2026
8a50f18
v0.6.48: import csv into tables, subflow fixes, CSP updates
waleedlatif1 Apr 16, 2026
dcf3302
v0.6.49: deploy sockets event, resolver, logs improvements, monday.co…
waleedlatif1 Apr 17, 2026
bc09865
v0.6.50: ppt/doc/pdf worker isolation, docs, chat, sidebar improvements
icecrasher321 Apr 18, 2026
5f56e46
v0.6.51: tables improvements, billing fixes, 404 pages, code hygiene
waleedlatif1 Apr 20, 2026
ca3bbf1
v0.6.52: data retention, docs updates, slack manifest generator, secu…
waleedlatif1 Apr 22, 2026
bbf400f
v0.6.53: permissions groups migration, docs updates
waleedlatif1 Apr 22, 2026
7c619e7
Merge pull request #4261 from simstudioai/staging
icecrasher321 Apr 22, 2026
64cfda5
v0.6.54: mothership tracing, db pool size increase
icecrasher321 Apr 22, 2026
7ca736a
v0.6.55: standardize monorepo conventions, api key hash, thinking tex…
waleedlatif1 Apr 23, 2026
6066fc1
v0.6.56: data retention improvements, tables column double click resi…
waleedlatif1 Apr 24, 2026
3422f64
Merge pull request #4285 from simstudioai/staging
waleedlatif1 Apr 24, 2026
595c4c3
Merge pull request #4293 from simstudioai/staging
TheodoreSpeaks Apr 24, 2026
d6c1bc2
v0.6.58: queue abort state machine improvement, contributing guide
icecrasher321 Apr 25, 2026
58a3ae2
v0.6.59: gpt 5.5, security hardening, parallel subagents rendering
icecrasher321 Apr 27, 2026
489f2d3
v0.6.60: copilot security improvements, slack canvas ops, retention j…
icecrasher321 Apr 27, 2026
22ccaf1
feat(block): Allow wait block to wait up to 30 days
TheodoreSpeaks Apr 29, 2026
a4b5df1
restore ff
TheodoreSpeaks Apr 29, 2026
6aa3fe3
v0.6.61: SAP integration, live URLs for browser use, 5xx error catego…
icecrasher321 Apr 29, 2026
ecbf5e5
Merge pull request #4342 from simstudioai/staging
TheodoreSpeaks Apr 29, 2026
0c32dd4
Filter out waits from hitl endpoints
TheodoreSpeaks Apr 30, 2026
7678245
Use correct count, filtering out wait blocks
TheodoreSpeaks Apr 30, 2026
2aaf2b7
v0.6.62: firecrawl parse, new gmail tools, trace improvements, tool f…
waleedlatif1 May 2, 2026
d445b9c
v0.6.63: knowledgebase UI, folder search in mothership
waleedlatif1 May 2, 2026
c234b01
Merge remote-tracking branch 'origin/main' into feat/long-waits
TheodoreSpeaks May 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ export const POST = withRouteHandler(
contextId,
resumeInput,
userId,
allowedPauseKinds: ['human'],
})

if (enqueueResult.status === 'queued') {
Expand Down
161 changes: 161 additions & 0 deletions apps/sim/app/api/resume/poll/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import { db } from '@sim/db'
import { pausedExecutions } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { generateShortId } from '@sim/utils/id'
import { and, eq, isNotNull, lte } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { verifyCronAuth } from '@/lib/auth/internal'
import { acquireLock, releaseLock } from '@/lib/core/config/redis'
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
import { PauseResumeManager } from '@/lib/workflows/executor/human-in-the-loop-manager'

// Logger tagged 'TimePauseResumePoll'; used for every log line emitted by this route.
const logger = createLogger('TimePauseResumePoll')

// Next.js route segment config. NOTE(review): presumably opts the route out of caching and
// caps its run time at 120 seconds — confirm against the Next.js docs for the deployed version.
export const dynamic = 'force-dynamic'
export const maxDuration = 120

// Key of the lock the GET handler acquires before polling and releases when it finishes.
const LOCK_KEY = 'time-pause-resume-poll-lock'
// TTL, in seconds, handed to acquireLock together with LOCK_KEY.
const LOCK_TTL_SECONDS = 120
// Upper bound on the number of paused-execution rows selected in a single poll.
const POLL_BATCH_LIMIT = 200

/**
 * The fields of a persisted pause point that the poll handler reads.
 *
 * Everything is optional because values of this type are obtained by casting the
 * `pausePoints` column of a paused execution, not by validating it.
 */
interface StoredPausePoint {
  /** Kind of pause; the poller only acts on points whose kind is `'time'`. */
  pauseKind?: string
  /** Timestamp string parsed with `new Date(...)` to decide whether the point is due. */
  resumeAt?: string
  /** When set to anything other than `'paused'`, the poller skips the point. */
  resumeStatus?: string
  /** Identifier passed as `contextId` when a resume is dispatched; points without it are skipped. */
  contextId?: string
}

/**
 * GET handler that dispatches resumes for paused executions whose time-based pause is due.
 *
 * Steps, in order:
 * 1. Return the result of `verifyCronAuth` if it is truthy.
 * 2. Try to acquire `LOCK_KEY`; respond 202 without polling when the lock is not acquired.
 * 3. Select up to `POLL_BATCH_LIMIT` rows with status `'paused'` and a non-null
 *    `nextResumeAt` at or before the current time.
 * 4. For each row, dispatch a resume for every due `'time'` pause point, then write the row's
 *    `nextResumeAt` back as the earliest still-future pause point (or null when none remain).
 * 5. Respond with counts and per-point failures; any error thrown outside the per-point
 *    try/catch produces a 500. The lock release is attempted in `finally` on every path
 *    that acquired it.
 */
export const GET = withRouteHandler(async (request: NextRequest) => {
const requestId = generateShortId()

const authError = verifyCronAuth(request, 'Time-pause resume poll')
if (authError) return authError

// requestId doubles as the lock owner value: the same id is passed to releaseLock below.
const lockAcquired = await acquireLock(LOCK_KEY, requestId, LOCK_TTL_SECONDS)
if (!lockAcquired) {
return NextResponse.json(
{ success: true, message: 'Polling already in progress – skipped', requestId },
{ status: 202 }
)
}

// Tallies reported in the completion log line and in the success response body.
let claimedRows = 0
let dispatched = 0
const failures: { executionId: string; contextId: string; error: string }[] = []

try {
// Single reference time used both for the SQL filter and for the per-point due check.
const now = new Date()

// NOTE(review): the query has no ORDER BY, so when more than POLL_BATCH_LIMIT rows are due
// the subset returned is not determined by anything visible here.
const dueRows = await db
.select({
id: pausedExecutions.id,
executionId: pausedExecutions.executionId,
workflowId: pausedExecutions.workflowId,
pausePoints: pausedExecutions.pausePoints,
metadata: pausedExecutions.metadata,
})
.from(pausedExecutions)
.where(
and(
eq(pausedExecutions.status, 'paused'),
isNotNull(pausedExecutions.nextResumeAt),
lte(pausedExecutions.nextResumeAt, now)
)
)
.limit(POLL_BATCH_LIMIT)
Comment on lines +56 to +66
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 No ORDER BY on batch query — high-volume queues risk row starvation

Without an explicit ORDER BY, PostgreSQL returns rows in an unspecified order. When the queue depth exceeds POLL_BATCH_LIMIT = 200, the same 200 rows may be returned on every invocation (e.g. lowest physical heap order), while later-inserted rows are perpetually skipped. Adding .orderBy(pausedExecutions.nextResumeAt) ensures the most-overdue entries are always processed first and that all rows are eventually drained.

.orderBy(pausedExecutions.nextResumeAt)
.limit(POLL_BATCH_LIMIT)


claimedRows = dueRows.length

for (const row of dueRows) {
// Both columns are cast without validation; a missing value is treated as an empty object.
const points = (row.pausePoints ?? {}) as Record<string, StoredPausePoint>
const metadata = (row.metadata ?? {}) as Record<string, unknown>
// Falls back to an empty string when metadata carries no string `executorUserId`.
const userId = typeof metadata.executorUserId === 'string' ? metadata.executorUserId : ''

const duePoints: StoredPausePoint[] = []
let nextRemaining: Date | null = null

// Partition this row's pause points: due ones are collected for dispatch, and the earliest
// future resumeAt is remembered. Skipped entirely: non-'time' points, points without a
// resumeAt, points whose resumeStatus is set to something other than 'paused', and points
// whose resumeAt does not parse to a valid date.
for (const point of Object.values(points)) {
if (point.pauseKind !== 'time' || !point.resumeAt) continue
if (point.resumeStatus && point.resumeStatus !== 'paused') continue

const resumeAt = new Date(point.resumeAt)
if (Number.isNaN(resumeAt.getTime())) continue

if (resumeAt <= now) {
duePoints.push(point)
} else if (!nextRemaining || resumeAt < nextRemaining) {
nextRemaining = resumeAt
}
}

for (const point of duePoints) {
const contextId = point.contextId
// A due point without a contextId cannot be dispatched and is skipped without being
// recorded in `failures`.
if (!contextId) continue
try {
const enqueueResult = await PauseResumeManager.enqueueOrStartResume({
executionId: row.executionId,
contextId,
resumeInput: {},
userId,
})

// Not awaited: the resume runs in the background and a rejection is only logged.
if (enqueueResult.status === 'starting') {
PauseResumeManager.startResumeExecution({
resumeEntryId: enqueueResult.resumeEntryId,
resumeExecutionId: enqueueResult.resumeExecutionId,
pausedExecution: enqueueResult.pausedExecution,
contextId: enqueueResult.contextId,
resumeInput: enqueueResult.resumeInput,
userId: enqueueResult.userId,
}).catch((error) => {
logger.error('Background time-pause resume failed', {
executionId: row.executionId,
contextId,
error: toError(error).message,
})
})
}
// Incremented whenever enqueueOrStartResume did not throw, whatever status it returned.
dispatched++
} catch (error) {
const message = toError(error).message
logger.warn('Failed to dispatch time-pause resume', {
executionId: row.executionId,
contextId,
error: message,
})
failures.push({ executionId: row.executionId, contextId, error: message })
}
}

// We never auto-retry a failed dispatch: workflow blocks aren't idempotent, and an
// operator must investigate stranded rows by hand. Setting nextResumeAt to the next
// future pause (or null) drops the row out of the poll, surfacing the failure.
await db
.update(pausedExecutions)
.set({ nextResumeAt: nextRemaining })
.where(eq(pausedExecutions.id, row.id))
Comment thread
TheodoreSpeaks marked this conversation as resolved.
}

logger.info('Time-pause resume poll completed', {
requestId,
claimedRows,
dispatched,
failureCount: failures.length,
Comment on lines +92 to +144
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Failed dispatches permanently strand executions

When enqueueOrStartResume throws for a due pause point, the error is caught and pushed to failures[], but nextRemaining is unaffected (it only tracks future points). The loop then runs UPDATE … SET next_resume_at = nextRemaining (effectively NULL when all points were due). After this update, the row no longer satisfies the cron query (isNotNull(nextResumeAt)), so it is silently abandoned and the workflow is permanently stuck in status = 'paused'.

Any transient failure — DB timeout, lock contention, network hiccup inside enqueueOrStartResume — turns into a permanent hang with no visible alert and no retry path.

A simple fix is to re-schedule failed points by putting their resumeAt back into nextRemaining:

for (const point of duePoints) {
  const contextId = point.contextId
  if (!contextId) continue
  try {
    // ... dispatch ...
    dispatched++
  } catch (error) {
    const message = toError(error).message
    logger.warn('Failed to dispatch time-pause resume', { ... })
    failures.push({ executionId: row.executionId, contextId, error: message })
    // Re-queue failed point
    if (point.resumeAt) {
      const retryAt = new Date(point.resumeAt)
      if (!Number.isNaN(retryAt.getTime())) {
        if (!nextRemaining || retryAt < nextRemaining) nextRemaining = retryAt
      }
    }
  }
}

Alternatively, schedule a short retry (e.g. new Date(Date.now() + 60_000)) to avoid hammering a bad point at full frequency.

})

// Per-point dispatch failures are reported in the body; the response still has success: true.
return NextResponse.json({
success: true,
requestId,
claimedRows,
dispatched,
failures,
})
} catch (error) {
const message = toError(error).message
logger.error('Time-pause resume poll failed', { requestId, error: message })
return NextResponse.json({ success: false, requestId, error: message }, { status: 500 })
} finally {
// Best-effort release: a rejection from releaseLock is deliberately ignored.
await releaseLock(LOCK_KEY, requestId).catch(() => {})
}
})
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ interface PausePointWithQueue {
latestResumeEntry?: ResumeQueueEntrySummary | null
parallelScope?: any
loopScope?: any
pauseKind?: 'human' | 'time'
resumeAt?: string
}

interface PausedExecutionSummary {
Expand Down
21 changes: 14 additions & 7 deletions apps/sim/blocks/blocks/wait.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ const WaitIcon = (props: SVGProps<SVGSVGElement>) => createElement(PauseCircle,
export const WaitBlock: BlockConfig = {
type: 'wait',
name: 'Wait',
description: 'Pause workflow execution for a specified time delay',
description: 'Pause workflow execution for up to 30 days',
longDescription:
'Pauses workflow execution for a specified time interval. The wait executes a simple sleep for the configured duration.',
'Pauses workflow execution for a specified time interval. Waits up to five minutes are held in-process; longer waits suspend the workflow and resume automatically once the configured duration elapses.',
bestPractices: `
- Use for simple time delays (max 10 minutes)
- Configure the wait amount and unit (seconds or minutes)
- Time-based waits are interruptible via workflow cancellation
- Configure the wait amount and unit (seconds, minutes, hours, or days)
- Maximum wait duration is 30 days
- Waits up to 5 minutes execute in-process and are interruptible via workflow cancellation
- Longer waits suspend the workflow; the execution resumes automatically when the timer fires
- Enter a positive number for the wait amount
`,
category: 'blocks',
Expand All @@ -26,7 +27,7 @@ export const WaitBlock: BlockConfig = {
id: 'timeValue',
title: 'Wait Amount',
type: 'short-input',
description: 'Max: 600 seconds or 10 minutes',
description: 'Max: 30 days',
placeholder: '10',
value: () => '10',
required: true,
Expand All @@ -38,6 +39,8 @@ export const WaitBlock: BlockConfig = {
options: [
{ label: 'Seconds', id: 'seconds' },
{ label: 'Minutes', id: 'minutes' },
{ label: 'Hours', id: 'hours' },
{ label: 'Days', id: 'days' },
],
value: () => 'seconds',
required: true,
Expand All @@ -53,7 +56,7 @@ export const WaitBlock: BlockConfig = {
},
timeUnit: {
type: 'string',
description: 'Wait duration unit (seconds or minutes)',
description: 'Wait duration unit (seconds, minutes, hours, or days)',
},
},
outputs: {
Expand All @@ -65,5 +68,9 @@ export const WaitBlock: BlockConfig = {
type: 'string',
description: 'Status of the wait block (waiting, completed, cancelled)',
},
resumeAt: {
type: 'string',
description: 'ISO timestamp at which a suspended wait will resume (long waits only)',
},
},
}
2 changes: 2 additions & 0 deletions apps/sim/executor/execution/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,8 @@ export class ExecutionEngine {
parallelScope: pause.parallelScope,
loopScope: pause.loopScope,
resumeLinks: pause.resumeLinks,
pauseKind: pause.pauseKind,
resumeAt: pause.resumeAt,
}))

return {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ export class HumanInTheLoopBlockHandler implements BlockHandler {
parallelScope,
loopScope,
resumeLinks,
pauseKind: 'human',
}

const responseOutput: Record<string, any> = {
Expand Down
Loading
Loading