diff --git a/bin/codecept.js b/bin/codecept.js index 212441579..cf83b4434 100755 --- a/bin/codecept.js +++ b/bin/codecept.js @@ -72,7 +72,11 @@ if (process.versions.node && process.versions.node.split('.') && process.version program.usage(' [options]') program.version(Codecept.version()) -program.command('init [path]').description('Creates dummy config in current dir or [path]').action(commandHandler('../lib/command/init.js')) +program + .command('init [path]') + .description('Creates dummy config in current dir or [path]') + .option('-y, --yes', 'skip prompts and use defaults (Playwright + chromium, BASE_URL env for url)') + .action(commandHandler('../lib/command/init.js')) program .command('check') diff --git a/bin/mcp-server.js b/bin/mcp-server.js old mode 100644 new mode 100755 index f80ea0bad..d3d7cb53b --- a/bin/mcp-server.js +++ b/bin/mcp-server.js @@ -1,3 +1,4 @@ +#!/usr/bin/env node import { Server } from '@modelcontextprotocol/sdk/server/index.js' import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js' import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js' @@ -17,6 +18,11 @@ import { } from '../lib/utils/trace.js' import event from '../lib/event.js' import recorder from '../lib/recorder.js' +import WebElement from '../lib/element/WebElement.js' +import { locate, within, session, secret, inject, pause } from '../lib/index.js' +import { tryTo, retryTo, hopeThat } from '../lib/effects.js' +import step from '../lib/steps.js' +import { element, eachElement, expectElement, expectAnyElement, expectAllElements } from '../lib/els.js' import { setPauseHandler, pauseNow } from '../lib/pause.js' import { EventEmitter } from 'events' import { fileURLToPath, pathToFileURL } from 'url' @@ -39,6 +45,7 @@ let shellSessionActive = false let bootstrapDone = false let currentPluginsSig = '' let currentAiTraceDir = null // mirrors the dir aiTrace plugin computes per test/session +let aiTraceEnabled = false 
// tracked across the session so tool responses can surface a hint when off event.dispatcher.on(event.test.before, test => { try { @@ -47,7 +54,27 @@ event.dispatcher.on(event.test.before, test => { } catch {} }) -const SESSION_REQUIRED_ERROR = 'No active CodeceptJS session. Call `start_browser` to open a shell session, or `run_test` (use `pause()` in the test, or set `pauseAt`) to inspect during a test run.' +function aiTraceHint() { + if (aiTraceEnabled) return undefined + return 'aiTrace plugin is disabled — re-run start_browser with plugins={ aiTrace: { enabled: true } } to capture per-step DOM/ARIA/console traces for debugging.' +} + +function applyMochaGrep(grep) { + if (!grep) return + const mocha = typeof container.mocha === 'function' ? container.mocha() : container.mocha + if (mocha && typeof mocha.grep === 'function') mocha.grep(grep) +} + +function pauseAtMatcher(pauseAt) { + if (pauseAt == null) return () => false + if (typeof pauseAt === 'number') return (idx) => idx === pauseAt + if (typeof pauseAt === 'string') { + const m = pauseAt.match(/^\/(.+)\/([gimsuy]*)$/) + const re = m ? 
new RegExp(m[1], m[2]) : new RegExp(pauseAt.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'i') + return (_idx, name) => re.test(name) + } + return () => false +} async function ensureBootstrap() { if (bootstrapDone) return @@ -80,9 +107,9 @@ async function endShellSession() { shellSessionActive = false } -function ensureSession() { +async function ensureSession() { if (shellSessionActive || pausedController) return - throw new Error(SESSION_REQUIRED_ERROR) + await startShellSession() } function normalizePluginOverrides(plugins) { @@ -109,18 +136,33 @@ function pluginsSignature(plugins) { async function teardownContainer() { if (!containerInitialized) return - await endShellSession() - const helpers = container.helpers() - for (const helperName in helpers) { - const helper = helpers[helperName] - try { if (helper._finish) await helper._finish() } catch {} + try { + await closeBrowser() + try { if (codecept?.teardown) await codecept.teardown() } catch {} + } finally { + containerInitialized = false + browserStarted = false + bootstrapDone = false + aiTraceEnabled = false + codecept = null + currentPluginsSig = '' } - try { if (codecept?.teardown) await codecept.teardown() } catch {} - containerInitialized = false - browserStarted = false - bootstrapDone = false - codecept = null - currentPluginsSig = '' +} + +let shutdownStarted = false +function installShutdownHooks() { + const onSignal = (signal) => { + if (shutdownStarted) return + shutdownStarted = true + teardownContainer().finally(() => process.exit(signal === 'SIGINT' ?
130 : 0)) + } + process.on('SIGTERM', () => onSignal('SIGTERM')) + process.on('SIGINT', () => onSignal('SIGINT')) + process.on('beforeExit', () => { + if (shutdownStarted) return + shutdownStarted = true + teardownContainer().catch(() => {}) + }) } let runLock = Promise.resolve() @@ -331,15 +373,17 @@ function outputBaseDir() { // pause(), the handler registered via setPauseHandler resolves a "paused" // promise that run_test is racing against test completion. The "pause" tool // then drives the REPL by mutating next/abort and resolving the controller. -let pausedController = null // { resolveContinue, registeredVariables } -let pendingRunPromise = null // run_test's run() promise while paused -let pendingRunResults = null // results array being collected while paused -let pendingRunCleanup = null // cleanup callback to detach test.after / step.after listeners -let pendingTestFile = null // file path of the test currently running -let pendingStepInfo = null // { index, name, status } of the last step that fired step.after +let pausedController = null +let pendingRunPromise = null +let pendingRunResults = null +let pendingRunCleanup = null +let pendingTestFile = null +let pendingStepInfo = null +let abortRun = false const pauseEvents = new EventEmitter() setPauseHandler(({ registeredVariables }) => { + if (abortRun) return Promise.reject(new Error('MCP session aborted')) return new Promise(resolve => { pausedController = { registeredVariables, @@ -352,6 +396,33 @@ setPauseHandler(({ registeredVariables }) => { }) }) +async function cancelRun() { + if (!pendingRunPromise && !pausedController) return false + abortRun = true + if (typeof pendingRunCleanup === 'function') { try { pendingRunCleanup() } catch {} } + if (pausedController) { try { pausedController.resolveContinue() } catch {} ; pausedController = null } + if (pendingRunPromise) { + try { await Promise.race([pendingRunPromise.catch(() => {}), new Promise(r => setTimeout(r, 5000))]) } catch {} + } + 
pendingRunPromise = null + pendingRunResults = null + pendingTestFile = null + pendingStepInfo = null + abortRun = false + return true +} + +async function closeBrowser() { + if (!containerInitialized) return + await cancelRun() + await endShellSession() + for (const helper of Object.values(container.helpers() || {})) { + try { if (helper._cleanup) await helper._cleanup() } catch {} + try { if (helper._finishTest) await helper._finishTest() } catch {} + } + browserStarted = false +} + async function captureLiveArtifacts(prefix = 'pause') { const helper = pickActingHelper(container.helpers()) if (!helper) return {} @@ -388,10 +459,15 @@ function collectRunCompletion(errorMessage) { pendingRunResults = null pendingTestFile = null pendingStepInfo = null + let error = errorMessage || null + if (!error && results.length === 0) { + error = 'No tests ran and no error was reported. The Mocha instance may have been disposed (set mocha.cleanReferencesAfterRun=false in config) or the test file matched no scenarios.' + } return { - status: 'completed', + status: error ? 'failed' : 'completed', reporterJson: { stats, tests: results }, - error: errorMessage, + error, + aiTraceHint: aiTraceHint(), } } @@ -443,98 +519,133 @@ async function initCodecept(configPath, pluginOverrides) { // aiTrace is the canonical per-step ARIA/HTML/screenshot capture for MCP. // Always on so run_code / continue can read the latest snapshot from disk // instead of double-capturing through grabAriaSnapshot etc. 
- applyPluginOverrides(config, { aiTrace: {}, ...plugins }) + applyPluginOverrides(config, { aiTrace: { on: 'step' }, browser: { show: false }, ...plugins }) codecept = new Codecept(config, {}) await codecept.init(testRoot) - await container.create(config, {}) await container.started() containerInitialized = true browserStarted = true + aiTraceEnabled = config.plugins?.aiTrace?.enabled === true currentPluginsSig = sig } -const PLUGINS_DESCRIPTION = 'Enable CodeceptJS plugins for this run, mirroring the CLI `-p` flag. Keys are plugin names (e.g. screencast, aiTrace, pause, pageInfo, heal, retryFailedStep, screenshotOnFail, autoDelay). Value `true` or `{}` enables with defaults; an object merges options, e.g. {"screencast": {"saveScreenshots": true}, "aiTrace": {"on": "fail"}}. Changing the plugin set tears down and re-initializes the container (closes the browser).' +async function formatReturnValue(value) { + if (value instanceof WebElement) return await value.describe() + if (Array.isArray(value) && value.length && value.every(v => v instanceof WebElement)) { + return await Promise.all(value.map(v => v.describe())) + } + return value +} const server = new Server( { name: 'codeceptjs-mcp-server', version: '1.0.0' }, { capabilities: { tools: {} } } ) +const PLUGINS_PROP = { + type: 'object', + description: 'Plugin configs to enable for this session, keyed by plugin name. Same shape as `plugins` in codecept.conf.js — each value is the plugin\'s config object (`enabled: true` is added automatically). 
Common entries:\n' + + ' • { browser: { show: true } } — visible browser (headed)\n' + + ' • { browser: { show: false } } — headless\n' + + ' • { browser: { browser: "firefox", windowSize: "1280x720" } } — switch browser + viewport\n' + + ' • { pause: { on: "fail" } } / { screenshot: { on: "step" } } / { aiTrace: {} }\n' + + 'Override or add to whatever the project config already enables.', + additionalProperties: { type: 'object' }, +} + +const CONFIG_PROP = { + type: 'string', + description: 'Path to codecept.conf.js (or .cjs). Defaults to $CODECEPTJS_CONFIG, then ./codecept.conf.js in $CODECEPTJS_PROJECT_DIR or cwd. Only needed for projects with a non-standard config location.', +} + server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: [ { name: 'list_tests', - description: 'List all tests in the CodeceptJS project', - inputSchema: { type: 'object', properties: { config: { type: 'string' } } }, + description: 'List all tests in the CodeceptJS project. Uses the active session if start_browser was called, otherwise auto-inits with project defaults.', + inputSchema: { type: 'object', properties: {} }, }, { name: 'list_actions', - description: 'List all available CodeceptJS actions (I.* methods)', - inputSchema: { type: 'object', properties: { config: { type: 'string' } } }, + description: 'List all available CodeceptJS actions (I.* methods). Uses the active session if start_browser was called, otherwise auto-inits with project defaults.', + inputSchema: { type: 'object', properties: {} }, }, { name: 'run_code', - description: 'Run arbitrary CodeceptJS code.', + description: 'Run arbitrary CodeceptJS code. 
Response includes `availableObjects` listing every symbol in scope (I, helpers, container, step, tryTo, within, etc.).', inputSchema: { type: 'object', properties: { code: { type: 'string' }, timeout: { type: 'number' }, - config: { type: 'string' }, saveArtifacts: { type: 'boolean' }, + settleMs: { type: 'number', description: 'Wait N ms after the code finishes before capturing artifacts. Default 300. Set higher (1000+) when actions trigger slow re-renders, or 0 to skip.' }, }, required: ['code'], }, }, { name: 'run_test', - description: 'Run a specific test. If the test calls pause() — or if pauseAt is set and reached — returns early with status "paused" so the agent can inspect via run_code and release with continue. Otherwise returns the json reporter result on completion. To learn step indices for pauseAt, run "list" with --steps or call run_step_by_step first.', + description: 'Run a specific test. Returns reporter JSON with one entry per scenario; each entry has a `traceFile` (file:// URL) pointing to the aiTrace markdown for that scenario — Read it on failures to see the failing step\'s DOM/ARIA/screenshot. If aiTrace is disabled the response includes an `aiTraceHint`. If the test calls pause() — or if pauseAt is set and reached — returns early with status "paused" so the agent can inspect via run_code and release with continue. To learn step indices for pauseAt, call run_step_by_step first. Auto-inits with project defaults if no session is active — call start_browser first to customize launch (e.g. plugins={ browser: { show: true } } to watch the run).', inputSchema: { type: 'object', properties: { test: { type: 'string' }, timeout: { type: 'number' }, - config: { type: 'string' }, - pauseAt: { type: 'number', description: '1-based step index. Test will pause after the Nth step completes. Useful as a programmatic breakpoint without editing the test.' 
}, - plugins: { type: 'object', description: PLUGINS_DESCRIPTION, additionalProperties: true }, + grep: { type: 'string', description: 'Filter scenarios by title (passed to mocha.grep). Mirrors --grep on the CLI.' }, + pauseAt: { + description: 'Programmatic breakpoint. Either a 1-based step index (number) or a step-name match (string — substring case-insensitive, or `/regex/i` literal). Examples: 5 / "fill field" / "/grab.*url/i".', + oneOf: [{ type: 'number' }, { type: 'string' }], + }, + plugins: PLUGINS_PROP, }, required: ['test'], }, }, { name: 'run_step_by_step', - description: 'Run a test interactively, pausing after every step. Returns paused payload after the first step (URL/title/contentSize, last step info, suggestions). Call continue to advance one step (and re-pause), or run_code/snapshot to inspect state. The test runs to completion when no more steps remain.', + description: 'Run a test interactively, pausing after every step. Returns paused payload after the first step (URL/title/contentSize, last step info, suggestions). Call continue to advance one step (and re-pause), or run_code/snapshot to inspect state. On completion each scenario in `reporterJson.tests[]` has a `traceFile` (file:// URL) for the per-step aiTrace markdown — Read it for the full execution log. Much more useful when start_browser was called with plugins={ browser: { show: true } } so you can watch what happens between pauses.', inputSchema: { type: 'object', properties: { test: { type: 'string' }, timeout: { type: 'number' }, - config: { type: 'string' }, - plugins: { type: 'object', description: PLUGINS_DESCRIPTION, additionalProperties: true }, + grep: { type: 'string', description: 'Filter scenarios by title (passed to mocha.grep). Mirrors --grep on the CLI.' 
}, + plugins: PLUGINS_PROP, }, required: ['test'], }, }, { name: 'start_browser', - description: 'Start the browser session.', - inputSchema: { type: 'object', properties: { config: { type: 'string' } } }, + description: 'Start the session — initializes the codeceptjs container, loads helpers, and applies any plugin overrides. This is the only tool that customizes initialization; every other tool either uses the active session or auto-inits with project defaults.\n\n' + + 'MCP enforces two plugin defaults so the agent gets useful telemetry:\n' + + ' • aiTrace: { on: "step", enabled: true } — per-step DOM/ARIA/console/screenshot traces for debugging\n' + + ' • browser: { show: false, enabled: true } — headless by default\n' + + 'Both can be overridden via the `plugins` arg. To watch the run live: plugins={ browser: { show: true } }. To skip per-step trace overhead on a re-run: plugins={ aiTrace: { enabled: false } } (or { on: "fail" } to only capture failures). To switch config or plugins mid-session, call stop_browser first.', + inputSchema: { + type: 'object', + properties: { + config: CONFIG_PROP, + plugins: PLUGINS_PROP, + }, + }, }, { name: 'stop_browser', - description: 'Stop the browser session.', + description: 'Stop the session, close browsers, and tear down the container. Required before re-initing with different config or plugins.', inputSchema: { type: 'object', properties: {} }, }, { name: 'snapshot', - description: 'Capture current browser state (HTML, ARIA, screenshot, console, URL) without performing any action.', + description: 'Capture current browser state (HTML, ARIA, screenshot, console, URL) without performing any action. Returns `traceFile` (file:// URL) to a markdown trace bundling the captured artifacts — Read it for full context. 
Auto-inits with project defaults if no session is active.', inputSchema: { type: 'object', properties: { - config: { type: 'string' }, fullPage: { type: 'boolean' }, + settleMs: { type: 'number', description: 'Wait N ms before capturing. Default 300. Set higher when the previous action is still re-rendering, or 0 to skip.' }, }, }, }, @@ -548,6 +659,11 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({ }, }, }, + { + name: 'cancel', + description: 'Abort the currently paused or in-progress test run without closing the browser. Use when you want to bail out of a paused test and start something else without going through stop_browser/start_browser. The browser session and Mocha state stay alive.', + inputSchema: { type: 'object', properties: {} }, + }, ], })) @@ -557,8 +673,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { try { switch (name) { case 'list_tests': { - const configPath = args?.config - await initCodecept(configPath) + await initCodecept() codecept.loadTests() const tests = codecept.testFiles.map(testFile => { @@ -573,8 +688,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { } case 'list_actions': { - const configPath = args?.config - await initCodecept(configPath) + await initCodecept() const helpers = container.helpers() const supportI = container.support('I') @@ -602,27 +716,33 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { } case 'start_browser': { - const configPath = args?.config + const { config: configPath, plugins } = args || {} if (browserStarted && shellSessionActive) { - return { content: [{ type: 'text', text: JSON.stringify({ status: 'Session already active' }, null, 2) }] } + return { content: [{ type: 'text', text: JSON.stringify({ status: 'Session already active', plugins: plugins ?? 
null }, null, 2) }] } + } + await initCodecept(configPath, plugins) + if (containerInitialized && !browserStarted) { + for (const helper of Object.values(container.helpers() || {})) { + try { if (helper._beforeSuite) await helper._beforeSuite() } catch {} + } + browserStarted = true } - await initCodecept(configPath) await startShellSession() - return { content: [{ type: 'text', text: JSON.stringify({ status: 'Session started — run_code and snapshot are now available' }, null, 2) }] } + return { content: [{ type: 'text', text: JSON.stringify({ status: 'Session started — run_code and snapshot are now available', plugins: plugins ?? null }, null, 2) }] } } case 'stop_browser': { if (!containerInitialized) { return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser not initialized' }, null, 2) }] } } - await teardownContainer() - return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser stopped successfully' }, null, 2) }] } + await closeBrowser() + return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser stopped — Mocha and config preserved; call start_browser to reopen' }, null, 2) }] } } case 'snapshot': { - const { config: configPath, fullPage = false } = args || {} - await initCodecept(configPath) - ensureSession() + const { fullPage = false, settleMs = 300 } = args || {} + await initCodecept() + await ensureSession() const helper = pickActingHelper(container.helpers()) if (!helper) throw new Error('No supported acting helper available (Playwright, Puppeteer, WebDriver).') @@ -630,6 +750,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { const dir = snapshotDirFor(outputBaseDir()) mkdirp.sync(dir) + if (settleMs > 0) await new Promise(r => setTimeout(r, settleMs)) const captured = await captureSnapshot(helper, { dir, prefix: 'snapshot', fullPage }) const traceFile = writeTraceMarkdown({ dir, @@ -648,6 +769,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { dir, 
traceFile: pathToFileURL(traceFile).href, artifacts: artifactsToFileUrls(captured, dir), + aiTraceHint: aiTraceHint(), }, null, 2), }], } @@ -684,21 +806,32 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { }) } + case 'cancel': { + const cancelled = await cancelRun() + await ensureSession() + return { content: [{ type: 'text', text: JSON.stringify({ status: cancelled ? 'Run cancelled — browser kept open' : 'No run in progress' }, null, 2) }] } + } + case 'run_code': { - const { code, timeout = 60000, config: configPath, saveArtifacts = true } = args - await initCodecept(configPath) - ensureSession() + const { code, timeout = 60000, saveArtifacts = true, settleMs = 300 } = args + await initCodecept() + await ensureSession() - const I = container.support('I') - if (!I) throw new Error('I object not available. Make sure helpers are configured.') + const support = container.supportObjects() || {} + if (!support.I) throw new Error('I object not available. Make sure helpers are configured.') const result = { status: 'unknown', output: '', error: null, commands: [], artifacts: {} } const commands = [] + let lastStepValue const onStepAfter = step => { try { commands.push(step.toString()) } catch {} } + const onStepPassed = (step, val) => { + if (val !== undefined) lastStepValue = val + } event.dispatcher.on(event.step.after, onStepAfter) + event.dispatcher.on(event.step.passed, onStepPassed) const traceDir = traceDirFor(`mcp_${Date.now()}`, 'run_code', outputBaseDir()) mkdirp.sync(traceDir) @@ -728,13 +861,27 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { console[m] = captureLog(m) } + const scope = { + locate, within, session, secret, inject, pause, share: container.share, + tryTo, retryTo, hopeThat, + step, element, eachElement, expectElement, expectAnyElement, expectAllElements, + container, helpers: container.helpers(), + ...support, + } + const paramNames = ['I', ...Object.keys(scope).filter(k => k !== 'I').sort()] + 
const paramValues = paramNames.map(k => scope[k]) + + const wasPaused = !!pausedController + if (wasPaused) recorder.session.start('mcp_run_code') + let returnValue try { - const asyncFn = new Function('I', `return (async () => { ${code} })()`) + const asyncFn = new Function(...paramNames, `return (async () => { ${code} })()`) returnValue = await Promise.race([ - asyncFn(I), + asyncFn(...paramValues), new Promise((_, reject) => setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout)), ]) + await recorder.promise() result.status = 'success' result.output = 'Code executed successfully' @@ -745,11 +892,21 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { } finally { for (const m of consoleMethods) console[m] = origConsoleMethods[m] try { event.dispatcher.removeListener(event.step.after, onStepAfter) } catch {} + try { event.dispatcher.removeListener(event.step.passed, onStepPassed) } catch {} + if (wasPaused) { + try { recorder.session.restore('mcp_run_code') } catch {} + } else { + try { recorder.reset() } catch {} + } } result.commands = commands result.logs = consoleLogs if (consoleLogs.length === MAX_LOG_ENTRIES) result.logsTruncated = true + result.availableObjects = paramNames + + if (returnValue === undefined) returnValue = await Promise.resolve(lastStepValue) + returnValue = await formatReturnValue(returnValue) if (returnValue !== undefined) { const json = typeof returnValue === 'string' ? 
returnValue : safeStringify(returnValue, [], 2) @@ -763,6 +920,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { const helper = pickActingHelper(container.helpers()) if (helper) { try { + if (settleMs > 0) await new Promise(r => setTimeout(r, settleMs)) captured = await captureSnapshot(helper, { dir: traceDir, prefix: 'mcp' }) result.artifacts = artifactsToFileUrls(captured, traceDir) } catch (e) { @@ -790,6 +948,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { }) result.dir = traceDir result.traceFile = pathToFileURL(traceFile).href + result.aiTraceHint = aiTraceHint() return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] } } @@ -799,9 +958,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { if (pausedController) { throw new Error('A previous run_test is still paused. Call "continue" first.') } - const { test, timeout = 60000, config: configPathArg, pauseAt, plugins } = args || {} - await initCodecept(configPathArg, plugins) + const { test, timeout = 60000, pauseAt, grep, plugins } = args || {} + await initCodecept(undefined, plugins) await endShellSession() + applyMochaGrep(grep) return await withSilencedIO(async () => { codecept.loadTests() @@ -822,26 +982,27 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { pendingTestFile = testFile pendingStepInfo = null let stepIndex = 0 + const matchPauseAt = pauseAtMatcher(pauseAt) const onAfter = t => { + const aiTrace = t.artifacts?.aiTrace pendingRunResults.push({ title: t.title, file: t.file, status: t.err ? 'failed' : 'passed', error: t.err?.message, duration: t.duration, + traceFile: aiTrace ? 
pathToFileURL(aiTrace).href : null, }) } const onStepAfter = step => { stepIndex += 1 - try { - pendingStepInfo = { index: stepIndex, name: step.toString(), status: step.status } - } catch { - pendingStepInfo = { index: stepIndex } - } - if (typeof pauseAt === 'number' && stepIndex === pauseAt) { - pauseNow() - } + const idx = stepIndex + const name = (() => { try { return step.toString() } catch { return '' } })() + recorder.add('mcp pause info', () => { + pendingStepInfo = { index: idx, name, status: step.status } + }) + if (matchPauseAt(idx, name)) pauseNow() } event.dispatcher.on(event.test.after, onAfter) event.dispatcher.on(event.step.after, onStepAfter) @@ -883,6 +1044,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { } const final = collectRunCompletion(runError?.message) + await startShellSession() return { content: [{ type: 'text', text: JSON.stringify({ ...final, file: testFile }, null, 2) }] } }) }) @@ -893,9 +1055,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { if (pausedController) { throw new Error('A previous run is still paused. Call "continue" first.') } - const { test, timeout = 60000, config: configPath, plugins } = args || {} - await initCodecept(configPath, plugins) + const { test, timeout = 60000, grep, plugins } = args || {} + await initCodecept(undefined, plugins) await endShellSession() + applyMochaGrep(grep) return await withSilencedIO(async () => { codecept.loadTests() @@ -918,22 +1081,23 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { let stepIndex = 0 const onAfter = t => { + const aiTrace = t.artifacts?.aiTrace pendingRunResults.push({ title: t.title, file: t.file, status: t.err ? 'failed' : 'passed', error: t.err?.message, duration: t.duration, + traceFile: aiTrace ? 
pathToFileURL(aiTrace).href : null, }) } const onStepAfter = step => { stepIndex += 1 - try { - pendingStepInfo = { index: stepIndex, name: step.toString(), status: step.status } - } catch { - pendingStepInfo = { index: stepIndex } - } - // Pause after every step — agent calls continue to advance. + const idx = stepIndex + const name = (() => { try { return step.toString() } catch { return '' } })() + recorder.add('mcp pause info', () => { + pendingStepInfo = { index: idx, name, status: step.status } + }) pauseNow() } event.dispatcher.on(event.test.after, onAfter) @@ -975,8 +1139,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { } } - // Test had zero steps (or finished before first pause) — return completion const final = collectRunCompletion(runError?.message) + await startShellSession() return { content: [{ type: 'text', text: JSON.stringify({ ...final, file: testFile }, null, 2) }] } }) }) @@ -994,6 +1158,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { }) async function main() { + installShutdownHooks() const transport = new StdioServerTransport() await server.connect(transport) } diff --git a/docs/continuous-integration.md b/docs/continuous-integration.md index 4c8efff2e..36bb7b448 100644 --- a/docs/continuous-integration.md +++ b/docs/continuous-integration.md @@ -3,20 +3,495 @@ permalink: /continuous-integration title: Continuous Integration --- - - # Continuous Integration -> Help us improve this article. [Write how did you set up CodeceptJS for CI](https://codecept.discourse.group/c/CodeceptJS-issues-in-general/ci/9) and see your post listed here! +CodeceptJS runs in any CI system that can install Node.js. The work is in the surrounding environment: a headless browser, a driver server for WebDriver, failure artifacts to upload, and a parallelization strategy that keeps the wall-clock time reasonable. This guide covers each step and provides drop-in configs for the major CI systems. 
+ +## Preparing tests for CI + +A CI-ready suite needs only a few things: + +- **Headless mode.** Playwright runs headless by default — only act if you set `show: true` locally. To toggle it from CI, export `HEADLESS=true` and read it from your config. +- **Colored logs.** Export `FORCE_COLOR=1` so CodeceptJS output renders correctly in CI log viewers. +- **Failure artifacts.** Keep `screenshotOnFail` enabled (it is on by default). For Playwright, also enable `trace` and `video` in the helper config — they make a remote failure diagnosable from a single artifact. +- **Self-healing for flaky tests.** Use the [`heal` plugin](/heal) to recover from broken locators. The `retryFailedStep` plugin is already enabled by default — you do not need to configure it. + +You do **not** need to set `CI=true`. Every CI provider exports it automatically, and CodeceptJS reads it to relax certain timeouts. + +## Installing browsers and drivers + +### Playwright + +Playwright needs browser binaries plus Linux system libraries. The recommended approach (per the [official Playwright CI docs](https://playwright.dev/docs/ci)) is: + +```bash +npm ci +npx playwright install --with-deps chromium +``` + +`--with-deps` pulls in `libnss`, fonts, and other OS packages. To install all engines, drop the `chromium` argument. Playwright explicitly recommends against caching browser binaries — restoring the cache takes about as long as a fresh download. + +If you prefer the official Playwright Docker image, see the [Playwright Docker docs](https://playwright.dev/docs/docker). Pin the image tag to **the same version as your installed `playwright` package** — a mismatched image will fail to find browser executables. The examples below use `node:20` + `npx playwright install --with-deps` to avoid this version-pin problem entirely. + +### WebDriver + +CodeceptJS's WebDriver helper talks to any WebDriver-protocol endpoint. 
In CI, the simplest setup is a [Selenium Docker container](https://github.com/SeleniumHQ/docker-selenium): + +```bash +docker run -d --net=host --shm-size=2g selenium/standalone-chrome +``` + +Point the helper at it: + +```js +helpers: { + WebDriver: { + url: 'http://localhost:8000', + browser: 'chrome', + host: process.env.SELENIUM_HOST || 'localhost', + port: parseInt(process.env.SELENIUM_PORT || '4444', 10), + } +} +``` + +For an alternative without Selenium, see the [WebDriver helper docs](/webdriver) — recent WebdriverIO versions can manage drivers (chromedriver, geckodriver) directly. Selenium is still the most portable choice for CI. + +`--shm-size=2g` matters. The default 64 MB causes Chrome tabs to crash on heavy pages. + +## Running tests + +A single process: + +```bash +npx codeceptjs run +``` + +Parallel workers on one machine: + +```bash +npx codeceptjs run-workers 4 --by pool +``` + +`--by pool` distributes tests dynamically: each worker grabs the next test as it finishes, so no worker sits idle. See [Parallel Execution](/parallel) for `--by test` and `--by suite`. + +Sharded across multiple machines (CI matrix): + +```bash +npx codeceptjs run --shard 1/4 +npx codeceptjs run --shard 2/4 +npx codeceptjs run --shard 3/4 +npx codeceptjs run --shard 4/4 +``` + +You can combine the two — each shard runs on its own machine, and `run-workers` parallelizes within the shard. + +Filter by tag: + +```bash +npx codeceptjs run --grep "@smoke" +npx codeceptjs run --grep "@slow" --invert +``` + +## Reporting + +For CI test reporting, use [`@testomatio/reporter`](https://github.com/testomatio/reporter). It ships built-in **pipes** that publish results directly into the CI platform's UI — no XML wrangling required. 
+ +| CI | Recommended pipes | Result | +|---|---|---| +| GitHub Actions | `github` + `html` | PR check annotations + a self-contained HTML report | +| GitLab CI | `gitlab` | Merge request widget with test results | +| Bitbucket Pipelines | `bitbucket` | Pipeline test report | +| Any | `html` | HTML report you can upload as an artifact | + +Install: + +```bash +npm i --save-dev @testomatio/reporter +``` + +See the [reporter README](https://github.com/testomatio/reporter) for the per-pipe environment variables. + +Whatever reporter you use, also upload the `output/` directory as a build artifact. It contains failure screenshots and, with Playwright, traces and videos. + +For other reporter formats, see [Reports](/reports). + +## CI system examples + +The examples below use Playwright by default. A WebDriver-with-Selenium variant follows where it differs. + +### GitHub Actions — Playwright + +`.github/workflows/tests.yml`: + +```yaml +name: Tests +on: + push: + branches: [main] + pull_request: + +env: + FORCE_COLOR: 1 + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 20 + cache: npm + - run: npm ci + - run: npx playwright install --with-deps chromium + - run: npx codeceptjs run-workers 4 --by pool + - uses: actions/upload-artifact@v4 + if: failure() + with: + name: codeceptjs-output + path: output/ +``` + +### GitHub Actions — WebDriver + Selenium + +```yaml +name: WebDriver Tests +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + services: + selenium: + image: selenium/standalone-chrome + ports: + - 4444:4444 + options: --shm-size=2g + env: + SELENIUM_HOST: localhost + SELENIUM_PORT: 4444 + FORCE_COLOR: 1 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 20 + - run: npm ci + - run: npx codeceptjs run-workers 2 --by pool + - uses: actions/upload-artifact@v4 + if: failure() + with: + name: 
codeceptjs-output + path: output/ +``` + +### GitHub Actions — Sharding matrix + +Each shard runs on its own runner in parallel: + +```yaml +jobs: + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + shard: ['1/4', '2/4', '3/4', '4/4'] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 20 + - run: npm ci + - run: npx playwright install --with-deps chromium + - run: npx codeceptjs run --shard ${{ matrix.shard }} + - uses: actions/upload-artifact@v4 + if: failure() + with: + name: output-shard-${{ strategy.job-index }} + path: output/ +``` + +### GitLab CI + +`.gitlab-ci.yml`: + +```yaml +stages: [test] + +playwright: + stage: test + image: node:20 + variables: + FORCE_COLOR: "1" + parallel: 4 + before_script: + - npm ci + - npx playwright install --with-deps chromium + script: + - npx codeceptjs run --shard $CI_NODE_INDEX/$CI_NODE_TOTAL + artifacts: + when: on_failure + paths: + - output/ + expire_in: 1 week + +webdriver: + stage: test + image: node:20 + services: + - name: selenium/standalone-chrome + alias: selenium + variables: + SELENIUM_HOST: selenium + SELENIUM_PORT: "4444" + script: + - npm ci + - npx codeceptjs run-workers 2 --by pool + artifacts: + when: on_failure + paths: [output/] +``` + +`$CI_NODE_INDEX` is 1-based, which matches CodeceptJS's `--shard` syntax exactly. 
+ +### Bitbucket Pipelines + +`bitbucket-pipelines.yml`: + +```yaml +image: node:20 + +definitions: + services: + selenium: + image: selenium/standalone-chrome + memory: 2048 + +pipelines: + default: + - step: + name: Install + caches: [node] + script: + - npm ci + - npx playwright install --with-deps chromium + - parallel: + - step: + name: Shard 1/4 + script: + - npx codeceptjs run --shard 1/4 + artifacts: + - output/** + - step: + name: Shard 2/4 + script: + - npx codeceptjs run --shard 2/4 + artifacts: + - output/** + - step: + name: Shard 3/4 + script: + - npx codeceptjs run --shard 3/4 + artifacts: + - output/** + - step: + name: Shard 4/4 + script: + - npx codeceptjs run --shard 4/4 + artifacts: + - output/** +``` + +For WebDriver, attach the Selenium service to the step: + +```yaml +pipelines: + default: + - step: + image: node:20 + services: [selenium] + script: + - npm ci + - export SELENIUM_HOST=localhost SELENIUM_PORT=4444 + - npx codeceptjs run-workers 2 --by pool + artifacts: + - output/** +``` + +### Jenkins + +`Jenkinsfile` (declarative pipeline): + +```groovy +pipeline { + agent { + docker { + image 'node:20' + args '-u root' + } + } + environment { + FORCE_COLOR = '1' + } + stages { + stage('Install') { + steps { + sh 'npm ci' + sh 'npx playwright install --with-deps chromium' + } + } + stage('Test') { + parallel { + stage('Shard 1/4') { steps { sh 'npx codeceptjs run --shard 1/4' } } + stage('Shard 2/4') { steps { sh 'npx codeceptjs run --shard 2/4' } } + stage('Shard 3/4') { steps { sh 'npx codeceptjs run --shard 3/4' } } + stage('Shard 4/4') { steps { sh 'npx codeceptjs run --shard 4/4' } } + } + } + } + post { + failure { + archiveArtifacts artifacts: 'output/**', allowEmptyArchive: true + } + } +} +``` + +For WebDriver, launch Selenium alongside the test container: + +```groovy +stage('Test') { + steps { + script { + docker.image('selenium/standalone-chrome') + .withRun('--shm-size=2g -p 4444:4444') { c -> + sh ''' + export 
SELENIUM_HOST=localhost SELENIUM_PORT=4444 + npx codeceptjs run-workers 2 --by pool + ''' + } + } + } +} +``` + +### CircleCI + +`.circleci/config.yml`: + +```yaml +version: 2.1 + +jobs: + test: + docker: + - image: cimg/node:20.18-browsers + parallelism: 4 + steps: + - checkout + - run: npm ci + - run: npx playwright install --with-deps chromium + - run: + name: Run shard + command: | + INDEX=$((CIRCLE_NODE_INDEX + 1)) + npx codeceptjs run --shard ${INDEX}/${CIRCLE_NODE_TOTAL} + - store_artifacts: + path: output + + webdriver: + docker: + - image: cimg/node:20.18 + - image: selenium/standalone-chrome + environment: + SELENIUM_HOST: localhost + SELENIUM_PORT: 4444 + steps: + - checkout + - run: npm ci + - run: npx codeceptjs run-workers 2 --by pool + - store_artifacts: + path: output + +workflows: + test: + jobs: + - test + - webdriver +``` + +`CIRCLE_NODE_INDEX` is 0-based, so add 1 to match CodeceptJS's 1-based `--shard` index. + +### Azure Pipelines + +`azure-pipelines.yml`: + +```yaml +trigger: [main] + +pool: + vmImage: ubuntu-latest + +strategy: + parallel: 4 + +steps: + - task: NodeTool@0 + inputs: + versionSpec: '20.x' + - script: npm ci + displayName: Install dependencies + - script: npx playwright install --with-deps chromium + displayName: Install Playwright browsers + - script: | + npx codeceptjs run --shard $(System.JobPositionInPhase)/$(System.TotalJobsInPhase) + displayName: Run shard $(System.JobPositionInPhase)/$(System.TotalJobsInPhase) + env: + FORCE_COLOR: 1 + - task: PublishBuildArtifacts@1 + condition: failed() + inputs: + pathToPublish: output + artifactName: codeceptjs-output-$(System.JobPositionInPhase) +``` + +For WebDriver, run Selenium as a sidecar before tests: + +```yaml + - script: docker run -d --net=host --shm-size=2g selenium/standalone-chrome + displayName: Start Selenium + - script: | + export SELENIUM_HOST=localhost SELENIUM_PORT=4444 + npx codeceptjs run-workers 2 --by pool + displayName: Run tests +``` + +## Docker + +The 
official `codeceptjs/codeceptjs` image runs Playwright, Puppeteer, and WebDriver suites without further setup. Pass runtime flags through `CODECEPT_ARGS` and the worker count through `NO_OF_WORKERS`. See [Docker](/docker) for the full reference and Compose examples. + +## Tips + +- **Raise per-test timeouts in CI.** CI machines are slower than your laptop. Bump `timeout` in `codecept.conf.js` when assertions race the page. +- **Diagnose from logs.** Re-run with `--debug` or `DEBUG=codeceptjs:*` when a job fails and you cannot reproduce locally. +- **Selenium Chrome: always `--shm-size=2g`.** The default 64 MB causes tab crashes on heavy pages. +- **Custom Playwright images: install OS deps.** When you cannot use `mcr.microsoft.com/playwright`, run `npx playwright install --with-deps` to pull in `libnss`, fonts, and other system libraries. +- **Upload `output/` only on failure.** Successful runs produce no useful artifacts. + +## See also -Continuous Integration services allows you to delegate the control of running tests to external system. -CodeceptJS plays well with all types of CI even when there is no documentation on this topic, it is still easy to set up with any kind of hosted or cloud CI. -Our community prepared some valuable recipes for setting up CI systems with CodeceptJS. +- [Playwright CI guide](https://playwright.dev/docs/ci) — upstream notes on browser install, sharding, and per-platform config. +- [Playwright Docker image](https://playwright.dev/docs/docker) — image tags and the version-pinning rule. +- [WebdriverIO Selenium Grid](https://webdriver.io/docs/seleniumgrid) — connection options for `host`/`port`/`path`. +- [Selenium Docker images](https://github.com/SeleniumHQ/docker-selenium) — image variants (`standalone-chrome`, `standalone-firefox`, debug images with VNC). 
-## Recipes +## Community recipes -* ### [CodeceptJS - Codefresh Integration](https://codecept.discourse.group/t/codeceptjs-codefresh-integration/) -* ### [CodeceptJS GitLab Integration](https://codecept.discourse.group/t/codeceptjs-gitlab-integration/) -* ### [CodeceptJS - Jenkins Integration](https://codecept.discourse.group/t/codeceptjs-jenkins-integration/) -* ### [CodeceptJS Integration with TeamCity](https://codecept.discourse.group/t/codeceptjs-integration-with-teamcity/) +- [CodeceptJS — Codefresh Integration](https://codecept.discourse.group/t/codeceptjs-codefresh-integration/) +- [CodeceptJS — GitLab Integration](https://codecept.discourse.group/t/codeceptjs-gitlab-integration/) +- [CodeceptJS — Jenkins Integration](https://codecept.discourse.group/t/codeceptjs-jenkins-integration/) +- [CodeceptJS — TeamCity Integration](https://codecept.discourse.group/t/codeceptjs-integration-with-teamcity/) + +Got a setup that works for you? [Share your recipe](https://codecept.discourse.group/c/CodeceptJS-issues-in-general/ci/9) and we will list it here. diff --git a/docs/mcp.md b/docs/mcp.md index 02edd3bc1..b3a3333f1 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -1,158 +1,186 @@ # CodeceptJS MCP Server -Model Context Protocol (MCP) server for CodeceptJS enables AI agents (like Claude) to interact with and control CodeceptJS tests programmatically. +Model Context Protocol (MCP) server for CodeceptJS. Lets AI agents drive a CodeceptJS browser session — list tests, run arbitrary `I.*` code, pause-and-poke through a scenario, capture artifacts, and read aiTrace markdown — all in-process, sharing one browser and one container. 
## Overview -The MCP server provides AI agents with tools to: -- List all tests in a CodeceptJS project -- List all available CodeceptJS actions (I.* methods) -- Run arbitrary CodeceptJS code with artifacts capture, return value, and `console.log` capture -- Run specific tests with detailed output -- Run tests step by step for detailed analysis -- Capture a point-in-time snapshot of the browser without any action -- Start and stop browser sessions -- Capture screenshots, ARIA snapshots, formatted HTML, browser console logs, and storage state (cookies + localStorage) +The MCP server exposes the following tools: -## Installation +- `list_tests` / `list_actions` — enumerate tests and `I.*` methods +- `start_browser` / `stop_browser` — open / close the session (only place plugin overrides go) +- `run_code` — run arbitrary JS with `I` and the full CodeceptJS scope; captures steps, console, return value, and a settled-state snapshot +- `snapshot` — capture URL/HTML/ARIA/screenshot/console/storage at any moment +- `run_test` — run a specific scenario; supports `pauseAt` for programmatic breakpoints +- `run_step_by_step` — pause after every step +- `continue` — release a paused test (run-to-end, run-to-next-pause, or run-to-finish) +- `cancel` — abort the in-progress / paused run without closing the browser -Install the MCP SDK dependency: +## Invocation -```bash -npm install @modelcontextprotocol/sdk -``` +Two ways to launch the server: + +- `npx codeceptjs-mcp` — the published bin +- `node node_modules/codeceptjs/bin/mcp-server.js` — direct path, useful for editor / agent configs -The MCP server binary is available at `bin/mcp-server.js`. +> ⚠️ **Run from the project's local `codeceptjs`, never a global install.** +> The MCP server resolves helpers, plugins, page objects, and custom support from the project's `node_modules`. 
A globally installed `codeceptjs` won't see project-local helpers (`@codeceptjs/helper`, `@codeceptjs/configure`, custom plugins) or your `include:` support objects, and per-project versions can drift from the global one. Always invoke via `npx codeceptjs-mcp` from inside the project directory, or point your MCP client config at `<project>/node_modules/codeceptjs/bin/mcp-server.js` directly. ## Configuration -Configure the MCP server in your Claude Desktop or MCP-compatible client configuration: +Set up the MCP server in your client (Claude Desktop, Cursor, Continue, etc.): -### Basic Configuration +### Basic ```json { "mcpServers": { "codeceptjs": { - "command": "node", - "args": ["path/to/codeceptjs/bin/mcp-server.js"] + "command": "npx", + "args": ["codeceptjs-mcp"] } } } ``` -With basic configuration, the server looks for `codecept.conf.js` in the current working directory. - -### Configuration with Environment Variables +The server looks for `codecept.conf.js` (then `.cjs`) in the current working directory. -Use environment variables to specify the CodeceptJS project directory and config file: +### With env vars ```json { "mcpServers": { "codeceptjs": { - "command": "node", - "args": ["path/to/codeceptjs/bin/mcp-server.js"], + "command": "npx", + "args": ["codeceptjs-mcp"], "env": { - "CODECEPTJS_CONFIG": "/path/to/your/codecept.conf.js", - "CODECEPTJS_PROJECT_DIR": "/path/to/your/project" + "CODECEPTJS_CONFIG": "/absolute/path/to/codecept.conf.js", + "CODECEPTJS_PROJECT_DIR": "/absolute/path/to/project" } } } } ``` -**Environment Variables:** - | Variable | Description | |----------|-------------| -| `CODECEPTJS_CONFIG` | Absolute path to the CodeceptJS configuration file | -| `CODECEPTJS_PROJECT_DIR` | Absolute path to the project root directory | +| `CODECEPTJS_CONFIG` | Absolute path to `codecept.conf.js`. Overrides cwd lookup. | +| `CODECEPTJS_PROJECT_DIR` | Absolute path to the project root. Used as the resolution base for the config file. 
| -### Example: Full Claude Desktop Configuration +## Session Defaults -```json -{ - "mcpServers": { - "codeceptjs-mcp": { - "command": "node", - "args": ["D:/projects/my-project/node_modules/codeceptjs/bin/mcp-server.js"], - "env": { - "CODECEPTJS_CONFIG": "D:/projects/my-project/codecept.conf.js", - "CODECEPTJS_PROJECT_DIR": "D:/projects/my-project" - } - } - } -} -``` +When the session starts, the MCP server enforces two plugin defaults so the agent gets useful telemetry out of the box: + +- **`aiTrace: { enabled: true, on: 'step' }`** — every step persists DOM/ARIA/screenshot/console artifacts to `output/trace__/`. Each scenario's `traceFile` is returned in run results so the agent can `Read` the markdown directly. +- **`browser: { enabled: true, show: false }`** — headless. Switch to headed via `start_browser` `plugins` arg. + +Both can be overridden (or disabled) via `start_browser`'s `plugins` argument. The `codecept.conf.js`'s own plugin config still merges in for keys the user explicitly set there. ## Available Tools -### list_tests +### `start_browser` -List all tests in the CodeceptJS project. +Initializes the session — loads config, builds the container, opens the browser, kicks off the synthetic test scope so `run_code` and `snapshot` work. This is the only tool that customizes initialization; every other tool either uses the active session or auto-inits with project defaults. **Parameters:** -- `config` (optional): Path to codecept.conf.js (default: codecept.conf.js) +- `config` (string, optional) — absolute path to `codecept.conf.js`. Defaults to `$CODECEPTJS_CONFIG`, then `./codecept.conf.js` in `$CODECEPTJS_PROJECT_DIR` or cwd. +- `plugins` (object, optional) — plugin configs keyed by name. Same shape as `plugins` in `codecept.conf.js`; `enabled: true` is added automatically. 
Most useful entries: + - `{ browser: { show: true } }` — visible browser + - `{ browser: { browser: "firefox", windowSize: "1280x720" } }` — switch browser + viewport + - `{ aiTrace: { enabled: false } }` — disable per-step trace overhead on a re-run + - `{ pause: { on: "fail" } }` / `{ screenshot: { on: "step" } }` — any other plugin works the same way **Returns:** ```json { - "count": 5, - "tests": [ - { - "file": "/full/path/to/test/file.js", - "relativePath": "tests/example_test.js" - } - ] + "status": "Session started — run_code and snapshot are now available", + "plugins": { "browser": { "show": false } } } ``` -**Example:** +### `stop_browser` + +Closes the browser handles, drops the synthetic test scope, but **keeps the container, codecept, and Mocha alive**. Subsequent `start_browser` reopens the browser without rebuilding everything — important because ESM-loaded test files don't re-execute their top-level `Scenario(...)` on reload, so a fresh Mocha would have no suites. + +**Parameters:** none + +**Returns:** +```json +{ "status": "Browser stopped — Mocha and config preserved; call start_browser to reopen" } +``` + +### `cancel` + +Aborts the currently paused or in-progress test run **without closing the browser**. Use when you want to bail out of a paused test and start something else. Mocha + container stay alive; the next `run_test` / `run_step_by_step` works immediately. + +**Parameters:** none + +**Returns:** +```json +{ "status": "Run cancelled — browser kept open" } +``` + +### `list_tests` + +Lists all tests resolved from the project's `tests:` glob. + +**Parameters:** none + +**Returns:** ```json { - "name": "list_tests", - "arguments": { - "config": "/path/to/codecept.conf.js" - } + "count": 5, + "tests": [ + { "file": "/abs/path/to/work_orders_test.js", "relativePath": "work_orders_test.js" } + ] } ``` -### list_actions +### `list_actions` -List all available CodeceptJS actions (I.* methods) from enabled helpers and support objects. 
+Lists every `I.*` method from enabled helpers and support objects. -**Parameters:** -- `config` (optional): Path to codecept.conf.js +**Parameters:** none **Returns:** ```json { "count": 120, "actions": [ - { - "helper": "Playwright", - "action": "amOnPage", - "signature": "I.amOnPage(url)" - }, - { - "helper": "Playwright", - "action": "click", - "signature": "I.click(locator, context)" - } + { "helper": "Playwright", "action": "amOnPage", "signature": "I.amOnPage(url)" }, + { "helper": "SupportObject", "action": "loginAsAdmin", "signature": "I.loginAsAdmin()" } ] } ``` -### run_code +### `run_code` -Run arbitrary CodeceptJS code. The tool captures the value the code returns, every `I.*` step it runs, anything written to `console.log` / `console.info` / `console.warn` / `console.error` / `console.debug`, plus a final-state snapshot of the page. +Run arbitrary JavaScript inside the live test scope. Captures steps, console output, return value, and a final-state snapshot. **Parameters:** -- `code` (required): CodeceptJS code to execute. May `return` a value and use `console.*` for debugging. -- `timeout` (optional): Timeout in milliseconds (default: 60000) -- `config` (optional): Path to codecept.conf.js -- `saveArtifacts` (optional): Save final-state artifacts to disk (default: true) +- `code` (string, required) — JS source. Use `await` on `I.*` calls. +- `timeout` (number, optional) — ms (default `60000`). +- `saveArtifacts` (boolean, optional) — capture final-state artifacts (default `true`). +- `settleMs` (number, optional) — wait this many ms after the code finishes before capturing artifacts (default `300`). Bump to `1000`+ for slow re-renders, `0` to skip. + +**Scope (everything reachable as a bare identifier in `code`):** + +| Symbol | Source | +|--------|--------| +| `I` | The actor (with all helper methods) | +| Custom support objects | `include:` in `codecept.conf.js` (e.g. 
page objects, `login` from `auth` plugin) | +| `locate`, `within`, `session`, `secret`, `inject`, `pause`, `share` | from `codeceptjs` | +| `tryTo`, `retryTo`, `hopeThat` | from `codeceptjs/effects` | +| `step` | from `codeceptjs/steps` | +| `element`, `eachElement`, `expectElement`, `expectAnyElement`, `expectAllElements` | from `codeceptjs/els` | +| `container` | the DI container | +| `helpers` | live helpers map (e.g. `helpers.Playwright.page` for raw Playwright access) | + +The full live list is returned in every response under `availableObjects`. + +**Return-value handling:** +- An explicit `return X` is JSON-stringified (with circular-ref handling). Capped at 20 KB. +- If you forget `return`, the last grabbed step value is returned automatically (`await I.grabTitle()` on the last line works). +- A returned `WebElement` (or array of them, from `I.grabWebElement(s)`) is auto-described to a plain object: `{ text, html, visible, enabled, attrs }`. **Returns:** ```json @@ -160,14 +188,10 @@ Run arbitrary CodeceptJS code. The tool captures the value the code returns, eve "status": "success", "output": "Code executed successfully", "error": null, - "commands": [ - "I.amOnPage(\"/\")", - "I.grabTextFrom(\"h1\")" - ], - "logs": [ - { "level": "log", "message": "headline Welcome", "t": 47 } - ], + "commands": ["I am on page \"/\"", "I grab text from \"h1\""], + "logs": [{ "level": "log", "message": "headline Welcome", "t": 47 }], "returnValue": "{\n \"url\": \"http://localhost:8000/\",\n \"text\": \"Welcome\"\n}", + "availableObjects": ["I", "container", "eachElement", "element", "expectAllElements", "expectAnyElement", "expectElement", "helpers", "hopeThat", "inject", "locate", "login", "pause", "retryTo", "secret", "session", "share", "step", "tryTo", "within"], "artifacts": { "url": "http://localhost:8000/", "html": "file:///output/trace_run_code_.../mcp_page.html", @@ -178,35 +202,33 @@ Run arbitrary CodeceptJS code. 
The tool captures the value the code returns, eve "cookieCount": 3, "localStorageCount": 5 }, + "ariaDiff": "...", "dir": "/output/trace_run_code_...", "traceFile": "file:///output/trace_run_code_.../trace.md" } ``` -**Notes:** -- `returnValue` is the value the code's last `return` statement produced, JSON-stringified with circular-ref handling. Capped at 20 KB; `returnValueTruncated: true` is set if it was cut. -- `logs` is an in-order list of console output captured during execution. Each entry has `{ level, message, t }` where `t` is ms since the code started. Capped at 100 entries × 2 KB per message; `logsTruncated: true` is set if hit. `console.*` writes do not pollute MCP stdio — they're captured in-memory only. -- `commands` is the list of `I.*` calls observed during execution (via the recorder). -- `artifacts.storage` is omitted when both cookies and localStorage are empty. +- `traceFile` — markdown summary of this call. `Read` it for full context. +- `ariaDiff` — present when the call mutated the page; diff between the previous aiTrace ARIA snapshot and the new one. +- `aiTraceHint` — appears when aiTrace is disabled, suggesting how to re-enable it. **Example:** ```json { "name": "run_code", "arguments": { - "code": "await I.amOnPage('/'); const t = await I.grabTextFrom('h1'); console.log('headline', t); return { url: await I.grabCurrentUrl(), text: t };", - "timeout": 30000 + "code": "await I.amOnPage('/'); const t = await I.grabTextFrom('h1'); return { url: await I.grabCurrentUrl(), text: t };" } } ``` -### snapshot +### `snapshot` -Capture the current state of the browser without performing any action. Useful for inspecting what's on the page right now (URL, cookies, localStorage, formatted HTML, ARIA, screenshot, browser console logs) when reasoning between actions. +Capture the current browser state without performing any action. 
**Parameters:** -- `config` (optional): Path to codecept.conf.js -- `fullPage` (optional): Take a full-page screenshot (default: false) +- `fullPage` (boolean, optional) — full-page screenshot (default `false`). +- `settleMs` (number, optional) — wait before capture (default `300`). **Returns:** ```json @@ -227,265 +249,146 @@ Capture the current state of the browser without performing any action. Useful f } ``` -**Example:** -```json -{ - "name": "snapshot", - "arguments": { "fullPage": true } -} -``` - -### continue +### `run_test` -Release a paused test (one that called `pause()` during `run_test`) and let it run to completion. Returns the final reporter result. +Run a specific scenario. Returns reporter JSON with one entry per scenario; each entry has a `traceFile` (file:// URL) pointing to the per-scenario aiTrace markdown — `Read` it on failures to see the failing step's DOM/ARIA/screenshot. -To inspect or manipulate state while the test is paused, use [`run_code`](#run_code) — it operates on the same container the test is using. +If the test calls `pause()` — or if `pauseAt` matches a step — returns early with `status: "paused"` so the agent can inspect via `run_code` and release with `continue` (or abort with `cancel`). **Parameters:** -- `timeout` (optional): ms to wait for the test to finish after continuing (default 60000). - -**Returns:** -```json -{ - "status": "completed", - "reporterJson": { "stats": { "tests": 1, "passes": 1, "failures": 0 }, "tests": [...] }, - "error": null -} -``` - -**Example flow:** - -```json -{ "name": "run_test", "arguments": { "test": "checkout_test" } } -// → { "status": "paused", "file": "...", "note": "..." } - -{ "name": "run_code", "arguments": { "code": "return await I.grabCurrentUrl()" } } -// → { "status": "success", "returnValue": "http://...", "artifacts": { ... } } - -{ "name": "run_code", "arguments": { "code": "await I.click('Save')" } } -// → { "status": "success", "artifacts": { ... 
} } - -{ "name": "continue", "arguments": {} } -// → { "status": "completed", "reporterJson": { ... } } -``` - -**Notes:** -- Pause runs in-process: `run_code` and the test share the same `I` / browser. There's no subprocess, no IPC. -- `run_test` and `continue` wrap test execution in the same `withSilencedIO` helper that `run_step_by_step` uses, so step output doesn't interleave with the MCP JSON-RPC stream. Stdout/stderr are restored before each tool call returns. -- TTY behaviour (`npx codeceptjs run --debug` at a terminal) is unchanged — `pause()` opens the readline REPL whenever `process.stdin.isTTY` is true. - -### run_test - -Run a specific test by name or file path. Runs in-process so it shares the same `I` / browser as `run_code` and `snapshot`. If the test calls `pause()` — or if `pauseAt` is set and the Nth step completes — this tool returns early and the agent drives the session through `run_code` and `continue`. - -**Parameters:** -- `test` (required): Test name or file path -- `timeout` (optional): Timeout in milliseconds (default: 60000) -- `config` (optional): Path to codecept.conf.js -- `pauseAt` (optional): 1-based step index. The test pauses after the Nth step completes. Use this as a programmatic breakpoint without editing the test. Discover step indices via the `list` CLI (`--steps`) or via `run_step_by_step`. - -**Returns (test completed normally):** +- `test` (string, required) — file path or partial test name; resolved to a single test file. +- `timeout` (number, optional) — overall ms (default `60000`). +- `grep` (string, optional) — filter scenarios by title; passed to `mocha.grep`. Mirrors `--grep` on the CLI. +- `pauseAt` (number | string, optional) — programmatic breakpoint. 
Either: + - `number` — 1-based step index (test pauses after the Nth step completes) + - `string` — case-insensitive substring match against step name + - `"/regex/i"` — regex literal (the `/.../i` form is honored verbatim) + +**Returns (completed normally):** ```json { "status": "completed", "file": "/path/to/test.js", - "reporterJson": { "stats": { "tests": 1, "passes": 1, "failures": 0 }, "tests": [...] }, + "reporterJson": { + "stats": { "tests": 1, "passes": 1, "failures": 0 }, + "tests": [ + { + "title": "lists materials", + "file": "/path/to/materials_test.js", + "status": "passed", + "duration": 4123, + "traceFile": "file:///output/trace_materials__lists_materials_xxxx/trace.md" + } + ] + }, "error": null } ``` -**Returns (test reached `pause()` or `pauseAt`):** +**Returns (paused):** ```json { "status": "paused", "file": "/path/to/test.js", - "pausedAfter": { "index": 3, "name": "I.click(\"Save\")", "status": "passed" }, - "page": { "url": "https://example.com/checkout", "title": "Checkout", "contentSize": 18432 }, + "pausedAfter": { "index": 7, "name": "I select option {\"css\":\"main select\"}, \"Flux\"", "status": "success" }, + "page": { "url": "https://app.example.com/materials", "title": "Materials", "contentSize": 18432 }, "suggestions": [ "Call snapshot to capture URL/HTML/ARIA/screenshot/console/storage at this point", "Call run_code to inspect or manipulate state (e.g. 
return await I.grabText(\"h1\"))", - "Call continue to release the pause and let the test finish" + "Call continue to release the pause and let the test run the next step (or finish)" ] } ``` -**Features:** -- Automatically resolves test names to file paths -- Supports partial test name matching -- Runs in-process; results assembled from CodeceptJS test events -- Yields on `pause()` (or `pauseAt`) so the agent can inspect via `run_code` and release with `continue` - -**Example:** +**Examples:** ```json -{ - "name": "run_test", - "arguments": { - "test": "basic_navigation_test", - "timeout": 60000 - } -} +{ "name": "run_test", "arguments": { "test": "checkout_test", "pauseAt": 5 } } +{ "name": "run_test", "arguments": { "test": "checkout_test", "pauseAt": "fill field" } } +{ "name": "run_test", "arguments": { "test": "checkout_test", "pauseAt": "/grab.*url/i" } } ``` -### run_step_by_step +### `run_step_by_step` -Run a test interactively, pausing after every step. Returns a paused payload after the first step completes — the agent then calls `continue` to advance one step at a time, or `run_code` / `snapshot` to inspect state at any pause. +Run a test interactively, pausing after every step. The agent advances with `continue` or inspects with `run_code` / `snapshot`. **Parameters:** -- `test` (required): Test name or file path -- `timeout` (optional): per-call timeout in milliseconds (default: 60000) -- `config` (optional): Path to codecept.conf.js +- `test` (string, required) +- `timeout` (number, optional) +- `grep` (string, optional) +- `plugins` (object, optional) — same as `start_browser`. Most useful is `{ browser: { show: true } }` so you can watch the run between pauses. 
**Returns (after each step):** ```json { "status": "paused", "file": "/path/to/test.js", - "pausedAfter": { "index": 1, "name": "I.amOnPage(\"/\")", "status": "passed" }, + "pausedAfter": { "index": 1, "name": "I am on page \"/\"", "status": "success" }, "page": { "url": "http://localhost:8000/", "title": "Test App", "contentSize": 1832 }, - "suggestions": [ - "Call snapshot to capture URL/HTML/ARIA/screenshot/console/storage at this point", - "Call run_code to inspect or manipulate state ...", - "Call continue to release the pause and let the test run the next step (or finish)" - ] + "suggestions": [...] } ``` -**Returns (after the last step):** -```json -{ "status": "completed", "file": "...", "reporterJson": { "stats": {...}, "tests": [...] } } -``` - -**Flow:** -```json -{ "name": "run_step_by_step", "arguments": { "test": "checkout_test" } } -// → { "status": "paused", "pausedAfter": { "index": 1, ... } } - -{ "name": "snapshot", "arguments": {} } -// → full artifact bundle for step 1 - -{ "name": "continue", "arguments": {} } -// → { "status": "paused", "pausedAfter": { "index": 2, ... } } - -{ "name": "continue", "arguments": {} } -// → ... and so on, until { "status": "completed", "reporterJson": {...} } -``` - -For a one-shot breakpoint (pause once at a specific step rather than every step), use `run_test` with `pauseAt: N` instead. - -For per-step trace artifacts written to disk (HTML / ARIA / screenshot / console / storage per step) without the interactive flow, enable the `aiTrace` plugin. +**Returns (after the last step):** same shape as `run_test`'s completed response — every scenario carries its `traceFile`. -### start_browser +### `continue` -Start the browser session (initializes CodeceptJS container). +Release a paused test. The test runs until the next pause (`run_step_by_step`), the next `pause()` call, or completion. 
**Parameters:** -- `config` (optional): Path to codecept.conf.js +- `timeout` (number, optional) — ms to wait for the next pause / completion (default `60000`). -**Returns:** -```json -{ - "status": "Browser started successfully" -} -``` - -**Note:** Browser is automatically started on first code execution. This tool is useful for pre-initialization. +**Returns (re-paused):** same shape as `run_test`'s paused response, with the new `pausedAfter` index. -### stop_browser +**Returns (completed):** same shape as `run_test`'s completed response. -Stop the browser session and cleanup resources. +## Pause-and-poke flow -**Parameters:** -- None - -**Returns:** ```json -{ - "status": "Browser stopped successfully" -} -``` - -**Note:** Useful for releasing resources between long-running sessions. - -## Testing - -### Run MCP Server Tests - -The MCP server includes a comprehensive test suite: - -```bash -node test/mcp/mcp_server_test.js -``` - -Tests cover: -- Tool listing and schema validation -- Test enumeration -- Action listing -- Code execution with artifacts -- Test execution (run_test) -- Step-by-step execution -- Browser lifecycle -- Error handling - -### Run Demo Tests with MCP - -**Important: Start the test web server first!** - -The MCP test scenarios require a web server running on port 8000. Start it in a separate terminal: - -```bash -# Option 1: Using http-server (recommended) -cd test/mcp -npx http-server -p 8000 - -# Option 2: Using Python -cd test/mcp -python -m http.server 8000 +{ "name": "run_step_by_step", "arguments": { "test": "checkout_test" } } +// → { "status": "paused", "pausedAfter": { "index": 1, ... 
} } -# Option 3: Using PHP -cd test/mcp -php -S localhost:8000 -``` +{ "name": "snapshot", "arguments": {} } +// → full artifact bundle for step 1 -The server will start at http://127.0.0.1:8000 +{ "name": "run_code", "arguments": { "code": "return await I.grabCurrentUrl()" } } +// → { "status": "success", "returnValue": "http://...", "artifacts": { ... } } -**Keep this terminal open** while running tests through MCP/Claude. +{ "name": "run_code", "arguments": { "code": "await I.click('Save')" } } +// → { "status": "success", ... } — actually mutates the live page -Once the server is running, you can use Claude to run tests: +{ "name": "continue", "arguments": {} } +// → { "status": "paused", "pausedAfter": { "index": 2, ... } } +// ... or bail out: +{ "name": "cancel", "arguments": {} } +// → { "status": "Run cancelled — browser kept open" } ``` -"List all tests" -"Run basic navigation test" -"Run form interaction test step by step" -``` - -**Note:** If tests fail with ERR_CONNECTION_REFUSED, make sure the web server is running on port 8000. -## Trace Files for AI Debugging +Notes: +- Pause runs in-process: `run_code` and the test share the same `I` / browser. There's no subprocess, no IPC. +- `run_test` / `run_step_by_step` / `continue` silence stdout/stderr while running so step output doesn't interleave with the MCP JSON-RPC stream. +- TTY behaviour is unchanged — `npx codeceptjs run --debug` at a terminal still opens the readline REPL when `process.stdin.isTTY` is true. The MCP server only intercepts pause when its handler is registered. 
-When using `run_step_by_step`, the server generates trace files that provide rich context for AI agents: +## Trace files (aiTrace) -### Trace File Structure +When `aiTrace` is on (the default for MCP sessions), every step in a scenario produces: ``` output/ -└── trace_Test_Name_abc123/ - ├── 0000__screenshot.png # Screenshot after step 0 - ├── 0000__page.html # Formatted HTML (minified -> trash classes/scripts/styles stripped -> beautified) - ├── 0000__aria.txt # ARIA snapshot after step 0 (Playwright only) - ├── 0000__console.json # Browser console logs (normalized to {type, text}) +└── trace_Materials__lists_materials_/ ├── 0001__screenshot.png - ├── 0001__page.html - ├── 0001__aria.txt + ├── 0001__page.html # minified → trash classes/scripts/styles stripped → beautified + ├── 0001__aria.txt # Playwright only ├── 0001__console.json - ├── final_storage.json # Cookies + localStorage at test end (run_step_by_step fallback) - └── trace.md # AI-friendly summary with links to all of the above + ├── 0002__... + └── trace.md # AI-friendly markdown index ``` -For ad-hoc `run_code` and `snapshot()` runs, only a single set of artifacts is produced (`mcp_*` / `snapshot_*` prefix), since there are no per-step iterations. +`run_test` / `run_step_by_step` results expose the `trace.md` URL per scenario (`reporterJson.tests[].traceFile`) — `Read` it on failure to see exactly what the failing step saw. -### Using Trace Files with AI +For ad-hoc `run_code` / `snapshot` runs, only a single set of artifacts is produced (`mcp_*` / `snapshot_*` prefix), packaged with their own `trace.md`. -The `trace.md` file provides structured information perfect for AI analysis: +### `trace.md` shape ```markdown # Test: Login functionality @@ -495,11 +398,10 @@ The `trace.md` file provides structured information perfect for AI analysis: ## Steps -1. **I.amOnPage("/login")** - passed (150ms) -2. **I.fillField("#username", "user")** - passed (80ms) -3. 
**I.fillField("#password", "pass")** - passed (75ms) -4. **I.click("#login")** - passed (100ms) -5. **I.see("Welcome")** - failed (50ms) +1. **I.amOnPage("/login")** — passed (150ms) +2. **I.fillField("#username", "user")** — passed (80ms) +3. **I.click("#login")** — passed (100ms) +4. **I.see("Welcome")** — failed (50ms) ## Error @@ -507,148 +409,76 @@ Element "Welcome" not found ## Artifacts -- Screenshot: 0005_screenshot.png -- HTML: 0005_page.html -- ARIA: 0005_aria.txt +- Screenshot: 0004__screenshot.png +- HTML: 0004__page.html +- ARIA: 0004__aria.txt ``` -AI agents can use these artifacts to: -- Visualize what the test saw at each step -- Analyze page structure via ARIA -- Debug issues using HTML snapshots -- Identify errors from console logs - -## HTML Formatting +## HTML formatting -Every HTML snapshot saved by the MCP server (and the aiTrace / pageInfo plugins, since they share the same `captureSnapshot` funnel in `lib/utils/captureSnapshot.js`) is processed through a three-stage pipeline before being written to disk: +Every HTML snapshot saved by the MCP server (and the `aiTrace` / `pageInfo` plugins, since they all funnel through `captureSnapshot` in `lib/utils/trace.js`) goes through: -1. **Minify** (via `html-minifier-terser`) — strips comments, collapses whitespace, removes redundant attributes. 2. **Clean** — drops `