From ba8f85c0a1205126c281a921074d1bfd6a3eae97 Mon Sep 17 00:00:00 2001 From: jdalton Date: Sun, 26 Apr 2026 22:38:59 -0400 Subject: [PATCH 01/16] chore: env allowlist + path-guard + token-guard + hooks .mts + bootstrap + cascade MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidated PR — combines the original work from #1279, #1280, #1281 plus follow-up commits (private-name rule, socket-registry pin cascades) into a single squashed commit. Includes: - env allowlist + .cache/ + CLAUDE.md hygiene (drop NODE_COMPILE_CACHE convention; restore .cache/** exclude in tsconfigs; propagate CLAUDE.md sorting/open-PR/paths/inclusive-language/Set-sort/ don't-revert-untouched/private-name rules; replace whitelist/blacklist with allowlist/denylist) - path-guard infra (PreToolUse hook + scripts/check-paths.mts gate + .github/paths-allowlist.yml + /path-guard skill — enforces "1 path, 1 reference" so multi-stage build paths are constructed exactly once) - token-guard hook (renamed from token-hygiene; word-boundary match for sensitive env names; ALWAYS_DANGEROUS gates on hasRedaction so redacted env dumps pass) - .sh -> .mts hook conversion on Node 25+ (stable type stripping; _helpers.mts hard-fails at module load if Node < 25; husky shims invoke node directly; SOCKET_CLI_NO_API_TOKEN=1 for pre-commit tests) - internal hook package rename (drop @socketsecurity/ scope from hook-path-guard, hook-token-guard, hook-check-new-deps; private, never published) - xport lock-step manifest (scripts/xport.mts + scripts/xport-schema.mts + scripts/xport-emit-schema.mts + xport.schema.json) - bootstrap-from-registry (scripts/bootstrap-from-registry.mts downloads zero-dep Socket packages from npm registry into node_modules/ via preinstall hook, solving fresh-clone chicken-and-egg) - socket-registry pins cascaded to ceab1e26 (picks up the @socketsecurity/lib bootstrap move from the install action into setup, so consumers calling only setup also 
benefit) --- .claude/agents/security-reviewer.md | 4 +- .claude/hooks/check-new-deps/package.json | 2 +- .claude/hooks/path-guard/README.md | 66 ++ .claude/hooks/path-guard/index.mts | 339 ++++++ .claude/hooks/path-guard/package.json | 12 + .claude/hooks/path-guard/segments.mts | 80 ++ .../hooks/path-guard/test/path-guard.test.mts | 378 +++++++ .claude/hooks/path-guard/tsconfig.json | 15 + .claude/hooks/token-guard/README.md | 57 + .claude/hooks/token-guard/index.mts | 261 +++++ .claude/hooks/token-guard/package.json | 12 + .../token-guard/test/token-guard.test.mts | 225 ++++ .claude/hooks/token-guard/tsconfig.json | 15 + .claude/settings.json | 8 +- .claude/skills/_shared/path-guard-rule.md | 39 + .claude/skills/path-guard/SKILL.md | 248 +++++ .../path-guard/reference/check-paths.mts.tmpl | 946 +++++++++++++++++ .../path-guard/reference/claude-md-rule.md | 29 + .../reference/paths-allowlist.yml.tmpl | 28 + .claude/skills/security-scan/SKILL.md | 1 + .config/tsconfig.check.json | 2 + .env.example | 1 - .env.precommit | 1 - .git-hooks/_api-key-check.sh | 51 + .git-hooks/_helpers.mts | 304 ++++++ .git-hooks/_helpers.sh | 43 - .git-hooks/commit-msg | 90 -- .git-hooks/commit-msg.mts | 111 ++ .git-hooks/pre-commit.mts | 186 ++++ .git-hooks/pre-push | 200 ---- .git-hooks/pre-push.mts | 324 ++++++ .github/paths-allowlist.yml | 30 + .github/workflows/ci.yml | 8 +- .github/workflows/provenance.yml | 6 +- .github/workflows/weekly-update.yml | 8 +- .gitignore | 3 + .husky/commit-msg | 7 +- .husky/pre-commit | 32 +- .husky/pre-push | 2 +- CLAUDE.md | 72 +- package.json | 2 + packages/cli/.config/tsconfig.check.json | 1 + packages/cli/.env.test | 1 - .../utils/validation/check-input.test.mts | 2 +- .../babel/babel-plugin-inline-process-env.mts | 6 +- scripts/bootstrap-from-registry.mts | 199 ++++ scripts/check-paths.mts | 946 +++++++++++++++++ scripts/check.mts | 39 + scripts/xport-emit-schema.mts | 37 + scripts/xport-schema.mts | 355 +++++++ scripts/xport.mts | 989 
++++++++++++++++++ tsconfig.json | 4 +- xport.schema.json | 466 +++++++++ 53 files changed, 6921 insertions(+), 372 deletions(-) create mode 100644 .claude/hooks/path-guard/README.md create mode 100644 .claude/hooks/path-guard/index.mts create mode 100644 .claude/hooks/path-guard/package.json create mode 100644 .claude/hooks/path-guard/segments.mts create mode 100644 .claude/hooks/path-guard/test/path-guard.test.mts create mode 100644 .claude/hooks/path-guard/tsconfig.json create mode 100644 .claude/hooks/token-guard/README.md create mode 100644 .claude/hooks/token-guard/index.mts create mode 100644 .claude/hooks/token-guard/package.json create mode 100644 .claude/hooks/token-guard/test/token-guard.test.mts create mode 100644 .claude/hooks/token-guard/tsconfig.json create mode 100644 .claude/skills/_shared/path-guard-rule.md create mode 100644 .claude/skills/path-guard/SKILL.md create mode 100644 .claude/skills/path-guard/reference/check-paths.mts.tmpl create mode 100644 .claude/skills/path-guard/reference/claude-md-rule.md create mode 100644 .claude/skills/path-guard/reference/paths-allowlist.yml.tmpl create mode 100755 .git-hooks/_api-key-check.sh create mode 100644 .git-hooks/_helpers.mts delete mode 100644 .git-hooks/_helpers.sh delete mode 100755 .git-hooks/commit-msg create mode 100644 .git-hooks/commit-msg.mts create mode 100644 .git-hooks/pre-commit.mts delete mode 100755 .git-hooks/pre-push create mode 100644 .git-hooks/pre-push.mts create mode 100644 .github/paths-allowlist.yml create mode 100644 scripts/bootstrap-from-registry.mts create mode 100644 scripts/check-paths.mts create mode 100644 scripts/xport-emit-schema.mts create mode 100644 scripts/xport-schema.mts create mode 100644 scripts/xport.mts create mode 100644 xport.schema.json diff --git a/.claude/agents/security-reviewer.md b/.claude/agents/security-reviewer.md index a56250453..6ae108892 100644 --- a/.claude/agents/security-reviewer.md +++ b/.claude/agents/security-reviewer.md @@ -4,7 +4,7 @@ 
Apply these rules from CLAUDE.md exactly: **Safe File Operations**: Use safeDelete()/safeDeleteSync() from @socketsecurity/lib/fs. NEVER fs.rm(), fs.rmSync(), or rm -rf. Use os.tmpdir() + fs.mkdtemp() for temp dirs. NEVER use fetch() — use httpJson/httpText/httpRequest from @socketsecurity/lib/http-request. -**Absolute Rules**: NEVER use npx, pnpm dlx, or yarn dlx. Use pnpm exec or pnpm run with pinned devDeps. +**Absolute Rules**: NEVER use npx, pnpm dlx, or yarn dlx. Use pnpm exec or pnpm run with pinned devDeps. # zizmor: documentation-prohibition **Work Safeguards**: Scripts modifying multiple files must have backup/rollback. Git operations that rewrite history require explicit confirmation. @@ -12,7 +12,7 @@ Apply these rules from CLAUDE.md exactly: 1. **Secrets**: Hardcoded API keys, passwords, tokens, private keys in code or config 2. **Injection**: Command injection via shell: true or string interpolation in spawn/exec. Path traversal in file operations. -3. **Dependencies**: npx/dlx usage. Unpinned versions (^ or ~). Missing minimumReleaseAge bypass justification. +3. **Dependencies**: npx/dlx usage. Unpinned versions (^ or ~). Missing minimumReleaseAge bypass justification. # zizmor: documentation-checklist 4. **File operations**: fs.rm without safeDelete. process.chdir usage. fetch() usage (must use lib's httpRequest). 5. **GitHub Actions**: Unpinned action versions (must use full SHA). Secrets outside env blocks. Template injection from untrusted inputs. 6. **Error handling**: Sensitive data in error messages. Stack traces exposed to users. 
diff --git a/.claude/hooks/check-new-deps/package.json b/.claude/hooks/check-new-deps/package.json index 96d04649b..11f3f9a79 100644 --- a/.claude/hooks/check-new-deps/package.json +++ b/.claude/hooks/check-new-deps/package.json @@ -1,5 +1,5 @@ { - "name": "@socketsecurity/hook-check-new-deps", + "name": "hook-check-new-deps", "private": true, "type": "module", "main": "./index.mts", diff --git a/.claude/hooks/path-guard/README.md b/.claude/hooks/path-guard/README.md new file mode 100644 index 000000000..523a31b45 --- /dev/null +++ b/.claude/hooks/path-guard/README.md @@ -0,0 +1,66 @@ +# path-guard + +Claude Code `PreToolUse` hook that refuses `Edit`/`Write` tool calls that would *construct* a multi-segment build/output path inline in a `.mts` or `.cts` file. Mandatory across the Socket fleet — every repo ships this file byte-for-byte via `scripts/sync-scaffolding.mjs`. + +**Mantra: 1 path, 1 reference.** + +Construct a path *once* in the canonical `paths.mts` (or a build-infra helper); reference the computed value everywhere else. + +## What it blocks + +| Rule | Example | Fix | +|------|---------|-----| +| **A** — Multi-stage path constructed inline | `path.join(PKG, 'build', mode, 'out', 'Final', name)` | Construct in the package's `scripts/paths.mts` (or use `getFinalBinaryPath` from `build-infra/lib/paths`); import the computed value here | +| **B** — Cross-package path traversal | `path.join(PKG, '..', 'lief-builder', 'build', ...)` | Add `lief-builder: workspace:*` as a dep; import its `paths.mts` via the workspace `exports` field | + +The hook fires on `Edit` and `Write` tool calls when the target path ends in `.mts` or `.cts`. Other extensions (`.ts`, `.mjs`, `.js`, `.yml`, `.json`, `.md`) pass through — TS path code lives in `.mts` per CLAUDE.md, and other file types are covered by the `scripts/check-paths.mts` gate at commit time. + +## What it allows + +- Edits to a `paths.mts` (canonical constructor — every package's source of truth). 
+- Edits to `scripts/check-paths.mts` (the gate, which legitimately enumerates patterns). +- Edits to this hook's own files (the test suite has to enumerate the same patterns). +- Edits to `scripts/check-consistency.mts` (existing path-scanning gate). +- `path.join` calls with a single stage segment (e.g. `path.join(packageRoot, 'build', 'temp')`) — that's a one-off helper path, not a multi-stage build output. +- `path.join` calls with no stage segments at all (most general-purpose joins). +- Any string concatenation that doesn't go through `path.join` — the hook is regex-based and intentionally narrow; the gate runs a deeper scan at commit time. + +## Stage segments the hook recognizes + +These come from `build-infra/lib/constants.mts` `BUILD_STAGES` plus the lowercase directory-name siblings used by some builders: + +`Final`, `Release`, `Stripped`, `Compressed`, `Optimized`, `Synced`, `wasm`, `downloaded` + +Two or more in the same `path.join` call (or one stage + one of `'build'`/`'out'` + one mode `'dev'`/`'prod'`) triggers Rule A. + +## Known sibling packages (for Rule B) + +The hook recognizes Rule B traversals only when the next segment after `..` is a known fleet package name: + +`binflate`, `binject`, `binpress`, `bin-infra`, `build-infra`, `codet5-models-builder`, `curl-builder`, `iocraft-builder`, `ink-builder`, `libpq-builder`, `lief-builder`, `minilm-builder`, `models`, `napi-go`, `node-smol-builder`, `onnxruntime-builder`, `opentui-builder`, `stubs-builder`, `ultraviolet-builder`, `yoga-layout-builder` + +When a new package joins the workspace, add it here. + +## Control flow + +The hook reads the tool-use payload from stdin, type-checks `tool_name === 'Edit'` or `'Write'`, filters to `.mts`/`.cts` files, and runs `check(source)`. Any rule violation `throw`s a typed `BlockError`; a single top-level `try/catch` in `main()` writes the block message to stderr and sets `process.exitCode = 2`. 
+ +Hook bugs fail **open** — a crash in the hook writes a log line and returns exit 0 so legitimate work isn't blocked on a bad deploy. The companion `scripts/check-paths.mts` gate runs a thorough whole-repo scan at `pnpm check` time, catching anything the hook misses. + +## Testing + +```bash +pnpm --filter hook-path-guard test +``` + +Adding a new detection pattern: update `STAGE_SEGMENTS` (or `KNOWN_SIBLING_PACKAGES`) in `index.mts`, add a positive and negative test in `test/path-guard.test.mts`. + +## Updating across the fleet + +This file is in `IDENTICAL_FILES` in `scripts/sync-scaffolding.mjs` (in `socket-repo-template`). After editing, run from `socket-repo-template`: + +```bash +node scripts/sync-scaffolding.mjs --all --fix +``` + +to propagate the change to every fleet repo. diff --git a/.claude/hooks/path-guard/index.mts b/.claude/hooks/path-guard/index.mts new file mode 100644 index 000000000..ced9fcfc1 --- /dev/null +++ b/.claude/hooks/path-guard/index.mts @@ -0,0 +1,339 @@ +#!/usr/bin/env node +// Claude Code PreToolUse hook — path-guard firewall. +// +// Mantra: 1 path, 1 reference. +// +// Blocks Edit/Write tool calls that would *construct* a multi-segment +// build/output path inline in a `.mts` or `.cts` file, instead of +// importing the constructed value from the canonical `paths.mts` (or a +// build-infra helper). This fires BEFORE the write lands; exit code 2 +// makes Claude Code refuse the tool call so the diff never touches the +// repo. The model sees the rejection reason on stderr and retries with +// an import-based approach. 
+// +// What the hook checks (subset of the gate's rules — diff-local only): +// +// Rule A — Multi-stage path construction: a `path.join(...)` call or +// string-template that stitches together two or more "stage" segments +// like `'Final'`, `'Release'`, `'Stripped'`, `'Compressed'`, +// `'Optimized'`, `'Synced'`, `'wasm'`, `'downloaded'` together with +// `'build'` / `'out'` / a mode (`'dev'`/`'prod'`) or platform-arch. +// Outside a `paths.mts` file, this is always a violation: the +// construction belongs in a helper, every consumer imports the +// computed value. +// +// Rule B — Cross-package traversal: `path.join(*, '..', '', 'build', ...)` reaches into a sibling's build output +// without going through its `exports`. Forces consumers to declare a +// workspace dep and import the sibling's `paths.mts`. The R28 yoga/ +// ink bug — ink hand-building yoga's wasm path and missing the +// `wasm/` segment — is exactly the failure mode this prevents. +// +// What the hook does NOT check (the gate handles repo-wide concerns): +// +// Rule C — workflow YAML repetition (gate scans .yml files). +// Rule D — comment-encoded paths (gate scans comments + JSDoc). +// Rule F — same path reconstructed in multiple files (needs whole- +// repo state). +// Rule G — Makefile / Dockerfile / shell-script paths (different +// tool, gate covers). +// +// Scope: +// +// - Fires only on `Edit` and `Write` tool calls. +// - Skips files NOT ending in `.mts` or `.cts`. TS path code lives +// there; .ts/.mjs/.js sources in `additions/` have different +// constraints per CLAUDE.md. +// - Skips when the target itself is a `paths.mts` (canonical +// constructor), the gate (`scripts/check-paths.mts`), or this hook +// — those files legitimately enumerate stage segments. 
+// Control flow uses a `BlockError` thrown from check helpers so every +// short-circuit path goes through a single `process.exitCode = 2` drop +// at the top-level catch — no scattered `process.exit(2)` that can race +// with buffered stderr. The hook fails OPEN on its own bugs (exit 0 + +// log) so a bad deploy of the hook can't brick the session. + +import process from 'node:process' + +import { + BUILD_ROOT_SEGMENTS, + KNOWN_SIBLING_PACKAGES, + MODE_SEGMENTS, + STAGE_SEGMENTS, +} from './segments.mts' + +// File-path patterns that are exempt from the hook entirely. Edits to +// these files legitimately need to enumerate path segments. +const EXEMPT_FILE_PATTERNS: RegExp[] = [ + // Any paths.mts is the canonical constructor. + /(^|\/)paths\.(mts|cts)$/, + // The gate itself and this hook — both enumerate the patterns to + // detect them. + /scripts\/check-paths\.mts$/, + /\.claude\/hooks\/path-guard\/index\.(mts|cts)$/, + /\.claude\/hooks\/path-guard\/test\//, + // Existing path-scanning gates that intentionally enumerate. + /scripts\/check-consistency\.mts$/, +] + +class BlockError extends Error { + public readonly rule: string + public readonly suggestion: string + public readonly snippet: string + constructor(rule: string, suggestion: string, snippet: string) { + super(rule) + this.name = 'BlockError' + this.rule = rule + this.suggestion = suggestion + this.snippet = snippet.slice(0, 240) + (snippet.length > 240 ? '…' : '') + } +} + +const stdin = (): Promise<string> => + new Promise<string>(resolve => { + let buf = '' + process.stdin.setEncoding('utf8') + process.stdin.on('data', chunk => (buf += chunk)) + process.stdin.on('end', () => resolve(buf)) + }) + +type ToolInput = { + tool_name?: string + tool_input?: { + file_path?: string + new_string?: string + content?: string + } +} + +const isInScope = (filePath: string): boolean => { + if (!filePath) { + return false + } + // Only inspect TypeScript-Module / CommonJS-Module sources.
Per + // the user's directive, allowlist by extension. + if (!filePath.endsWith('.mts') && !filePath.endsWith('.cts')) { + return false + } + return !EXEMPT_FILE_PATTERNS.some(re => re.test(filePath)) +} + +// Extract every `path.join(...)` and `path.resolve(...)` call from +// the diff and return its argument substring. Uses paren-balancing so +// deeply nested arguments like `path.join(getDir(child(x)), 'Final')` +// are captured correctly — a regex-only approach silently missed any +// argument with 2+ levels of nested parentheses. +const extractPathCalls = ( + source: string, +): Array<{ snippet: string; literals: string[] }> => { + const calls: Array<{ snippet: string; literals: string[] }> = [] + const callRe = /\bpath\.(?:join|resolve)\s*\(/g + let m: RegExpExecArray | null + while ((m = callRe.exec(source)) !== null) { + const callStart = m.index + const argsStart = callRe.lastIndex + let depth = 1 + let i = argsStart + let inString: '"' | "'" | '`' | null = null + while (i < source.length && depth > 0) { + const ch = source[i]! 
+ if (inString) { + if (ch === '\\') { + i += 2 + continue + } + if (ch === inString) { + inString = null + } + } else { + if (ch === '"' || ch === "'" || ch === '`') { + inString = ch + } else if (ch === '(') { + depth += 1 + } else if (ch === ')') { + depth -= 1 + if (depth === 0) { + break + } + } + } + i += 1 + } + if (depth !== 0) { + continue + } + const args = source.slice(argsStart, i) + const litRe = /(['"])((?:\\.|(?!\1)[^\\])*)\1/g + const literals: string[] = [] + let lit: RegExpExecArray | null + while ((lit = litRe.exec(args)) !== null) { + const value = lit[2] + if (value !== undefined) { + literals.push(value) + } + } + calls.push({ snippet: source.slice(callStart, i + 1), literals }) + callRe.lastIndex = i + 1 + } + return calls +} + +const checkRuleA = (calls: ReturnType<typeof extractPathCalls>): void => { + for (const call of calls) { + const stages = call.literals.filter(l => STAGE_SEGMENTS.has(l)) + const buildRoots = call.literals.filter(l => BUILD_ROOT_SEGMENTS.has(l)) + const modes = call.literals.filter(l => MODE_SEGMENTS.has(l)) + // Trigger if: 2+ stage segments OR (1 stage + 1 build-root + 1 mode). + // Both shapes indicate a hand-built build-output path. + const twoStages = stages.length >= 2 + const stagePlusContext = + stages.length >= 1 && buildRoots.length >= 1 && modes.length >= 1 + if (twoStages || stagePlusContext) { + throw new BlockError( + 'A — multi-stage path constructed inline', + 'Construct this path in the owning `paths.mts` (or a build-infra helper like `getFinalBinaryPath`) and import the computed value here. 1 path, 1 reference.', + call.snippet, + ) + } + } +} + +const checkRuleB = (calls: ReturnType<typeof extractPathCalls>): void => { + for (const call of calls) { + // A sibling package name *immediately after* a `..` literal (no + // path segment in between) plus build context elsewhere in the + // call indicates cross-package traversal. The previous "sticky + // sawDotDot" form fired falsely when '..'
appeared early and an + // unrelated sibling-named segment appeared much later. + const hasBuildContext = call.literals.some( + l => BUILD_ROOT_SEGMENTS.has(l) || STAGE_SEGMENTS.has(l), + ) + if (!hasBuildContext) { + continue + } + for (let i = 0; i < call.literals.length - 1; i++) { + if ( + call.literals[i] === '..' && + KNOWN_SIBLING_PACKAGES.has(call.literals[i + 1]!) + ) { + const sibling = call.literals[i + 1]! + throw new BlockError( + 'B — cross-package path traversal', + `Don't reach into '${sibling}'s build output via \`..\`. Add \`${sibling}: workspace:*\` as a dep and import its \`paths.mts\` via the \`exports\` field. 1 path, 1 reference.`, + call.snippet, + ) + } + } + } +} + +// Backtick template-literal detection. Path construction via +// `${buildDir}/out/Final/${binary}` follows the same shape as +// path.join() and constitutes the same Rule A violation. Placeholders +// (${...}) are stripped to a sentinel that won't match any segment +// set, so segments composed entirely of interpolation contribute +// nothing to the trigger. +const TEMPLATE_LITERAL_RE = /`((?:\\.|(?:\$\{(?:[^{}]|\{[^{}]*\})*\})|(?!`)[^\\])*)`/g + +const checkRuleATemplate = (source: string): void => { + TEMPLATE_LITERAL_RE.lastIndex = 0 + let m: RegExpExecArray | null + while ((m = TEMPLATE_LITERAL_RE.exec(source)) !== null) { + const body = m[1] ?? '' + if (!body.includes('/')) { + continue + } + const stripped = body.replace(/\$\{(?:[^{}]|\{[^{}]*\})*\}/g, '\x00') + const segments = stripped + .split('/') + .filter(s => s.length > 0 && s !== '\x00') + const stages = segments.filter(s => STAGE_SEGMENTS.has(s)) + const buildRoots = segments.filter(s => BUILD_ROOT_SEGMENTS.has(s)) + const modes = segments.filter(s => MODE_SEGMENTS.has(s)) + // Template literal trigger is tighter than path.join() because + // backtick strings often appear in patch fixtures, error messages, + // and other multi-line content that incidentally contains stage + // tokens like `wasm`. 
Require the canonical build-output shape. + const hasBuildAndOut = + buildRoots.includes('build') && buildRoots.includes('out') + const hasOut = buildRoots.includes('out') + const hasBuild = buildRoots.includes('build') + const triggers = + (hasBuildAndOut && stages.length >= 1) || + (stages.length >= 2 && hasOut) || + (hasBuild && stages.length >= 1 && modes.length >= 1) + if (triggers) { + throw new BlockError( + 'A — multi-stage path constructed inline via template literal', + 'Construct this path in the owning `paths.mts` (or a build-infra helper) and import the computed value here. 1 path, 1 reference.', + m[0], + ) + } + } +} + +const check = (source: string): void => { + const calls = extractPathCalls(source) + if (calls.length > 0) { + checkRuleA(calls) + checkRuleB(calls) + } + checkRuleATemplate(source) +} + +const emitBlock = (filePath: string, err: BlockError): void => { + process.stderr.write( + `\n[path-guard] Blocked: ${err.rule}\n` + + ` Mantra: 1 path, 1 reference\n` + + ` File: ${filePath}\n` + + ` Snippet: ${err.snippet}\n` + + ` Fix: ${err.suggestion}\n\n`, + ) +} + +const main = async (): Promise<void> => { + const raw = await stdin() + if (!raw) { + return + } + let payload: ToolInput + try { + payload = JSON.parse(raw) as ToolInput + } catch { + return + } + if (payload.tool_name !== 'Edit' && payload.tool_name !== 'Write') { + return + } + const filePath = payload.tool_input?.file_path ?? '' + if (!isInScope(filePath)) { + return + } + // Edit tool sends `new_string` (the replacement); Write sends + // `content` (the full file). Either is the text we'd be putting on + // disk. + const source = + payload.tool_input?.new_string ?? payload.tool_input?.content ?? '' + if (!source) { + return + } + + try { + check(source) + } catch (e) { + if (e instanceof BlockError) { + emitBlock(filePath, e) + process.exitCode = 2 + return + } + throw e + } +} + +main().catch(e => { + // Never block a tool call due to a bug in the hook itself.
Log it + // so we notice, but fail open. + process.stderr.write(`[path-guard] hook error (allowing): ${e}\n`) + process.exitCode = 0 +}) diff --git a/.claude/hooks/path-guard/package.json b/.claude/hooks/path-guard/package.json new file mode 100644 index 000000000..a7cb5039a --- /dev/null +++ b/.claude/hooks/path-guard/package.json @@ -0,0 +1,12 @@ +{ + "name": "hook-path-guard", + "private": true, + "type": "module", + "main": "./index.mts", + "exports": { + ".": "./index.mts" + }, + "scripts": { + "test": "node --test test/*.test.mts" + } +} diff --git a/.claude/hooks/path-guard/segments.mts b/.claude/hooks/path-guard/segments.mts new file mode 100644 index 000000000..891d0b8b7 --- /dev/null +++ b/.claude/hooks/path-guard/segments.mts @@ -0,0 +1,80 @@ +// Canonical path-segment vocabulary shared by the path-guard hook +// (.claude/hooks/path-guard/index.mts) and gate (scripts/check-paths.mts). +// +// Mantra: 1 path, 1 reference. This module is the *one* place stage, +// build-root, mode, and sibling-package vocabulary is defined. Both +// consumers import from here so they can never drift apart. +// +// Synced byte-identically across the Socket fleet via +// socket-repo-template/scripts/sync-scaffolding.mjs (IDENTICAL_FILES). +// When adding a new stage/build-root/mode/sibling, edit this file in +// the template and re-sync. + +// "Stage" segments — Rule A core. Two of these spread via `path.join` +// or interpolated into a template literal is a finding outside a +// canonical `paths.mts`. Sourced from build-infra/lib/constants.mts +// `BUILD_STAGES` plus their lowercase directory-name siblings used by +// some builders. +export const STAGE_SEGMENTS = new Set([ + 'Compressed', + 'downloaded', + 'Final', + 'Optimized', + 'Release', + 'Stripped', + 'Synced', + 'wasm', +]) + +// "Build-root" segments — at least one must be present together with +// a stage segment to confirm we're constructing a build output path +// rather than something coincidental. 
Example: a join that yields +// `//` doesn't fire if no build-root segment is +// present; `/build//out/` does. +export const BUILD_ROOT_SEGMENTS = new Set(['build', 'out']) + +// Build-mode segments — a stage segment plus one of these is also a +// finding (`build///out/` is the canonical shape). +export const MODE_SEGMENTS = new Set(['dev', 'prod', 'shared']) + +// Sibling fleet packages (Rule B). Union of all packages across the +// Socket fleet — the gate is byte-identical via sync-scaffolding, so +// listing every fleet package keeps Rule B firing in any repo. When a +// new package joins the workspace, add it here and propagate via +// `node scripts/sync-scaffolding.mjs --all --fix` from +// socket-repo-template. +export const KNOWN_SIBLING_PACKAGES = new Set([ + // socket-btm + 'bin-infra', + 'binflate', + 'binject', + 'binpress', + 'build-infra', + 'codet5-models-builder', + 'curl-builder', + 'ink-builder', + 'iocraft-builder', + 'libpq-builder', + 'lief-builder', + 'minilm-builder', + 'models', + 'napi-go', + 'node-smol-builder', + 'onnxruntime-builder', + 'opentui-builder', + 'stubs-builder', + 'ultraviolet-builder', + 'yoga-layout-builder', + // socket-cli + 'cli', + 'package-builder', + // socket-tui + 'core', + 'react', + 'renderer', + 'ultraviolet', + 'yoga', + // socket-registry / ultrathink + 'acorn', + 'npm', +]) diff --git a/.claude/hooks/path-guard/test/path-guard.test.mts b/.claude/hooks/path-guard/test/path-guard.test.mts new file mode 100644 index 000000000..a3c3a1324 --- /dev/null +++ b/.claude/hooks/path-guard/test/path-guard.test.mts @@ -0,0 +1,378 @@ +// Tests for the path-guard hook. Each `node:test` block writes a +// mock PreToolUse payload to the hook's stdin and asserts on its exit +// code + stderr. Exit 2 = blocked; exit 0 = allowed. 
+// +// Run: pnpm --filter hook-path-guard test +// (or directly: node --test test/*.test.mts) + +import { spawnSync } from 'node:child_process' +import path from 'node:path' +import process from 'node:process' +import { fileURLToPath } from 'node:url' + +import { describe, it } from 'node:test' +import assert from 'node:assert/strict' + +const __filename = fileURLToPath(import.meta.url) +const __dirname = path.dirname(__filename) +const HOOK = path.resolve(__dirname, '..', 'index.mts') + +const runHook = ( + toolName: string, + filePath: string, + source: string, +): { code: number; stderr: string } => { + const payload = JSON.stringify({ + tool_name: toolName, + tool_input: + toolName === 'Edit' + ? { file_path: filePath, new_string: source } + : { file_path: filePath, content: source }, + }) + const result = spawnSync(process.execPath, [HOOK], { + encoding: 'utf8', + input: payload, + }) + return { + code: result.status ?? -1, + stderr: result.stderr, + } +} + +describe('path-guard — Rule A (multi-stage construction)', () => { + it('blocks two stage segments in path.join', () => { + const source = ` + const p = path.join(PACKAGE_ROOT, 'wasm', 'out', 'Final', 'bin') + ` + const { code, stderr } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 2) + assert.match(stderr, /Blocked: A/) + assert.match(stderr, /1 path, 1 reference/) + }) + + it('blocks build + mode + stage', () => { + const source = ` + const p = path.join(PKG, 'build', 'dev', 'out', 'Final', 'binary') + ` + const { code } = runHook( + 'Edit', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 2) + }) + + it('blocks Release + Stripped together', () => { + const source = ` + const p = path.join(buildDir, 'Release', 'Stripped') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/release.mts', + source, + ) + assert.equal(code, 2) + }) + + it('allows single stage segment with one build root', () => { + // 'build' + 'temp' → no 
stage segment at all → pass + const source = ` + const tmp = path.join(packageRoot, 'build', 'temp') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 0) + }) + + it('allows path.join with no stage segments', () => { + const source = ` + const cfg = path.join(packageRoot, 'config', 'settings.json') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 0) + }) +}) + +describe('path-guard — Rule B (cross-package traversal)', () => { + it('blocks .. + sibling package + build context', () => { + const source = ` + const lief = path.join(PKG, '..', 'lief-builder', 'build', 'Final') + ` + const { code, stderr } = runHook( + 'Write', + 'packages/binject/scripts/build.mts', + source, + ) + assert.equal(code, 2) + assert.match(stderr, /Blocked: B/) + assert.match(stderr, /lief-builder/) + }) + + it('allows .. + sibling without build context', () => { + // Reaching into a sibling for a non-build asset is allowed; the + // gate may still flag it but the hook is scoped to build paths. + const source = ` + const cfg = path.join(PKG, '..', 'lief-builder', 'config.json') + ` + const { code } = runHook( + 'Write', + 'packages/binject/scripts/build.mts', + source, + ) + assert.equal(code, 0) + }) + + it('does not fire on traversal to unknown directory', () => { + const source = ` + const x = path.join(PKG, '..', 'fixtures', 'build', 'Final') + ` + const { code } = runHook( + 'Write', + 'packages/foo/test/test.mts', + source, + ) + assert.equal(code, 0) + }) + + it('does not fire when .. and sibling are non-adjacent (regression)', () => { + // Earlier regex ran with sticky sawDotDot — once it saw `..` it + // would flag any later sibling-named segment. The fix requires + // the sibling to appear *immediately* after `..`. 
+ const source = ` + const x = path.join(PKG, '..', 'cache', 'lief-builder', 'config.json') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 0) + }) +}) + +describe('path-guard — paren-balance correctness', () => { + it('detects A through nested function-call args (regression)', () => { + // Old regex used \\([^()]*\\) which only handled one nesting + // level — `path.join(getDir(child(x)), 'build', 'dev', 'Final')` + // silently slipped through. The paren-balancing scanner catches it. + const source = ` + const p = path.join(getDir(child(x)), 'build', 'dev', 'out', 'Final') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 2) + }) + + it('detects A in path.resolve() too', () => { + const source = ` + const p = path.resolve(PKG, 'build', 'dev', 'out', 'Final', 'bin') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 2) + }) +}) + +describe('path-guard — template literals', () => { + it('detects A in fully-literal template path', () => { + const source = '\n const p = `build/dev/out/Final/binary`\n ' + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 2) + }) + + it('detects A in template with placeholders', () => { + const source = + '\n const p = `${PKG}/build/${mode}/${arch}/out/Final/${name}`\n ' + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 2) + }) + + it('allows template with single non-stage segment', () => { + const source = '\n const url = `https://example.com/path`\n ' + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 0) + }) + + it('allows template with no stage segments', () => { + const source = '\n const tmp = `${packageRoot}/build/temp/cache`\n ' + const { code } = runHook( + 
'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 0) + }) + + it('allows template that is purely interpolation', () => { + // `${a}/${b}/${c}` has no literal stage segments. + const source = '\n const p = `${a}/${b}/${c}`\n ' + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 0) + }) +}) + +describe('path-guard — file-type filter', () => { + it('skips .ts files', () => { + const source = ` + const p = path.join(PKG, 'build', 'dev', 'out', 'Final', 'bin') + ` + const { code } = runHook('Write', 'packages/foo/src/index.ts', source) + assert.equal(code, 0) + }) + + it('skips .mjs files', () => { + const source = ` + const p = path.join(PKG, 'build', 'dev', 'out', 'Final', 'bin') + ` + const { code } = runHook('Write', 'additions/foo.mjs', source) + assert.equal(code, 0) + }) + + it('skips .yml files', () => { + const source = ` + run: | + FINAL="build/\${MODE}/\${ARCH}/out/Final" + ` + const { code } = runHook( + 'Write', + '.github/workflows/foo.yml', + source, + ) + assert.equal(code, 0) + }) + + it('inspects .mts files', () => { + const source = ` + const p = path.join(PKG, 'build', 'dev', 'out', 'Final', 'bin') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 2) + }) + + it('inspects .cts files', () => { + const source = ` + const p = path.join(PKG, 'build', 'dev', 'out', 'Final', 'bin') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.cts', + source, + ) + assert.equal(code, 2) + }) +}) + +describe('path-guard — exempt files', () => { + it('allows edits to paths.mts', () => { + const source = ` + export const FINAL_DIR = path.join(PKG, 'build', 'dev', 'out', 'Final') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/paths.mts', + source, + ) + assert.equal(code, 0) + }) + + it('allows edits to check-paths.mts (the gate)', () => { + const source = ` + const PATTERNS = 
[path.join('build', 'Final', 'wasm')] + ` + const { code } = runHook('Write', 'scripts/check-paths.mts', source) + assert.equal(code, 0) + }) + + it('allows edits to the path-guard hook itself', () => { + const source = ` + const STAGES = ['Final', 'Release', 'Stripped'] + ` + const { code } = runHook( + 'Write', + '.claude/hooks/path-guard/index.mts', + source, + ) + assert.equal(code, 0) + }) + + it('allows edits to path-guard tests', () => { + const source = ` + const fixture = path.join('build', 'dev', 'out', 'Final') + ` + const { code } = runHook( + 'Write', + '.claude/hooks/path-guard/test/path-guard.test.mts', + source, + ) + assert.equal(code, 0) + }) +}) + +describe('path-guard — tool-name filter', () => { + it('skips Bash', () => { + const source = `path.join(PKG, 'build', 'dev', 'out', 'Final', 'bin')` + const { code } = runHook('Bash', '', source) + assert.equal(code, 0) + }) + + it('skips Read', () => { + const source = '' + const { code } = runHook('Read', 'packages/foo/scripts/build.mts', source) + assert.equal(code, 0) + }) +}) + +describe('path-guard — bug-tolerance (fails open)', () => { + it('passes through invalid JSON payload', () => { + const result = spawnSync(process.execPath, [HOOK], { + encoding: 'utf8', + input: 'not json at all', + }) + assert.equal(result.status, 0) + }) + + it('passes through empty stdin', () => { + const result = spawnSync(process.execPath, [HOOK], { + encoding: 'utf8', + input: '', + }) + assert.equal(result.status, 0) + }) +}) diff --git a/.claude/hooks/path-guard/tsconfig.json b/.claude/hooks/path-guard/tsconfig.json new file mode 100644 index 000000000..53c5c8475 --- /dev/null +++ b/.claude/hooks/path-guard/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "declarationMap": false, + "erasableSyntaxOnly": true, + "module": "nodenext", + "moduleResolution": "nodenext", + "noEmit": true, + "rewriteRelativeImportExtensions": true, + "skipLibCheck": true, + "sourceMap": false, + "strict": true, + "target": 
"esnext", + "verbatimModuleSyntax": true + } +} diff --git a/.claude/hooks/token-guard/README.md b/.claude/hooks/token-guard/README.md new file mode 100644 index 000000000..9cba28a51 --- /dev/null +++ b/.claude/hooks/token-guard/README.md @@ -0,0 +1,57 @@ +# token-guard + +Claude Code `PreToolUse` hook that refuses Bash tool calls that would leak secrets to tool output. Mandatory across the Socket fleet — every repo ships this file byte-for-byte via `scripts/sync-scaffolding.mjs`. + +## What it blocks + +| Rule | Example | Fix | +|------|---------|-----| +| Literal token in command | `echo vtwn_abc123…` | Rotate the exposed token; read tokens from `.env.local` at spawn time, never inline them | +| `env`/`printenv`/`export -p`/`set` dumping everything | `env \| grep FOO` (unredacted) | `env \| sed 's/=.*/=/'` or filter specific keys | +| `.env*` read without redactor | `cat .env.local` | `sed 's/=.*/=/' .env.local` or `grep -v '^#' .env.local \| cut -d= -f1` | +| `curl -H "Authorization:"` with unfiltered stdout | `curl -H "Authorization: Bearer $TOKEN" api.example.com` | Redirect to file/`/dev/null`, or pipe to `jq`/`grep`/`head`/`wc`/`cut`/`awk` | +| References sensitive env var name writing unredacted to stdout | `echo $API_KEY` | Same as above | + +## What it allows + +- Any write to a file (`>`, `>>`, `tee`) +- Any pipe through `jq`, `grep`, `head`, `tail`, `wc`, `cut`, `awk`, `sed s/=.*/=/`, `python3 -m json.tool` +- Legitimate `git`/`pnpm`/`npm`/`node`/`tsc`/`oxfmt`/`oxlint` invocations that happen to reference env var names but don't echo values +- Any curl call that does not carry an `Authorization:` header + +## Detected token shapes + +Literal value patterns caught in-command: + +- Val Town — `vtwn_` +- Linear — `lin_api_` +- OpenAI / Anthropic — `sk-` (20+ chars) +- Stripe — `sk_live_`, `sk_test_`, `pk_live_`, `rk_live_` +- GitHub — `ghp_`, `gho_`, `ghs_`, `ghu_`, `ghr_`, `github_pat_` +- GitLab — `glpat-` +- AWS — `AKIA…` +- Slack — `xoxb-`, `xoxa-`, 
`xoxp-`, `xoxr-`, `xoxs-` +- Google — `AIza…` +- JWTs — three-segment `eyJ…` + +## Control flow + +The hook reads the tool-use payload from stdin, type-checks `tool_name === 'Bash'`, and runs `check(command)`. Any rule violation `throw`s a typed `BlockError`; a single top-level `try/catch` in `main()` writes the block message to stderr and sets `process.exitCode = 2`. Hook bugs fail **open** — a crash in the hook writes a log line and returns exit 0 so legitimate work isn't blocked on a bad deploy. + +## Testing + +```bash +pnpm --filter hook-token-guard test +``` + +Adding new token-shape detections: update `LITERAL_TOKEN_PATTERNS` in `index.mts`, add a positive and negative test in `test/token-guard.test.mts`. + +## Updating across the fleet + +This file is in `IDENTICAL_FILES` in `scripts/sync-scaffolding.mjs`. After editing, run from `socket-repo-template`: + +```bash +node scripts/sync-scaffolding.mjs --all --fix +``` + +to propagate the change to every fleet repo. diff --git a/.claude/hooks/token-guard/index.mts b/.claude/hooks/token-guard/index.mts new file mode 100644 index 000000000..6cd98a8a0 --- /dev/null +++ b/.claude/hooks/token-guard/index.mts @@ -0,0 +1,261 @@ +#!/usr/bin/env node +// Claude Code PreToolUse hook — token-guard firewall. +// +// Blocks Bash commands that would echo token-bearing env vars into +// tool output. This fires BEFORE the command runs; exit code 2 makes +// Claude Code refuse the tool call. The model sees the rejection +// reason on stderr and retries with a redacted formulation. +// +// Blocked patterns: +// - Literal token shapes in the command string (vtwn_, lin_api_, +// sk-, ghp_, AKIA, xox, AIza, JWT, etc.) 
— hardest block, logs +// a redacted message and urges rotation +// - `env`, `printenv`, `export -p`, `set` with no filter pipeline +// - `cat` / `head` / `tail` / `less` / `more` of .env* files +// without a redaction step +// - `curl -H "Authorization: ..."` with output going to unfiltered +// stdout (not /dev/null, not a file, not piped to jq/grep/etc.) +// - Commands referencing a sensitive env var name (*TOKEN*, +// *SECRET*, *PASSWORD*, *API_KEY*, *SIGNING_KEY*, *PRIVATE_KEY*, +// *AUTH*, *CREDENTIAL*) that write to stdout without redaction +// +// Control flow uses a `BlockError` thrown from check helpers so every +// short-circuit path goes through a single `process.exitCode = 2` +// drop at the top-level catch — no scattered `process.exit(2)` that +// can race with buffered stderr. + +import process from 'node:process' + +// Name fragments matched case-insensitively against the command. +const SENSITIVE_ENV_NAMES = [ + 'TOKEN', + 'SECRET', + 'PASSWORD', + 'PASS', + 'API_KEY', + 'APIKEY', + 'SIGNING_KEY', + 'PRIVATE_KEY', + 'AUTH', + 'CREDENTIAL', +] + +// Pipelines that "launder" earlier-stage secrets into safe output. +const REDACTION_MARKERS = [ + /\bsed\b[^|]*s[/|#][^/|#]*=[^/|#]*\s*\/dev\/null/, + />>\s*[^|]/, + />\s*[^|]/, +] + +// Commands that dump all env vars to stdout with no filter. +const ALWAYS_DANGEROUS = [ + /^\s*env\s*(?:\||&&|;|$)/, + /^\s*env\s*$/, + /^\s*printenv\s*(?:\||&&|;|$)/, + /^\s*printenv\s*$/, + /^\s*export\s+-p\s*(?:\||&&|;|$)/, + /^\s*set\s*(?:\||&&|;|$)/, +] + +// Plain reads of .env files that would dump values to stdout. +const ENV_FILE_READ = /\b(?:cat|head|tail|less|more|bat)\b[^|]*\.env[^/\s|]*/ + +// curl calls that include an Authorization header. +const CURL_WITH_AUTH = + /\bcurl\b(?:[^|]|\|(?!\s*(?:sed|grep|head|tail|jq)))*(?:-H|--header)\s*['"]?Authorization:/i + +// Literal token-shape patterns — if any match in the command string, +// a real token has been pasted somewhere it shouldn't have been. 
+const LITERAL_TOKEN_PATTERNS: Array<[RegExp, string]> = [ + [/\bvtwn_[A-Za-z0-9_-]{8,}/, 'Val Town token (vtwn_)'], + [/\blin_api_[A-Za-z0-9_-]{8,}/, 'Linear API token (lin_api_)'], + [/\bsk-[A-Za-z0-9_-]{20,}/, 'OpenAI/Anthropic-style secret key (sk-)'], + [/\bsk_live_[A-Za-z0-9_-]{16,}/, 'Stripe live secret (sk_live_)'], + [/\bsk_test_[A-Za-z0-9_-]{16,}/, 'Stripe test secret (sk_test_)'], + [/\bpk_live_[A-Za-z0-9_-]{16,}/, 'Stripe live publishable (pk_live_)'], + [/\brk_live_[A-Za-z0-9_-]{16,}/, 'Stripe live restricted (rk_live_)'], + [/\bghp_[A-Za-z0-9]{30,}/, 'GitHub personal access token (ghp_)'], + [/\bgho_[A-Za-z0-9]{30,}/, 'GitHub OAuth token (gho_)'], + [/\bghs_[A-Za-z0-9]{30,}/, 'GitHub app server token (ghs_)'], + [/\bghu_[A-Za-z0-9]{30,}/, 'GitHub user access token (ghu_)'], + [/\bghr_[A-Za-z0-9]{30,}/, 'GitHub refresh token (ghr_)'], + [/\bgithub_pat_[A-Za-z0-9_]{20,}/, 'GitHub fine-grained PAT'], + [/\bglpat-[A-Za-z0-9_-]{16,}/, 'GitLab PAT (glpat-)'], + [/\bAKIA[0-9A-Z]{16}/, 'AWS access key ID (AKIA)'], + [/\bxox[baprs]-[A-Za-z0-9-]{10,}/, 'Slack token (xox_-)'], + [/\bAIza[0-9A-Za-z_-]{35}/, 'Google API key (AIza)'], + [/\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}/, 'JWT'], +] + +class BlockError extends Error { + public readonly rule: string + public readonly suggestion: string + public readonly showCommand: boolean + constructor(rule: string, suggestion: string, showCommand = true) { + super(rule) + this.name = 'BlockError' + this.rule = rule + this.suggestion = suggestion + this.showCommand = showCommand + } +} + +const stdin = (): Promise => + new Promise(resolve => { + let buf = '' + process.stdin.setEncoding('utf8') + process.stdin.on('data', chunk => (buf += chunk)) + process.stdin.on('end', () => resolve(buf)) + }) + +type ToolInput = { + tool_name?: string + tool_input?: { command?: string } +} + +const hasRedaction = (command: string): boolean => + REDACTION_MARKERS.some(re => re.test(command)) + +// Word-boundary 
match so `PASS` doesn't fire on `PATHS-ALLOWLIST` and +// `AUTH` doesn't fire on `AUTHOR`. Env-var-style boundaries treat `_` +// as a separator (so `ACCESS_TOKEN` matches `TOKEN`) but require a +// non-alphanumeric character on each end (so `PATHS` doesn't match +// `PASS`). The pre-fix substring match created false positives +// whenever a path name happened to contain a sensitive keyword as a +// literal substring. +const sensitiveEnvBoundaryRes = SENSITIVE_ENV_NAMES.map( + frag => new RegExp(String.raw`(?:^|[^A-Z0-9])${frag}(?:[^A-Z0-9]|$)`), +) +const referencesSensitiveEnv = (command: string): boolean => { + const upper = command.toUpperCase() + return sensitiveEnvBoundaryRes.some(re => re.test(upper)) +} + +const matchesAlwaysDangerous = (command: string): RegExp | null => { + for (const re of ALWAYS_DANGEROUS) { + if (re.test(command)) { + return re + } + } + return null +} + +const check = (command: string): void => { + // 0. Literal token-shape in the command string — hardest block. + // A real token value already landed in the command, which itself is + // logged. We refuse to echo it further and urge rotation. + for (const [pattern, label] of LITERAL_TOKEN_PATTERNS) { + if (pattern.test(command)) { + throw new BlockError( + `literal ${label} found in command string`, + 'Rotate the exposed token immediately. Never paste tokens into commands; read them from .env.local or a keychain at subprocess spawn time.', + false, + ) + } + } + + // 1. Always-dangerous patterns. Skip when the command already has a + // redaction pipeline — the suggested fix here is `env | sed ...`, + // which would itself match ALWAYS_DANGEROUS without this guard. + const dangerous = matchesAlwaysDangerous(command) + if (dangerous && !hasRedaction(command)) { + throw new BlockError( + `\`${dangerous.source}\` dumps env to stdout`, + 'Pipe through redaction, e.g. `env | sed "s/=.*/=/"` or filter specific keys.', + ) + } + + // 2. .env file reads without redaction. 
+ if (ENV_FILE_READ.test(command) && !hasRedaction(command)) { + throw new BlockError( + '.env file read without a redaction pipeline', + 'Use `sed "s/=.*/=/" .env.local` or `grep -v "^#" .env.local | cut -d= -f1` for key names only.', + ) + } + + // 3. curl with Authorization header and unsanitized stdout. + const curlHasAuth = CURL_WITH_AUTH.test(command) + const curlOutputSafe = + />\s*\/dev\/null|>\s*[^|&]/.test(command) || + /\|\s*(?:jq|grep|head|tail|wc|cut|awk|python3?\s+-m\s+json\.tool)\b/.test( + command, + ) + if (curlHasAuth && !curlOutputSafe) { + throw new BlockError( + 'curl with Authorization header and unsanitized stdout', + 'Redirect response to /dev/null, pipe to jq/grep/head, or save to a file.', + ) + } + + // 4. References a sensitive env var name and writes to stdout + // without a redaction step. Skip when curl-with-auth passed — that + // rule already evaluated the same pipeline. + if ( + !curlHasAuth && + referencesSensitiveEnv(command) && + !hasRedaction(command) + ) { + const isPureWrite = /^\s*(?:git|pnpm|npm|node|tsc|oxfmt|oxlint)\b/.test( + command, + ) + if (!isPureWrite) { + throw new BlockError( + 'command references sensitive env var name and writes to stdout without redaction', + 'Redirect to a file, pipe through `sed "s/=.*/=/"`, or ensure only key names (not values) are printed.', + ) + } + } +} + +const emitBlock = (command: string, err: BlockError): void => { + const safeCommand = err.showCommand + ? command.slice(0, 200) + (command.length > 200 ? '…' : '') + : '' + process.stderr.write( + `\n[token-guard] Blocked: ${err.rule}\n` + + ` Command: ${safeCommand}\n` + + ` Fix: ${err.suggestion}\n\n`, + ) +} + +const main = async (): Promise => { + const raw = await stdin() + if (!raw) { + return + } + let payload: ToolInput + try { + payload = JSON.parse(raw) as ToolInput + } catch { + return + } + if (payload.tool_name !== 'Bash') { + return + } + const command = payload.tool_input?.command ?? 
'' + if (!command) { + return + } + + try { + check(command) + } catch (e) { + if (e instanceof BlockError) { + emitBlock(command, e) + process.exitCode = 2 + return + } + throw e + } +} + +main().catch(e => { + // Never block a tool call due to a bug in the hook itself. Log it + // so we notice, but fail open. + process.stderr.write(`[token-guard] hook error (allowing): ${e}\n`) + process.exitCode = 0 +}) diff --git a/.claude/hooks/token-guard/package.json b/.claude/hooks/token-guard/package.json new file mode 100644 index 000000000..fc68951d8 --- /dev/null +++ b/.claude/hooks/token-guard/package.json @@ -0,0 +1,12 @@ +{ + "name": "hook-token-guard", + "private": true, + "type": "module", + "main": "./index.mts", + "exports": { + ".": "./index.mts" + }, + "scripts": { + "test": "node --test test/*.test.mts" + } +} diff --git a/.claude/hooks/token-guard/test/token-guard.test.mts b/.claude/hooks/token-guard/test/token-guard.test.mts new file mode 100644 index 000000000..b2ab67147 --- /dev/null +++ b/.claude/hooks/token-guard/test/token-guard.test.mts @@ -0,0 +1,225 @@ +/** + * @fileoverview Tests for the token-guard hook. + * + * Runs the hook as a subprocess (node --test), piping a tool-use + * payload on stdin and asserting on the exit code + stderr. Exit 2 + * means the hook refused the command; exit 0 means it passed it + * through. 
+ */ + +import { describe, it } from 'node:test' +import assert from 'node:assert/strict' + +import { whichSync } from '@socketsecurity/lib/bin' +import { spawnSync } from '@socketsecurity/lib/spawn' + +const hookScript = new URL('../index.mts', import.meta.url).pathname +const nodeBin = whichSync('node') +if (!nodeBin) { + throw new Error('"node" not found on PATH') +} + +function runHook(command: string, toolName = 'Bash'): { + code: number | null + stdout: string + stderr: string +} { + const input = JSON.stringify({ + tool_name: toolName, + tool_input: { command }, + }) + const result = spawnSync(nodeBin, [hookScript], { + input, + timeout: 5_000, + stdio: ['pipe', 'pipe', 'pipe'], + }) + return { + code: result.status, + stdout: (result.stdout || '').toString(), + stderr: (result.stderr || '').toString(), + } +} + +describe('token-guard hook', () => { + describe('allows safe commands', () => { + it('plain echo', () => { + assert.equal(runHook('echo hello').code, 0) + }) + it('git log', () => { + assert.equal(runHook('git log -1 --oneline').code, 0) + }) + it('pnpm install', () => { + assert.equal(runHook('pnpm install').code, 0) + }) + it('node script', () => { + assert.equal(runHook('node scripts/build.mts').code, 0) + }) + it('sed with redaction on .env', () => { + assert.equal( + runHook("sed 's/=.*/=/' .env.local").code, + 0, + ) + }) + it('grep key-names-only on .env', () => { + assert.equal( + runHook("grep -v '^#' .env.local | cut -d= -f1").code, + 0, + ) + }) + it('curl without Authorization header', () => { + assert.equal(runHook('curl -sS https://api.example.com').code, 0) + }) + it('curl with auth piped to jq', () => { + assert.equal( + runHook( + 'curl -sS -H "Authorization: Bearer $TOKEN" https://api.example.com | jq .name', + ).code, + 0, + ) + }) + it('curl with auth redirected to file', () => { + assert.equal( + runHook( + 'curl -sS -H "Authorization: Bearer $TOKEN" https://api.example.com > out.json', + ).code, + 0, + ) + }) + it('non-Bash 
tool is always allowed', () => { + assert.equal(runHook('env', 'Edit').code, 0) + }) + }) + + describe('blocks literal token shapes', () => { + it('Val Town token', () => { + const r = runHook('echo vtwn_ABCDEFGHIJKL') + assert.equal(r.code, 2) + assert.match(r.stderr, /Val Town token/) + }) + it('Linear API token', () => { + const r = runHook('echo lin_api_ABCDEFGHIJKLMNOP') + assert.equal(r.code, 2) + assert.match(r.stderr, /Linear API token/) + }) + it('GitHub PAT', () => { + const r = runHook( + 'echo ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcd1234', + ) + assert.equal(r.code, 2) + assert.match(r.stderr, /GitHub personal access token/) + }) + it('AWS access key', () => { + const r = runHook('echo AKIAIOSFODNN7EXAMPLE') + assert.equal(r.code, 2) + assert.match(r.stderr, /AWS access key/) + }) + it('Stripe test secret', () => { + const r = runHook('echo sk_test_ABCDEFGHIJKLMNOP') + assert.equal(r.code, 2) + assert.match(r.stderr, /Stripe test secret/) + }) + it('JWT', () => { + const r = runHook( + 'echo eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c', + ) + assert.equal(r.code, 2) + assert.match(r.stderr, /JWT/) + }) + it('redacts the command in stderr so the literal token is not re-logged', () => { + const r = runHook('echo vtwn_SECRETVALUE') + assert.equal(r.code, 2) + assert.doesNotMatch(r.stderr, /SECRETVALUE/) + assert.match(r.stderr, /suppressed/) + }) + }) + + describe('blocks env/printenv dumps', () => { + it('bare env', () => { + assert.equal(runHook('env').code, 2) + }) + it('env piped without redactor', () => { + assert.equal(runHook('env | grep FOO').code, 2) + }) + it('printenv', () => { + assert.equal(runHook('printenv').code, 2) + }) + it('export -p', () => { + assert.equal(runHook('export -p').code, 2) + }) + }) + + describe('blocks .env reads without redaction', () => { + it('cat .env.local', () => { + assert.equal(runHook('cat .env.local').code, 2) + }) + it('head .env', () => { + assert.equal(runHook('head 
.env').code, 2) + }) + it('less .env.production', () => { + assert.equal(runHook('less .env.production').code, 2) + }) + }) + + describe('blocks curl with auth to unfiltered stdout', () => { + it('plain curl -H Authorization', () => { + const r = runHook( + 'curl -sS -H "Authorization: Bearer $TOKEN" https://api.example.com', + ) + assert.equal(r.code, 2) + assert.match(r.stderr, /Authorization header and unsanitized stdout/) + }) + }) + + describe('blocks sensitive-env-name references without redaction', () => { + it('echoing $API_KEY', () => { + assert.equal(runHook('echo $API_KEY').code, 2) + }) + it('ruby -e with $TOKEN', () => { + assert.equal( + runHook('ruby -e "puts ENV[\'ACCESS_TOKEN\']"').code, + 2, + ) + }) + }) + + describe('does not false-positive on substring of sensitive name', () => { + // Regression: `PATHS-ALLOWLIST.YML` toUpperCase()d contains `PASS` + // as a substring, which the pre-fix unbounded match treated as + // a sensitive env reference. Word-boundary fix means `PASS` must + // be a standalone token (or at a `_`/`-`/`.`/`/` boundary). + it('paths-allowlist.yml does not trip PASS', () => { + assert.equal(runHook('cat .github/paths-allowlist.yml').code, 0) + }) + it('AUTHOR_NAME does not trip AUTH', () => { + // AUTHOR ends with R; the boundary-after match correctly skips + // it because the next char is `_`, but `AUTH` followed by `O` + // (alphanumeric) is not a token boundary. 
+ assert.equal(runHook('echo $AUTHOR_NAME').code, 0) + }) + it('PASSAGE_TIME does not trip PASS', () => { + assert.equal(runHook('echo $PASSAGE_TIME').code, 0) + }) + }) + + describe('fails open on malformed input', () => { + it('empty stdin', () => { + const r = spawnSync(nodeBin, [hookScript], { + input: '', + timeout: 5_000, + stdio: ['pipe', 'pipe', 'pipe'], + }) + assert.equal(r.status, 0) + }) + it('non-JSON stdin', () => { + const r = spawnSync(nodeBin, [hookScript], { + input: 'not json', + timeout: 5_000, + stdio: ['pipe', 'pipe', 'pipe'], + }) + assert.equal(r.status, 0) + }) + it('empty command', () => { + assert.equal(runHook('').code, 0) + }) + }) +}) diff --git a/.claude/hooks/token-guard/tsconfig.json b/.claude/hooks/token-guard/tsconfig.json new file mode 100644 index 000000000..53c5c8475 --- /dev/null +++ b/.claude/hooks/token-guard/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "declarationMap": false, + "erasableSyntaxOnly": true, + "module": "nodenext", + "moduleResolution": "nodenext", + "noEmit": true, + "rewriteRelativeImportExtensions": true, + "skipLibCheck": true, + "sourceMap": false, + "strict": true, + "target": "esnext", + "verbatimModuleSyntax": true + } +} diff --git a/.claude/settings.json b/.claude/settings.json index 3490c309f..cc6c18da3 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -7,6 +7,10 @@ { "type": "command", "command": "node .claude/hooks/check-new-deps/index.mts" + }, + { + "type": "command", + "command": "node .claude/hooks/path-guard/index.mts" } ] }, @@ -15,11 +19,11 @@ "hooks": [ { "type": "command", - "command": "node .claude/hooks/public-surface-reminder/index.mts" + "command": "node .claude/hooks/token-guard/index.mts" }, { "type": "command", - "command": "node .claude/hooks/token-hygiene/index.mts" + "command": "node .claude/hooks/public-surface-reminder/index.mts" } ] } diff --git a/.claude/skills/_shared/path-guard-rule.md b/.claude/skills/_shared/path-guard-rule.md new file mode 
100644 index 000000000..fa42a32ea --- /dev/null +++ b/.claude/skills/_shared/path-guard-rule.md @@ -0,0 +1,39 @@ + + +## 1 path, 1 reference + +**A path is *constructed* exactly once. Everywhere else *references* the constructed value.** + +Referencing a single computed path many times is fine — that's the whole point of computing it once. What's banned is *re-constructing* the same path in multiple places, because that's where drift is born. Three concrete shapes: + +1. **Within a package** — every script, test, and lib file that needs a build path imports it from the package's `scripts/paths.mts` (or `lib/paths.mts`). No `path.join('build', mode, ...)` outside that module. + +2. **Across packages** — when package B consumes package A's output, B imports A's `paths.mts` via the workspace `exports` field. Never `path.join(PKG, '..', '', 'build', ...)`. The R28 yoga/ink bug — ink hand-building yoga's wasm path and missing the `wasm/` segment — is the canonical failure mode this rule prevents. + +3. **Workflows, Dockerfiles, shell scripts** — they can't `import` TS, so they construct the string once and reference it everywhere downstream. Workflows: a "Compute paths" step exposes `steps.paths.outputs.final_dir`; later steps read `${{ steps.paths.outputs.final_dir }}`. Dockerfiles/shell: assign once to a variable, reference by name thereafter. Each canonical construction carries a comment naming the source-of-truth `paths.mts` so the YAML can't drift from TS without a flagged change. **Re-building** the same path in a second step is the violation, not referring to the constructed value many times. + +Comments that re-state a full path are forbidden. The import statement IS the comment. Docs and READMEs may describe the structure ("output goes under the Final dir") but should not encode a complete `build///out/Final/binary` string — encoded paths get parsed by tools and silently rot. 
+ +Code execution takes priority over docs: violations in `.mts`/`.cts`, Makefiles, Dockerfiles, workflow YAML, and shell scripts are blocking. README and doc-comment violations are advisory unless they contain a fully-qualified path with no parametric placeholders. + +### Three-level enforcement + +- **Hook** — `.claude/hooks/path-guard/` blocks `Edit`/`Write` calls that would introduce a violation in a `.mts`/`.cts` file. Refusal at edit time stops new duplication from landing. +- **Gate** — `scripts/check-paths.mts` runs in `pnpm check`. Fails the build on any violation that isn't allowlisted. +- **Skill** — `/path-guard` audits the repo and fixes findings; `/path-guard check` reports only; `/path-guard install` drops the gate + hook + rule into a fresh repo. + +The mantra is intentionally short so it sticks: **1 path, 1 reference**. When in doubt, find the canonical owner and import from it. diff --git a/.claude/skills/path-guard/SKILL.md b/.claude/skills/path-guard/SKILL.md new file mode 100644 index 000000000..11d0e5ba7 --- /dev/null +++ b/.claude/skills/path-guard/SKILL.md @@ -0,0 +1,248 @@ +--- +name: path-guard +description: Audit and fix path duplication in this Socket repo. Apply the strict "1 path, 1 reference" rule — every build/test/runtime/config path is constructed exactly once; everywhere else references the constructed value. Default mode finds and fixes; `check` mode reports only; `install` mode drops the gate + hook + rule into a fresh repo. +user-invocable: true +allowed-tools: Task, Bash, Read, Edit, Write, Grep, Glob, AskUserQuestion +--- + +# path-guard + +**Mantra: 1 path, 1 reference.** A path is constructed exactly once; everywhere else references the constructed value. Re-constructing the same path twice is the violation, not referencing the constructed value many times. + +## Modes + +- `/path-guard` — full audit-and-fix conversion of the current repo (default). +- `/path-guard check` — read-only audit, report violations, no fixes. 
+- `/path-guard fix ` — fix a single finding from a prior `check` run, by index. +- `/path-guard install` — drop the gate + hook + rule + allowlist into a fresh repo (for new Socket repos). + +## Three-level enforcement + +The strategy lives in three artifacts that ship together: + +1. **CLAUDE.md rule** — the mantra and detection rules in plain language. Every Socket repo's CLAUDE.md carries `## 1 path, 1 reference`. Synced from `.claude/skills/_shared/path-guard-rule.md`. +2. **Hook** — `.claude/hooks/path-guard/index.mts` runs `PreToolUse` on `Edit`/`Write` of `.mts`/`.cts` files. Blocks new violations at edit time. Mandatory across the fleet. +3. **Gate** — `scripts/check-paths.mts` runs in `pnpm check` (and CI). Whole-repo scan. Fails the build on any unsanctioned violation. + +This skill is the *audit-and-fix workflow* that makes a repo conform initially and validates conformance over time. + +## Detection rules + +The gate enforces six rules. The hook enforces a subset (A and B) since it sees only one diff at a time. + +| Rule | What it catches | Where checked | +|---|---|---| +| **A** | Multi-stage `path.join(...)` constructed inline. Two or more "stage" segments (Final, Release, Stripped, Compressed, Optimized, Synced, wasm, downloaded), or one stage + build-root + mode. | `.mts`/`.cts` files outside a `paths.mts`. Hook + gate. | +| **B** | Cross-package traversal: `path.join(*, '..', '', 'build', ...)` reaching into a sibling's output instead of importing via `exports`. | `.mts`/`.cts` files. Hook + gate. | +| **C** | Workflow YAML constructs the same path string in 2+ steps outside a "Compute paths" step. | `.github/workflows/*.yml`. Gate. | +| **D** | Comment encodes a fully-qualified multi-stage path string (e.g. `# build/dev/darwin-arm64/out/Final/binary`). | `.github/workflows/*.yml`. Gate. | +| **F** | Same path shape constructed in 2+ different files. | All scanned files. Gate. 
| +| **G** | Hand-built multi-stage path constructed 2+ times in the same Makefile/Dockerfile/shell stage. | `Makefile`, `*.mk`, `*.Dockerfile`, `Dockerfile.*`, `*.sh`. Gate. | + +Comments may describe path *structure* with placeholders (`/` or `${BUILD_MODE}/${PLATFORM_ARCH}`) but should not encode a complete literal path string. Code execution takes priority over docs: violations in `.mts`, Makefiles, Dockerfiles, workflow YAML, shell scripts are blocking. + +## Mode: audit-and-fix (default) + +When invoked as `/path-guard` with no arg: + +1. **Setup** — spawn a worktree off `main` per `CLAUDE.md` parallel-sessions rule: + ```bash + git worktree add -b paths-audit ../-paths-audit main + cd ../-paths-audit + ``` + +2. **Audit** — run the gate to enumerate findings: + ```bash + pnpm run check:paths --json > /tmp/paths-findings.json + pnpm run check:paths --explain # human-readable + ``` + +3. **Fix loop** — for each finding, apply the matching pattern below. After each fix, re-run the gate. Stop iterating when `pnpm run check:paths` exits 0. + +4. **Verify** — run the full check suite + zizmor on any modified workflow: + ```bash + pnpm check + for w in .github/workflows/*.yml; do zizmor "$w"; done + ``` + +5. **Commit and push** — group fixes by logical category (workflows, code, Dockerfiles). Push directly to `main` for repos that allow direct push, or open a PR for repos that require it (socket-cli, socket-sdk-js, socket-registry per their CLAUDE.md / memory entries). + +## Fix patterns + +### Rule A — Multi-stage path constructed inline (in `.mts`/`.cts`) + +**Bad**: +```ts +const finalBinary = path.join(PACKAGE_ROOT, 'build', BUILD_MODE, PLATFORM_ARCH, 'out', 'Final', 'binary') +``` + +**Fix**: move the construction into the package's `scripts/paths.mts` (or `lib/paths.mts`), or use a build-infra helper: +```ts +// In packages/foo/scripts/paths.mts: +export function getBuildPaths(mode, platformArch) { + // ... constructs once ... 
+ return { outputFinalBinary: path.join(PACKAGE_ROOT, 'build', mode, platformArch, 'out', 'Final', binaryName) } +} + +// In the consumer: +import { getBuildPaths } from './paths.mts' +const { outputFinalBinary } = getBuildPaths(mode, platformArch) +``` + +For binsuite tools (binpress/binflate/binject) the canonical helper is `getFinalBinaryPath(packageRoot, mode, platformArch, binaryName)` from `build-infra/lib/paths`. For download caches use `getDownloadedDir(packageRoot)`. + +### Rule B — Cross-package traversal + +**Bad**: +```ts +const liefDir = path.join(PACKAGE_ROOT, '..', 'lief-builder', 'build', mode, platformArch, 'out', 'Final', 'lief') +``` + +**Fix**: declare the workspace dep, expose `paths.mts` via the producer's `exports`, import the helper: + +1. In producer's `package.json`: + ```json + "exports": { + "./scripts/paths": "./scripts/paths.mts" + } + ``` +2. In consumer's `package.json` `dependencies`: + ```json + "lief-builder": "workspace:*" + ``` +3. In consumer: + ```ts + import { getBuildPaths as getLiefBuildPaths } from 'lief-builder/scripts/paths' + const { outputFinalDir } = getLiefBuildPaths(mode, platformArch) + ``` + +### Rule C — Workflow path repetition + +**Bad** (3 steps each rebuilding the same path): +```yaml +- name: Step A + run: cd packages/foo/build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final && do-thing-1 +- name: Step B + run: cd packages/foo/build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final && do-thing-2 +- name: Step C + run: cd packages/foo/build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final && do-thing-3 +``` + +**Fix**: add a "Compute paths" step early in the job that constructs the path once, expose via `$GITHUB_OUTPUT`, reference downstream: + +```yaml +- name: Compute foo paths + id: paths + env: + BUILD_MODE: ${{ steps.build-mode.outputs.mode }} + PLATFORM_ARCH: ${{ steps.platform-arch.outputs.platform_arch }} + run: | + PACKAGE_DIR="packages/foo" + PLATFORM_BUILD_DIR="${PACKAGE_DIR}/build/${BUILD_MODE}/${PLATFORM_ARCH}" + 
FINAL_DIR="${PLATFORM_BUILD_DIR}/out/Final" + { + echo "package_dir=${PACKAGE_DIR}" + echo "platform_build_dir=${PLATFORM_BUILD_DIR}" + echo "final_dir=${FINAL_DIR}" + } >> "$GITHUB_OUTPUT" + +- name: Step A + env: + FINAL_DIR: ${{ steps.paths.outputs.final_dir }} + run: cd "$FINAL_DIR" && do-thing-1 +# ... etc +``` + +For paths used inside `working-directory: packages/foo` steps, expose a `_rel` companion (e.g. `final_dir_rel=build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final`) and reference that. + +### Rule D — Comment-encoded paths + +**Bad**: +```yaml +# Path: packages/foo/build/dev/darwin-arm64/out/Final/binary +COPY --from=builder /build/.../out/Final/binary /out/Final/binary +``` + +**Fix**: cite the canonical `paths.mts` instead of duplicating the string: +```yaml +# Layout owned by packages/foo/scripts/paths.mts:getBuildPaths(). +COPY --from=builder /build/packages/foo/build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final/binary /out/Final/binary +``` + +The comment may describe structure (`/`) but should not be a parsable literal path. + +### Rule G — Dockerfile/Makefile/shell duplicate construction + +**Bad** (Dockerfile reconstructs the path 3 times in the same stage): +```dockerfile +RUN mkdir -p build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final && \ + cp src build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final/output && \ + ls build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final/ +``` + +**Fix**: declare an `ENV` once, reference everywhere: +```dockerfile +# Layout owned by packages/foo/scripts/paths.mts. +ENV FINAL_DIR=build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final +RUN mkdir -p "$FINAL_DIR" && cp src "$FINAL_DIR/output" && ls "$FINAL_DIR/" +``` + +Each Dockerfile `FROM` stage is its own scope — ENV from the build stage doesn't reach a subsequent `FROM scratch AS export` stage. The gate accounts for this. + +## Mode: check (read-only) + +When invoked as `/path-guard check`: + +```bash +pnpm run check:paths --explain +``` + +Print the gate's findings without making any edits. 
Exit 0 if clean, 1 if findings present. Useful for CI / pre-merge inspection. + +## Allowlisting a finding + +When a genuine exemption is needed (rare — most "false positives" should be reported as gate bugs), add an entry to `.github/paths-allowlist.yml`. Two ways to pin the entry to a specific site: + +- **`line:`** — exact line number. Strict; a single-line edit above shifts the entry off-target and the finding re-surfaces. +- **`snippet_hash:`** — 12-char SHA-256 prefix of the offending snippet (whitespace-normalized). Drift-resistant: survives reformatting, but any content-changing edit invalidates it. Get the hash: + ```bash + pnpm run check:paths --show-hashes + ``` + +Both may be set — either matching is sufficient. Prefer `snippet_hash` over raw `line:` when the exemption is expected to outlive routine reformatting; prefer `line:` when you specifically *want* the entry to fall off after any nearby edit. + +## Mode: install (new repo) + +When invoked as `/path-guard install` on a Socket repo that doesn't yet have the gate: + +1. Copy the gate file from this skill's reference dir: + ```bash + cp .claude/skills/path-guard/reference/check-paths.mts.tmpl scripts/check-paths.mts + ``` +2. Copy the empty allowlist: + ```bash + cp .claude/skills/path-guard/reference/paths-allowlist.yml.tmpl .github/paths-allowlist.yml + ``` +3. Add `"check:paths": "node scripts/check-paths.mts"` to `package.json`. +4. Wire `runPathHygieneCheck()` into `scripts/check.mts` (after the existing checks). +5. Append the rule snippet from `.claude/skills/_shared/path-guard-rule.md` to the repo's `CLAUDE.md` if a `1 path, 1 reference` section is missing. +6. Add the hook entry to `.claude/settings.json` `PreToolUse` matcher `Edit|Write`: + ```json + { "type": "command", "command": "node .claude/hooks/path-guard/index.mts" } + ``` +7. Run the gate against the repo. Triage findings as you would in audit-and-fix mode. 
+ +## Tie-in with quality-scan + +The `/quality-scan` skill should call `pnpm run check:paths --json` as one of its sub-scans and surface findings as part of its A-F graded report. Failures roll into the overall quality grade. The full audit-and-fix workflow lives here; quality-scan just *detects* during periodic scans. + +## Reference patterns + +When converting a repo to the strategy, the patterns I keep reusing: + +- **TS-first packages**: each package owns a `scripts/paths.mts` with `PACKAGE_ROOT`, `BUILD_ROOT`, `getBuildPaths(mode, platformArch)` returning at minimum `outputFinalDir` and `outputFinalBinary`/`outputFinalFile`. +- **Cross-package consumers**: `package.json` `exports` whitelists `./scripts/paths`. Consumer adds `": workspace:*"` and imports. +- **Workflows**: each job has a "Compute paths" step (`id: paths`) early in the job. Step outputs include `package_dir`, `platform_build_dir`, `final_dir`, named files. `_rel` companions when `working-directory:` is used. +- **Docker stages**: each `FROM` stage declares `ENV PLATFORM_BUILD_DIR=...` and `ENV FINAL_DIR=...` once. Subsequent RUN steps reference the variables. + +The first repo (socket-btm) is the worked example. Read its `scripts/paths.mts` files and `.github/workflows/*.yml` for canonical patterns when applying the strategy elsewhere. diff --git a/.claude/skills/path-guard/reference/check-paths.mts.tmpl b/.claude/skills/path-guard/reference/check-paths.mts.tmpl new file mode 100644 index 000000000..023b6ce14 --- /dev/null +++ b/.claude/skills/path-guard/reference/check-paths.mts.tmpl @@ -0,0 +1,946 @@ +#!/usr/bin/env node +/** + * @fileoverview Path-hygiene gate. + * + * Mantra: 1 path, 1 reference. A path is constructed exactly once; + * everywhere else references the constructed value. + * + * Whole-repo scan complementing the per-edit `.claude/hooks/path-guard` + * hook. The hook stops new violations from landing; this gate finds + * the existing ones and blocks merges that introduce more. 
+ * + * Rules enforced: + * + * A — Multi-stage path constructed inline. A `path.join(...)` call + * (or template literal) in a `.mts`/`.cts` file outside a + * `paths.mts` that stitches together two or more "stage" + * segments (Final, Release, Stripped, Compressed, Optimized, + * Synced, wasm, downloaded), or one stage plus a build-root + * (`build`/`out`) plus a mode (`dev`/`prod`/`shared`). The + * construction belongs in the package's `paths.mts` (or a + * build-infra helper); every consumer imports the computed + * value. + * + * B — Cross-package path traversal. A `path.join(*, '..', '', 'build', ...)` reaches into a sibling's build + * output without going through its `exports`. The sibling owns + * its layout; consumers declare a workspace dep and import the + * sibling's `paths.mts`. + * + * C — Hand-built workflow path. A `.github/workflows/*.yml` step + * constructs `build/${...}/out//...` inline outside a + * canonical "Compute paths" step. Workflows can carry path + * strings, but the strings are constructed once and exposed via + * step outputs / job env that downstream steps reference. + * + * D — Comment-encoded paths. Comments (in code or YAML) that re-state + * a fully-qualified multi-stage path. Comments may describe the + * structure ("Final dir" or "build//...") but should not + * encode a complete path string that a tool would parse — the + * canonical construction IS the documentation. + * + * F — Same path constructed in multiple places. The same shape of + * multi-stage `path.join(...)` (or workflow `build/${...}/...` + * string template) appearing in two or more files. Construct + * once and import; references of the constructed value are + * unlimited. + * + * G — Hand-built paths in Makefiles, Dockerfiles, and shell scripts. + * Same shape as A, applied to executable artifacts that don't + * run TypeScript. 
Each canonical construction must carry a + * comment naming the source-of-truth `paths.mts` so the script + * can't drift from TS without a flagged change. + * + * Allowlist: `.github/paths-allowlist.yml`. Each entry needs a + * `reason` so the list stays audit-able. Patterns are deliberately + * narrow — entries should be specific, not blanket. + * + * Usage: + * node scripts/check-paths.mts # default: report + fail + * node scripts/check-paths.mts --explain # long-form explanation + * node scripts/check-paths.mts --json # machine-readable + * node scripts/check-paths.mts --quiet # silent on clean + * + * Exit codes: + * 0 — clean (no findings, or every finding is allowlisted) + * 1 — findings present + * 2 — gate itself crashed + */ + +import { createHash } from 'node:crypto' +import { existsSync, readFileSync, readdirSync } from 'node:fs' +import path from 'node:path' +import process from 'node:process' + +import { fileURLToPath } from 'node:url' + +import { parseArgs } from 'node:util' + +import { + BUILD_ROOT_SEGMENTS, + KNOWN_SIBLING_PACKAGES, + MODE_SEGMENTS, + STAGE_SEGMENTS, +} from '../.claude/hooks/path-guard/segments.mts' + +// Plain stderr/stdout output — no @socketsecurity/lib dependency so +// the gate is self-contained and works in socket-lib itself (which +// would otherwise import itself). +const logger = { + log: (msg: string) => process.stdout.write(msg + '\n'), + error: (msg: string) => process.stderr.write(msg + '\n'), + step: (msg: string) => process.stdout.write(`→ ${msg}\n`), + success: (msg: string) => process.stdout.write(`✔ ${msg}\n`), + substep: (msg: string) => process.stdout.write(` ${msg}\n`), +} + +const __filename = fileURLToPath(import.meta.url) +const __dirname = path.dirname(__filename) +const REPO_ROOT = path.resolve(__dirname, '..') + +// Stage / build-root / mode / sibling-package vocabularies are imported +// from `.claude/hooks/path-guard/segments.mts` (the canonical source). 
+// Both this gate and the path-guard hook share that single definition +// — Mantra: 1 path, 1 reference. + +// File-path patterns that legitimately enumerate path segments. +const EXEMPT_FILE_PATTERNS: RegExp[] = [ + // Any paths.mts is the canonical constructor. + /(^|\/)paths\.(mts|cts|js)$/, + // Build-infra owns shared helpers that enumerate stages. + /packages\/build-infra\/lib\/paths\.mts$/, + /packages\/build-infra\/lib\/constants\.mts$/, + // Path-scanning gates that intentionally enumerate. + /scripts\/check-paths\.mts$/, + /scripts\/check-consistency\.mts$/, + /\.claude\/hooks\/path-guard\//, + // Allowlist + config files. + /\.github\/paths-allowlist\.yml$/, +] + +type Finding = { + rule: 'A' | 'B' | 'C' | 'D' | 'F' | 'G' + file: string + line: number + snippet: string + message: string + fix: string +} + +const findings: Finding[] = [] + +const args = parseArgs({ + options: { + explain: { type: 'boolean', default: false }, + json: { type: 'boolean', default: false }, + quiet: { type: 'boolean', default: false }, + 'show-hashes': { type: 'boolean', default: false }, + }, + strict: false, +}) + +const isExempt = (filePath: string): boolean => + EXEMPT_FILE_PATTERNS.some(re => re.test(filePath)) + +// ────────────────────────────────────────────────────────────────── +// Allowlist loading +// ────────────────────────────────────────────────────────────────── + +type AllowlistEntry = { + file?: string + pattern?: string + rule?: string + line?: number + snippet_hash?: string + reason: string +} + +const loadAllowlist = (): AllowlistEntry[] => { + const allowlistPath = path.join(REPO_ROOT, '.github', 'paths-allowlist.yml') + if (!existsSync(allowlistPath)) { + return [] + } + const text = readFileSync(allowlistPath, 'utf8') + // Tiny YAML parser — only the shape we need: list of entries with + // `file`, `pattern`, `rule`, `line`, `reason` scalar fields, plus + // YAML 1.2 block-scalar indicators `|` (literal) and `>` (folded) + // for multi-line reasons. 
Avoids a yaml dep for a gate that has to + // be self-contained. + const entries: AllowlistEntry[] = [] + let current: Partial | null = null + // When set, subsequent more-indented lines fold into this key as a + // block scalar (literal '|' keeps newlines, folded '>' joins with + // spaces). + let blockKey: string | null = null + let blockKind: '|' | '>' | null = null + let blockIndent = 0 + let blockLines: string[] = [] + const flushBlock = () => { + if (current && blockKey) { + const value = + blockKind === '>' + ? blockLines.join(' ').replace(/\s+/g, ' ').trim() + : blockLines.join('\n').replace(/\n+$/, '') + ;(current as any)[blockKey] = value + } + blockKey = null + blockKind = null + blockLines = [] + } + const indentOf = (line: string): number => { + let i = 0 + while (i < line.length && line[i] === ' ') { + i += 1 + } + return i + } + const lines = text.split('\n') + for (let i = 0; i < lines.length; i++) { + const raw = lines[i]! + const line = raw.replace(/\r$/, '') + // Block-scalar accumulation takes precedence over normal parsing. + if (blockKey !== null) { + if (line.trim() === '') { + // Preserve blank lines inside a literal block; folded blocks + // turn them into paragraph breaks (kept as separate joins). + blockLines.push('') + continue + } + const indent = indentOf(line) + if (indent >= blockIndent) { + blockLines.push(line.slice(blockIndent)) + continue + } + flushBlock() + // Fall through and re-process the dedented line as normal. + } + if (!line.trim() || line.trim().startsWith('#')) { + continue + } + const tryAssign = (key: string, value: string) => { + const trimmed = value.trim() + if (current === null) { + return + } + if (trimmed === '|' || trimmed === '>') { + blockKey = key + blockKind = trimmed as '|' | '>' + blockIndent = indentOf(lines[i + 1] ?? '') || indentOf(line) + 2 + blockLines = [] + return + } + ;(current as any)[key] = key === 'line' ? 
Number(unquote(trimmed)) : unquote(trimmed) + } + if (line.startsWith('- ')) { + if (current && current.reason) { + entries.push(current as AllowlistEntry) + } + current = {} + const rest = line.slice(2).trim() + if (rest) { + const m = rest.match(/^([\w-]+):\s*(.*)$/) + if (m) { + tryAssign(m[1]!, m[2]!) + } + } + } else if (current) { + const m = line.match(/^\s+([\w-]+):\s*(.*)$/) + if (m) { + tryAssign(m[1]!, m[2]!) + } + } + } + if (blockKey !== null) { + flushBlock() + } + if (current && current.reason) { + entries.push(current as AllowlistEntry) + } + return entries +} + +const unquote = (s: string): string => { + const t = s.trim() + if ( + (t.startsWith('"') && t.endsWith('"')) || + (t.startsWith("'") && t.endsWith("'")) + ) { + return t.slice(1, -1) + } + return t +} + +const ALLOWLIST = loadAllowlist() + +/** + * Stable, normalized snippet hash. Whitespace-insensitive so trivial + * reformatting (indent change, trailing comma, line wrap) doesn't + * invalidate an allowlist entry, but content-changing edits do. The + * hash exposes only the first 12 hex chars (~48 bits) which is plenty + * for collision-resistance within a single repo's finding set and + * keeps the YAML readable. + */ +const snippetHash = (snippet: string): string => { + const normalized = snippet.replace(/\s+/g, ' ').trim() + return createHash('sha256').update(normalized).digest('hex').slice(0, 12) +} + +/** + * Allowlist matching trades off two failure modes: + * + * - Drift via reformatting (a line shift breaks an entry, the + * finding re-surfaces, devs paper over with a new entry). + * - Stealth allowlisting (an entry pinned to "anywhere in this file" + * silently exempts unrelated future violations). + * + * Strategy: exact line match OR `snippet_hash` match (whitespace- + * normalized SHA-256, first 12 hex). Either is sufficient. 
Lines stay + * exact (was ±2; the slack let reformatting silently slide), and + * `snippet_hash` provides reformatting-tolerant matching that's still + * tied to the literal text — paste-and-edit cheating would change the + * hash. If neither `line` nor `snippet_hash` is provided, the entry + * matches purely by `rule` + `file` + `pattern` (file-level exempt; + * use sparingly and always pair with a precise `pattern`). + */ +const isAllowlisted = (finding: Finding): boolean => + ALLOWLIST.some(entry => { + if (entry.rule && entry.rule !== finding.rule) { + return false + } + if (entry.file && !finding.file.includes(entry.file)) { + return false + } + if (entry.pattern && !finding.snippet.includes(entry.pattern)) { + return false + } + const lineProvided = entry.line !== undefined + const hashProvided = + typeof entry.snippet_hash === 'string' && entry.snippet_hash.length > 0 + if (lineProvided || hashProvided) { + const lineMatches = + lineProvided && entry.line === finding.line + const hashMatches = + hashProvided && entry.snippet_hash === snippetHash(finding.snippet) + if (!(lineMatches || hashMatches)) { + return false + } + } + return true + }) + +// ────────────────────────────────────────────────────────────────── +// File walking +// ────────────────────────────────────────────────────────────────── + +const SKIP_DIRS = new Set([ + '.git', + 'node_modules', + 'build', + 'dist', + 'out', + 'target', + '.cache', + 'upstream', +]) + +const walk = function* ( + dir: string, + filter: (relPath: string) => boolean, +): Generator { + let entries + try { + entries = readdirSync(dir, { withFileTypes: true }) + } catch { + return + } + for (const e of entries) { + if (SKIP_DIRS.has(e.name)) { + continue + } + const full = path.join(dir, e.name) + const rel = path.relative(REPO_ROOT, full) + if (e.isDirectory()) { + yield* walk(full, filter) + } else if (e.isFile() && filter(rel)) { + yield rel + } + } +} + +// 
────────────────────────────────────────────────────────────────── +// Rule A + B: code scan (.mts / .cts) +// ────────────────────────────────────────────────────────────────── + +// Locate `path.join(` or `path.resolve(` call sites; argument-list +// extraction uses a paren-balancing scanner below to handle arbitrary +// nesting depth (the previous regex-only approach silently missed any +// argument containing 2+ levels of nested function calls). +const PATH_CALL_RE = /\bpath\.(?:join|resolve)\s*\(/g +const STRING_LITERAL_RE = /(['"])((?:\\.|(?!\1)[^\\])*)\1/g + +// Template literal scanner. Captures backtick-delimited strings +// (including those with `${...}` placeholders) so Rule A also catches +// path construction via template literals like +// `${buildDir}/out/Final/${binary}` or `build/${mode}/out/Final`. +const TEMPLATE_LITERAL_RE = /`((?:\\.|(?:\$\{(?:[^{}]|\{[^{}]*\})*\})|(?!`)[^\\])*)`/g + +/** + * Convert a template-literal body into a synthetic forward-slash path + * by replacing `${...}` placeholders with a sentinel and normalizing + * separators. Returns the sequence of path segments split on `/`. The + * sentinel doesn't match any STAGE/BUILD_ROOT/MODE token, so a + * placeholder-only segment (`${binaryName}`) won't match those sets. + */ +const templateLiteralSegments = (body: string): string[] => { + // Strip placeholders so they don't introduce noise in segments. + // Empty result for a placeholder is fine; downstream filters by set + // membership and skips empties. + const stripped = body.replace(/\$\{(?:[^{}]|\{[^{}]*\})*\}/g, '\x00') + return stripped.split('/').filter(seg => seg.length > 0 && seg !== '\x00') +} + +/** + * Extract every `path.join(...)` and `path.resolve(...)` call from the + * source text, returning each call's literal start offset and argument + * substring. Uses paren-balancing so deeply-nested arguments like + * `path.join(getDir(child(x)), 'build', 'Final')` are captured fully. 
+ */ +const extractPathCalls = ( + source: string, +): Array<{ offset: number; args: string }> => { + const calls: Array<{ offset: number; args: string }> = [] + PATH_CALL_RE.lastIndex = 0 + let match: RegExpExecArray | null + while ((match = PATH_CALL_RE.exec(source)) !== null) { + const callStart = match.index + const argsStart = PATH_CALL_RE.lastIndex + let depth = 1 + let i = argsStart + let inString: '"' | "'" | '`' | null = null + while (i < source.length && depth > 0) { + const ch = source[i]! + if (inString) { + if (ch === '\\') { + i += 2 + continue + } + if (ch === inString) { + inString = null + } + } else { + if (ch === '"' || ch === "'" || ch === '`') { + inString = ch + } else if (ch === '(') { + depth += 1 + } else if (ch === ')') { + depth -= 1 + if (depth === 0) { + break + } + } + } + i += 1 + } + if (depth === 0) { + calls.push({ offset: callStart, args: source.slice(argsStart, i) }) + PATH_CALL_RE.lastIndex = i + 1 + } + } + return calls +} + +const extractStringLiterals = (args: string): string[] => { + const literals: string[] = [] + let match: RegExpExecArray | null + STRING_LITERAL_RE.lastIndex = 0 + while ((match = STRING_LITERAL_RE.exec(args)) !== null) { + if (match[2] !== undefined) { + literals.push(match[2]) + } + } + return literals +} + +const scanCodeFile = (relPath: string): void => { + const full = path.join(REPO_ROOT, relPath) + let content: string + try { + content = readFileSync(full, 'utf8') + } catch { + return + } + const lines = content.split('\n') + // Build a line-offset map so we can map regex offsets back to line + // numbers cheaply. + const lineOffsets: number[] = [0] + for (let i = 0; i < content.length; i++) { + if (content[i] === '\n') { + lineOffsets.push(i + 1) + } + } + const offsetToLine = (offset: number): number => { + let lo = 0 + let hi = lineOffsets.length - 1 + while (lo < hi) { + const mid = (lo + hi + 1) >>> 1 + if (lineOffsets[mid]! 
<= offset) { + lo = mid + } else { + hi = mid - 1 + } + } + return lo + 1 + } + + for (const call of extractPathCalls(content)) { + const literals = extractStringLiterals(call.args) + const stages = literals.filter(l => STAGE_SEGMENTS.has(l)) + const buildRoots = literals.filter(l => BUILD_ROOT_SEGMENTS.has(l)) + const modes = literals.filter(l => MODE_SEGMENTS.has(l)) + + // Rule A: 2+ stages OR (1 stage + 1 build-root + 1 mode). + const triggersA = + stages.length >= 2 || + (stages.length >= 1 && buildRoots.length >= 1 && modes.length >= 1) + if (triggersA) { + const line = offsetToLine(call.offset) + const snippet = (lines[line - 1] ?? '').trim() + findings.push({ + rule: 'A', + file: relPath, + line, + snippet, + message: 'Multi-stage path constructed inline (outside paths.mts).', + fix: 'Construct in the owning paths.mts (or use getFinalBinaryPath / getDownloadedDir from build-infra/lib/paths). Import the computed value here.', + }) + } + + // Rule B: each '..' opens a window; the window stays open only + // until the next non-'..' literal. A sibling-package literal + // *immediately after* a '..' (no path segment between them) + // triggers, AND there must be build context elsewhere in the + // call. Resetting per-segment prevents false positives where '..' + // appears earlier and sibling-name appears much later in an + // unrelated position. + const hasBuildContext = literals.some( + l => BUILD_ROOT_SEGMENTS.has(l) || STAGE_SEGMENTS.has(l), + ) + if (hasBuildContext) { + for (let i = 0; i < literals.length - 1; i++) { + if ( + literals[i] === '..' && + KNOWN_SIBLING_PACKAGES.has(literals[i + 1]!) + ) { + const sibling = literals[i + 1]! + const line = offsetToLine(call.offset) + const snippet = (lines[line - 1] ?? '').trim() + findings.push({ + rule: 'B', + file: relPath, + line, + snippet, + message: `Cross-package traversal into '${sibling}' build output.`, + fix: `Add '${sibling}: workspace:*' as a dep, declare an exports entry on '${sibling}' (e.g. 
'./scripts/paths' → './scripts/paths.mts'), and import the path from there.`, + }) + break + } + } + } + } + + // Rule A (template literal variant). Backtick strings like + // `${buildDir}/out/Final/${binary}` or `build/${mode}/${arch}/out/Final` + // construct paths the same way `path.join(...)` does — flag the + // same shapes. Skip raw imports / template tag positions by + // filtering out leading `import.meta.url`-style / tag positions + // implicitly: TEMPLATE_LITERAL_RE matches any backtick string and + // we rely on segment composition to decide if it's a path. + TEMPLATE_LITERAL_RE.lastIndex = 0 + let tmpl: RegExpExecArray | null + while ((tmpl = TEMPLATE_LITERAL_RE.exec(content)) !== null) { + const body = tmpl[1] ?? '' + if (!body.includes('/')) { + continue + } + const segments = templateLiteralSegments(body) + const stages = segments.filter(s => STAGE_SEGMENTS.has(s)) + const buildRoots = segments.filter(s => BUILD_ROOT_SEGMENTS.has(s)) + const modes = segments.filter(s => MODE_SEGMENTS.has(s)) + // Template literal trigger is tighter than path.join() because + // backtick strings often appear in patch fixtures, error messages, + // and other multi-line content that incidentally contains stage + // tokens like `wasm`. Require the canonical build-output shape: + // - 'build' + 'out' + stage (canonical multi-stage layout), OR + // - 2+ stage segments AND 'out' (e.g. `wasm/out/Final`), OR + // - 'build' + stage + literal mode (back-compat with path.join). + const hasBuildAndOut = + buildRoots.includes('build') && buildRoots.includes('out') + const hasOut = buildRoots.includes('out') + const hasBuild = buildRoots.includes('build') + const triggersA = + (hasBuildAndOut && stages.length >= 1) || + (stages.length >= 2 && hasOut) || + (hasBuild && stages.length >= 1 && modes.length >= 1) + if (triggersA) { + const line = offsetToLine(tmpl.index) + const snippet = (lines[line - 1] ?? 
'').trim() + findings.push({ + rule: 'A', + file: relPath, + line, + snippet, + message: + 'Multi-stage path constructed inline via template literal (outside paths.mts).', + fix: 'Construct in the owning paths.mts (or use getFinalBinaryPath / getDownloadedDir from build-infra/lib/paths). Import the computed value here.', + }) + } + } +} + +// ────────────────────────────────────────────────────────────────── +// Rule C + D: workflow YAML scan +// ────────────────────────────────────────────────────────────────── + +const WORKFLOW_PATH_RE = + /build\/\$\{[^}]+\}\/[^"'`\s]*\/out\/(?:Final|Release|Stripped|Compressed|Optimized|Synced)/g +const WORKFLOW_GH_EXPR_PATH_RE = + /build\/\$\{\{\s*[^}]+\}\}\/[^"'`\s]*\/out\/(?:Final|Release|Stripped|Compressed|Optimized|Synced)/g + +const isInsideComputePathsBlock = ( + lines: string[], + lineIdx: number, +): boolean => { + // Walk backwards up to 60 lines looking for the start of the + // current step. If that step is a "Compute paths" step, the line + // is exempt. + for (let i = lineIdx; i >= Math.max(0, lineIdx - 60); i--) { + const l = lines[i] ?? '' + if (/^\s*-\s*name:/i.test(l)) { + // Step boundary — check if THIS step is a Compute paths step. + // The step body may include `id: paths` even if the name is + // something else (e.g. `id: stub-paths`), so look at the next + // ~20 lines for either marker. + for (let j = i; j < Math.min(lines.length, i + 20); j++) { + const m = lines[j] ?? '' + if ( + /^\s*-\s*name:\s*Compute\s+[\w-]+\s+paths/i.test(m) || + /^\s*id:\s*[\w-]*paths\s*$/i.test(m) + ) { + return true + } + if (j > i && /^\s*-\s*name:/i.test(m)) { + // Hit the next step — current step is NOT Compute paths. 
+ return false + } + } + return false + } + } + return false +} + +const scanWorkflowFile = (relPath: string): void => { + const full = path.join(REPO_ROOT, relPath) + let content: string + try { + content = readFileSync(full, 'utf8') + } catch { + return + } + const lines = content.split('\n') + + // First pass: collect every hand-built path occurrence outside a + // "Compute paths" step. Per the mantra, a single reference is fine + // — what's banned is reconstructing the same path 2+ times. + type PathHit = { + line: number + snippet: string + pathStr: string + } + const occurrences = new Map() + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! + if (/^\s*#/.test(line)) { + // Skip comment lines from C scan; they're under D below. + continue + } + if (isInsideComputePathsBlock(lines, i)) { + // Inside the canonical construction step — exempt. + continue + } + WORKFLOW_PATH_RE.lastIndex = 0 + WORKFLOW_GH_EXPR_PATH_RE.lastIndex = 0 + const matches: string[] = [] + let m: RegExpExecArray | null + while ((m = WORKFLOW_PATH_RE.exec(line)) !== null) { + matches.push(m[0]) + } + while ((m = WORKFLOW_GH_EXPR_PATH_RE.exec(line)) !== null) { + matches.push(m[0]) + } + for (const pathStr of matches) { + const list = occurrences.get(pathStr) ?? [] + list.push({ line: i + 1, snippet: line.trim(), pathStr }) + occurrences.set(pathStr, list) + } + } + + // Flag every occurrence of a shape that appears 2+ times. + for (const [pathStr, hits] of occurrences) { + if (hits.length < 2) { + continue + } + for (const hit of hits) { + findings.push({ + rule: 'C', + file: relPath, + line: hit.line, + snippet: hit.snippet, + message: `Workflow constructs the same path ${hits.length} times: ${pathStr}`, + fix: 'Add a "Compute paths" step (id: paths) early in the job that computes this path ONCE and exposes it via $GITHUB_OUTPUT. Reference as ${{ steps.paths.outputs. }} in subsequent steps. 
References of the constructed value are unlimited; reconstructing is the violation.', + }) + } + } + + // Rule D: comments encoding a fully-qualified multi-stage path + // (separate scan since it has different semantics). + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! + if (!/^\s*#/.test(line)) { + continue + } + const literalShape = + /build\/(?:dev|prod|shared)\/[a-z0-9-]+\/(?:wasm\/)?out\/(?:Final|Release|Stripped|Compressed|Optimized|Synced)/i + if (literalShape.test(line)) { + findings.push({ + rule: 'D', + file: relPath, + line: i + 1, + snippet: line.trim(), + message: 'Comment encodes a fully-qualified path string.', + fix: 'Cite the canonical paths.mts (e.g. "see packages//scripts/paths.mts:getBuildPaths()") instead of duplicating the path string. Comments may describe structure with placeholders ("/") but should not be a parsable path.', + }) + } + } +} + +// ────────────────────────────────────────────────────────────────── +// Rule G: Makefile / Dockerfile / shell scan +// ────────────────────────────────────────────────────────────────── + +const SCRIPT_HAND_BUILT_RE = + /build\/\$?\{?(?:BUILD_MODE|MODE|prod|dev)\}?\/[\w${}.-]*\/out\/(?:Final|Release|Stripped|Compressed|Optimized|Synced)/g + +const scanScriptFile = (relPath: string): void => { + const full = path.join(REPO_ROOT, relPath) + let content: string + try { + content = readFileSync(full, 'utf8') + } catch { + return + } + const lines = content.split('\n') + const isDockerfile = + /Dockerfile/i.test(relPath) || /\.glibc$|\.musl$/.test(relPath) + + // First pass: collect every multi-stage path occurrence in this file, + // scoped per Dockerfile stage (each `FROM ... AS ...` starts a new + // scope where ENV/ARG don't propagate). + type Hit = { line: number; text: string; pathStr: string; stage: number } + const hits: Hit[] = [] + let stage = 0 + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! 
+ if (/^\s*#/.test(line)) { + // Skip comments — documentation, not construction. + continue + } + if (isDockerfile && /^FROM\s+/i.test(line)) { + stage += 1 + continue + } + SCRIPT_HAND_BUILT_RE.lastIndex = 0 + let m: RegExpExecArray | null + while ((m = SCRIPT_HAND_BUILT_RE.exec(line)) !== null) { + hits.push({ + line: i + 1, + text: line.trim(), + pathStr: m[0], + stage, + }) + } + } + + // Group by (stage, pathStr) — only flag when a path is built 2+ + // times within the SAME Dockerfile stage (or anywhere in non- + // Dockerfile scripts, where stages don't apply). + const grouped = new Map() + for (const h of hits) { + const key = `${h.stage}::${h.pathStr}` + const list = grouped.get(key) ?? [] + list.push(h) + grouped.set(key, list) + } + for (const [, list] of grouped) { + if (list.length < 2) { + continue + } + for (const hit of list) { + findings.push({ + rule: 'G', + file: relPath, + line: hit.line, + snippet: hit.text, + message: `Hand-built multi-stage path constructed ${list.length} times in this file: ${hit.pathStr}`, + fix: 'Assign to a variable / ENV once near the top of the script / Dockerfile stage, with a comment naming the canonical paths.mts. Reference the variable everywhere downstream. References of a single construction are unlimited; reconstructing the same path is the violation.', + }) + } + } +} + +// ────────────────────────────────────────────────────────────────── +// Rule F: cross-file path repetition +// ────────────────────────────────────────────────────────────────── + +const checkRuleF = (): void => { + // A path is "constructed" each time we see a new path.join with a + // matching shape. Group findings of Rule A by their snippet shape; + // when the same shape appears in 2+ files, demote them to Rule F so + // the message is more accurate. 
const byShape = new Map<string, Finding[]>()
+ for (const rel of walk(REPO_ROOT, p => { + const base = path.basename(p) + return ( + base === 'Makefile' || + base.endsWith('.mk') || + base.endsWith('.Dockerfile') || + base === 'Dockerfile' || + base.endsWith('.glibc') || + base.endsWith('.musl') || + (base.endsWith('.sh') && !p.includes('test/')) + ) + })) { + if (isExempt(rel)) { + continue + } + scanScriptFile(rel) + } + // Promote cross-file Rule-A repeats to Rule F. + checkRuleF() + + // Filter against allowlist. + const blocking = findings.filter(f => !isAllowlisted(f)) + + if (args.values.json) { + process.stdout.write( + JSON.stringify( + { findings: blocking, allowlisted: findings.length - blocking.length }, + null, + 2, + ) + '\n', + ) + return blocking.length === 0 ? 0 : 1 + } + + if (blocking.length === 0) { + if (!args.values.quiet) { + logger.success('Path-hygiene check passed (1 path, 1 reference)') + if (findings.length > 0) { + logger.substep(`${findings.length} finding(s) allowlisted`) + } + } + return 0 + } + + logger.error(`Path-hygiene check FAILED — ${blocking.length} finding(s)`) + logger.log('') + logger.log('Mantra: 1 path, 1 reference') + logger.log('') + for (const f of blocking) { + logger.log(` [${f.rule}] ${f.file}:${f.line}`) + logger.log(` ${f.snippet}`) + logger.log(` → ${f.message}`) + if (args.values['show-hashes']) { + logger.log(` snippet_hash: ${snippetHash(f.snippet)}`) + } + if (args.values.explain) { + logger.log(` Fix: ${f.fix}`) + } + logger.log('') + } + if (!args.values.explain) { + logger.log('Run with --explain to see fix suggestions per finding.') + logger.log( + 'Add intentional exceptions to .github/paths-allowlist.yml with a `reason` field.', + ) + logger.log( + 'Run with --show-hashes to print the snippet_hash for each finding (drift-resistant allowlisting).', + ) + } + return 1 +} + +try { + process.exitCode = main() +} catch (e) { + logger.error(`Path-hygiene gate crashed: ${e}`) + process.exitCode = 2 +} diff --git 
a/.claude/skills/path-guard/reference/claude-md-rule.md b/.claude/skills/path-guard/reference/claude-md-rule.md new file mode 100644 index 000000000..3e32b1bae --- /dev/null +++ b/.claude/skills/path-guard/reference/claude-md-rule.md @@ -0,0 +1,29 @@ + + +## 1 path, 1 reference + +**A path is *constructed* exactly once. Everywhere else *references* the constructed value.** + +Referencing a single computed path many times is fine — that's the whole point of computing it once. What's banned is *re-constructing* the same path in multiple places, because that's where drift is born. + +Three concrete shapes: + +1. **Within a package** — every script, test, and lib file that needs a build path imports it from the package's `scripts/paths.mts` (or `lib/paths.mts`). No `path.join('build', mode, ...)` outside that module. + +2. **Across packages** — when package B consumes package A's output, B imports A's `paths.mts` via the workspace `exports` field. Never `path.join(PKG, '..', '', 'build', ...)`. The R28 yoga/ink bug — ink hand-building yoga's wasm path and missing the `wasm/` segment — is the canonical failure mode this rule prevents. + +3. **Workflows, Dockerfiles, shell scripts** — they can't `import` TS, so they construct the string once and reference it everywhere downstream. Workflows: a "Compute paths" step exposes `steps.paths.outputs.final_dir`; later steps read `${{ steps.paths.outputs.final_dir }}`. Dockerfiles/shell: assign once to a variable / `ENV`, reference by name thereafter. Each canonical construction carries a comment naming the source-of-truth `paths.mts`. **Re-building** the same path in a second step is the violation, not referring to the constructed value many times. + +Comments may describe path *structure* with placeholders ("`/`" or "`${BUILD_MODE}/${PLATFORM_ARCH}`") but should not encode a complete literal path string. 
Code execution takes priority over docs: violations in `.mts`/`.cts`, Makefiles, Dockerfiles, workflow YAML, and shell scripts are blocking. README and doc-comment violations are advisory unless they contain a fully-qualified path with no parametric placeholders. + +### Three-level enforcement + +- **Hook** — `.claude/hooks/path-guard/` blocks `Edit`/`Write` calls that would introduce a violation in a `.mts`/`.cts` file. Refusal at edit time stops new duplication from landing. +- **Gate** — `scripts/check-paths.mts` runs in `pnpm check`. Fails the build on any violation that isn't allowlisted in `.github/paths-allowlist.yml`. +- **Skill** — `/path-guard` audits the repo and fixes findings; `/path-guard check` reports only; `/path-guard install` drops the gate + hook + rule into a fresh repo. + +The mantra is intentionally short so it sticks: **1 path, 1 reference**. When in doubt, find the canonical owner and import from it. diff --git a/.claude/skills/path-guard/reference/paths-allowlist.yml.tmpl b/.claude/skills/path-guard/reference/paths-allowlist.yml.tmpl new file mode 100644 index 000000000..e2746660c --- /dev/null +++ b/.claude/skills/path-guard/reference/paths-allowlist.yml.tmpl @@ -0,0 +1,28 @@ +# Path-hygiene gate allowlist. +# Mantra: 1 path, 1 reference. +# +# Each entry exempts a specific finding from `scripts/check-paths.mts`. +# Entries MUST carry a `reason` so the list stays audit-able and +# entries can be removed when the underlying code changes. +# +# Schema (all top-level keys optional except `reason`): +# +# - rule: Rule letter (A, B, C, D, F, G). Omit to match any rule. +# file: Substring match against the relative file path. +# pattern: Substring match against the offending snippet. +# line: Line number; matches if within ±2 of the finding. +# reason: Why this site is genuinely exempt. Required. +# +# Prefer narrow entries (rule + file + line + pattern) over blanket +# `file:` entries that exempt the whole file. 
Genuine exemptions are +# rare — most "false positives" should be reported as gate bugs. +# +# Example: +# +# - rule: A +# file: packages/foo/scripts/legacy-build.mts +# line: 42 +# pattern: "path.join(testDir, 'out', 'Final')" +# reason: | +# legacy-build.mts is scheduled for removal in v2.0; refactoring +# its path construction now would conflict with the rewrite. diff --git a/.claude/skills/security-scan/SKILL.md b/.claude/skills/security-scan/SKILL.md index 7f2fd77e8..0c2cf12ed 100644 --- a/.claude/skills/security-scan/SKILL.md +++ b/.claude/skills/security-scan/SKILL.md @@ -2,6 +2,7 @@ name: security-scan description: Runs a multi-tool security scan — AgentShield for Claude config, zizmor for GitHub Actions, and optionally Socket CLI for dependency scanning. Produces an A-F graded security report. Use after modifying `.claude/` config, hooks, agents, or GitHub Actions workflows, and before releases. user-invocable: true +allowed-tools: Task, Bash, Read, Grep, Glob --- # Security Scan diff --git a/.config/tsconfig.check.json b/.config/tsconfig.check.json index e19788a4f..2f0e97d1b 100644 --- a/.config/tsconfig.check.json +++ b/.config/tsconfig.check.json @@ -12,6 +12,8 @@ "../packages/cli/.config/*.mts" ], "exclude": [ + "../.cache/**", + "../packages/cli/.cache/**", "../packages/cli/**/*.tsx", "../packages/cli/**/*.d.mts", "../packages/cli/src/commands/analytics/output-analytics.mts", diff --git a/.env.example b/.env.example index 691c00890..de9adb650 100644 --- a/.env.example +++ b/.env.example @@ -2,7 +2,6 @@ # Copy this file to .env.local and customize for your local environment. # Node.js Configuration (optional overrides). -NODE_COMPILE_CACHE="./.cache" NODE_OPTIONS="--max-old-space-size=8192 --max-semi-space-size=1024" # Socket API Configuration (for e2e testing). 
diff --git a/.env.precommit b/.env.precommit index 1ee9eda75..75db740a8 100644 --- a/.env.precommit +++ b/.env.precommit @@ -8,5 +8,4 @@ SOCKET_CLI_NO_API_TOKEN=1 VITEST=1 # Node.js optimization for test performance. -NODE_COMPILE_CACHE="./.cache" NODE_OPTIONS="--max-old-space-size=8192" diff --git a/.git-hooks/_api-key-check.sh b/.git-hooks/_api-key-check.sh new file mode 100755 index 000000000..ce07b2505 --- /dev/null +++ b/.git-hooks/_api-key-check.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Shared helpers for git hooks — API-key scanner allowlist + color codes. +# Sourced by .git-hooks/commit-msg, pre-commit, pre-push. +# +# Constants +# --------- +# ALLOWED_PUBLIC_KEY The real public API key shipped in socket-lib test +# fixtures. Safe to appear in commits anywhere in the +# fleet. +# FAKE_TOKEN_MARKER Substring marker used in test fixtures (see +# socket-lib/test/unit/utils/fake-tokens.ts). Any line +# containing this string is treated as a test fixture +# by the API-key scanner. +# FAKE_TOKEN_LEGACY Legacy lib-scoped marker — accepted during the +# rename from `socket-lib-test-fake-token` to +# `socket-test-fake-token`. Drop when lib's rename PR +# lands. +# SOCKET_SECURITY_ENV Name of the env var used in shell examples; not a +# token value itself. Exempted from scanners. +# +# Functions +# --------- +# filter_allowed_api_keys Reads stdin, drops lines matching any allowlist +# entry, prints the rest. Usage: +# echo "$text" | filter_allowed_api_keys +# grep ... 
| filter_allowed_api_keys +# +# Colors +# ------ +# RED, GREEN, YELLOW, NC + +# shellcheck disable=SC2034 # constants sourced by other hooks +ALLOWED_PUBLIC_KEY="sktsec_t_--RAN5U4ivauy4w37-6aoKyYPDt5ZbaT5JBVMqiwKo_api" +FAKE_TOKEN_MARKER="socket-test-fake-token" +FAKE_TOKEN_LEGACY="socket-lib-test-fake-token" +SOCKET_SECURITY_ENV="SOCKET_SECURITY_API_KEY=" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +# Strips lines that match the allowlist: public key, current fake-token +# marker, legacy lib-scoped marker, env-var name, or `.example` paths. +filter_allowed_api_keys() { + grep -v "$ALLOWED_PUBLIC_KEY" \ + | grep -v "$FAKE_TOKEN_MARKER" \ + | grep -v "$FAKE_TOKEN_LEGACY" \ + | grep -v "$SOCKET_SECURITY_ENV" \ + | grep -v '\.example' +} diff --git a/.git-hooks/_helpers.mts b/.git-hooks/_helpers.mts new file mode 100644 index 000000000..fde8c3fa5 --- /dev/null +++ b/.git-hooks/_helpers.mts @@ -0,0 +1,304 @@ +// Shared helpers for git hooks — API-key allowlist + ANSI colors + +// content scanners. Imported by .git-hooks/{commit-msg,pre-commit, +// pre-push}.mts. No third-party deps; uses only Node built-ins. +// +// Requires Node 25+ for stable .mts type-stripping (no flag needed). +// Earlier Node versions either lacked --experimental-strip-types or +// shipped it under a flag, both unacceptable for hook ergonomics. + +import { spawnSync } from 'node:child_process' +import { existsSync, readFileSync, statSync } from 'node:fs' + +// Hard-fail if Node is below 25. This runs at module load — every +// hook invocation imports _helpers.mts before doing anything, so the +// version check is the first thing that happens. 
+const NODE_MIN_MAJOR = 25 +const nodeMajor = Number.parseInt( + process.versions.node.split('.')[0] || '0', + 10, +) +if (nodeMajor < NODE_MIN_MAJOR) { + process.stderr.write( + `\x1b[0;31m✗ Hook requires Node >= ${NODE_MIN_MAJOR}.0.0 (have v${process.versions.node})\x1b[0m\n`, + ) + process.stderr.write( + 'Install Node 25+ — these hooks rely on stable .mts type stripping.\n', + ) + process.exit(1) +} + +// ── Allowlist constants ──────────────────────────────────────────── +// These exempt known-safe matches from the API-key scanner. Each +// allowlist entry is a substring; if the matched line contains it, +// the line is dropped from the findings. + +// Real public API key shipped in socket-lib test fixtures. Safe to +// appear anywhere in the fleet. +export const ALLOWED_PUBLIC_KEY = + 'sktsec_t_--RAN5U4ivauy4w37-6aoKyYPDt5ZbaT5JBVMqiwKo_api' + +// Substring marker used in test fixtures (see +// socket-lib/test/unit/utils/fake-tokens.ts). Lines containing this +// are treated as test fixtures. +export const FAKE_TOKEN_MARKER = 'socket-test-fake-token' + +// Legacy lib-scoped marker — accepted during the rename from +// `socket-lib-test-fake-token` to `socket-test-fake-token`. Drop when +// lib's rename PR lands. +export const FAKE_TOKEN_LEGACY = 'socket-lib-test-fake-token' + +// Name of the env var used in shell examples; not a token value. 
+export const SOCKET_SECURITY_ENV = 'SOCKET_SECURITY_API_KEY=' + +// ── ANSI colors ──────────────────────────────────────────────────── + +export const RED = '\x1b[0;31m' +export const GREEN = '\x1b[0;32m' +export const YELLOW = '\x1b[1;33m' +export const NC = '\x1b[0m' + +// ── Output helpers ───────────────────────────────────────────────── + +export const out = (msg: string): void => { + process.stdout.write(msg + '\n') +} + +export const err = (msg: string): void => { + process.stderr.write(msg + '\n') +} + +export const red = (msg: string): string => `${RED}${msg}${NC}` +export const green = (msg: string): string => `${GREEN}${msg}${NC}` +export const yellow = (msg: string): string => `${YELLOW}${msg}${NC}` + +// ── API-key allowlist filter ─────────────────────────────────────── + +// Drops any line that matches an allowlist entry. +export const filterAllowedApiKeys = (lines: readonly string[]): string[] => { + return lines.filter( + line => + !line.includes(ALLOWED_PUBLIC_KEY) && + !line.includes(FAKE_TOKEN_MARKER) && + !line.includes(FAKE_TOKEN_LEGACY) && + !line.includes(SOCKET_SECURITY_ENV) && + !line.includes('.example'), + ) +} + +// ── Personal-path scanner ────────────────────────────────────────── + +// Real personal paths to flag: /Users/foo/, /home/foo/, C:\Users\foo\. +const PERSONAL_PATH_RE = + /(\/Users\/[^/\s]+\/|\/home\/[^/\s]+\/|C:\\Users\\[^\\]+\\)/ + +// Placeholders we ALLOW (documentation, not real leaks): any path +// component wrapped in <...> or starting with $VAR / ${VAR}. +const PERSONAL_PATH_PLACEHOLDER_RE = + /(\/Users\/<[^>]*>\/|\/home\/<[^>]*>\/|C:\\Users\\<[^>]*>\\|\/Users\/\$\{?[A-Z_]+\}?\/|\/home\/\$\{?[A-Z_]+\}?\/)/ + +export type LineHit = { lineNumber: number; line: string } + +// Returns lines that contain a real personal path (excludes lines +// that are pure placeholders). Caller decides what to do with hits. 
+export const scanPersonalPaths = (text: string): LineHit[] => { + const hits: LineHit[] = [] + const lines = text.split('\n') + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! + if (!PERSONAL_PATH_RE.test(line)) { + continue + } + if (PERSONAL_PATH_PLACEHOLDER_RE.test(line)) { + // Has placeholder — but might also have a real path on the + // same line. Strip placeholder forms and re-test. + const stripped = line.replace( + new RegExp(PERSONAL_PATH_PLACEHOLDER_RE, 'g'), + '', + ) + if (!PERSONAL_PATH_RE.test(stripped)) { + continue + } + } + hits.push({ lineNumber: i + 1, line }) + } + return hits +} + +// ── Secret scanners ──────────────────────────────────────────────── + +const SOCKET_API_KEY_RE = /sktsec_[a-zA-Z0-9_-]+/ +const AWS_KEY_RE = /(aws_access_key|aws_secret|\bAKIA[0-9A-Z]{16}\b)/i +const GITHUB_TOKEN_RE = /gh[ps]_[a-zA-Z0-9]{36}/ +const PRIVATE_KEY_RE = /-----BEGIN (RSA |EC |DSA )?PRIVATE KEY-----/ + +export const scanSocketApiKeys = (text: string): LineHit[] => { + const hits: LineHit[] = [] + const lines = text.split('\n') + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! + if (SOCKET_API_KEY_RE.test(line)) { + hits.push({ lineNumber: i + 1, line }) + } + } + // Filter the LineHit objects directly so duplicate-content lines + // at different line numbers keep their correct numbers. + const allowedSet = new Set(filterAllowedApiKeys(hits.map(h => h.line))) + return hits.filter(h => allowedSet.has(h.line)) +} + +export const scanAwsKeys = (text: string): LineHit[] => { + const hits: LineHit[] = [] + const lines = text.split('\n') + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! + if (AWS_KEY_RE.test(line)) { + hits.push({ lineNumber: i + 1, line }) + } + } + return hits +} + +export const scanGitHubTokens = (text: string): LineHit[] => { + const hits: LineHit[] = [] + const lines = text.split('\n') + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! 
+ if (GITHUB_TOKEN_RE.test(line)) { + hits.push({ lineNumber: i + 1, line }) + } + } + return hits +} + +export const scanPrivateKeys = (text: string): LineHit[] => { + const hits: LineHit[] = [] + const lines = text.split('\n') + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! + if (PRIVATE_KEY_RE.test(line)) { + hits.push({ lineNumber: i + 1, line }) + } + } + return hits +} + +// ── npx/dlx scanner ──────────────────────────────────────────────── + +const NPX_DLX_RE = /\b(npx|pnpm dlx|yarn dlx)\b/ + +export const scanNpxDlx = (text: string): LineHit[] => { + const hits: LineHit[] = [] + const lines = text.split('\n') + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! + if (NPX_DLX_RE.test(line) && !line.includes('# zizmor:')) { + hits.push({ lineNumber: i + 1, line }) + } + } + return hits +} + +// ── Linear issue reference scanner ───────────────────────────────── +// CLAUDE.md "ABSOLUTE RULES": NEVER reference Linear issues in commits. +// Team keys enumerated from the Socket workspace. PATCH listed before +// PAT so the alternation matches the longer prefix first. + +const LINEAR_TEAM_KEYS = + 'ASK|AUTO|BOT|CE|CORE|DAT|DES|DEV|ENG|INFRA|LAB|MAR|MET|OPS|PAR|PATCH|PAT|PLAT|REA|SALES|SBOM|SEC|SMO|SUP|TES|TI|WEB' + +const LINEAR_ISSUE_RE = new RegExp( + `(?:^|[^A-Za-z0-9_])((?:${LINEAR_TEAM_KEYS})-[0-9]+)(?:$|[^A-Za-z0-9_])`, + 'gm', +) + +const LINEAR_URL_RE = /linear\.app\/[A-Za-z0-9/_-]+/g + +export const scanLinearReferences = (commitMsg: string): string[] => { + const hits: string[] = [] + const lines = commitMsg.split('\n').filter(l => !l.startsWith('#')) + const body = lines.join('\n') + for (const m of body.matchAll(LINEAR_ISSUE_RE)) { + hits.push(m[1]!) + } + for (const m of body.matchAll(LINEAR_URL_RE)) { + hits.push(m[0]!) 
+ } + return hits.slice(0, 5) +} + +// ── AI attribution scanner ───────────────────────────────────────── + +const AI_ATTRIBUTION_RE = + /(Generated with.*(Claude|AI)|Co-Authored-By: Claude|Co-Authored-By: AI|🤖 Generated|AI generated|@anthropic\.com|Assistant:|Generated by Claude|Machine generated|Claude Code)/i + +export const containsAiAttribution = (text: string): boolean => + AI_ATTRIBUTION_RE.test(text) + +export const stripAiAttribution = ( + text: string, +): { cleaned: string; removed: number } => { + const lines = text.split('\n') + const kept: string[] = [] + let removed = 0 + for (const line of lines) { + if (AI_ATTRIBUTION_RE.test(line)) { + removed++ + } else { + kept.push(line) + } + } + return { cleaned: kept.join('\n'), removed } +} + +// ── File classification ──────────────────────────────────────────── + +// Files we never scan: hooks themselves, husky shims, test fixtures. +const SKIP_FILE_RE = + /\.(test|spec)\.(m?[jt]s|tsx?|cts|mts)$|\.example$|\/test\/|\/tests\/|fixtures\/|\.git-hooks\/|\.husky\/|node_modules\/|pnpm-lock\.yaml/ + +export const shouldSkipFile = (filePath: string): boolean => + SKIP_FILE_RE.test(filePath) + +// Returns file content as a string. For binaries, runs `strings` to +// extract printable byte sequences (catches paths embedded in WASM +// or other compiled artifacts). +export const readFileForScan = (filePath: string): string => { + if (!existsSync(filePath)) { + return '' + } + try { + if (statSync(filePath).isDirectory()) { + return '' + } + } catch { + return '' + } + // Detect binary via grep -I (matches text-only); if grep says + // binary, fall back to `strings`. + const grepResult = spawnSync('grep', ['-qI', '', filePath]) + if (grepResult.status === 0) { + // Text file. + try { + return readFileSync(filePath, 'utf8') + } catch { + return '' + } + } + // Binary — extract strings. 
+ const stringsResult = spawnSync('strings', [filePath], { + encoding: 'utf8', + }) + return stringsResult.stdout || '' +} + +// ── Git wrappers ─────────────────────────────────────────────────── + +export const git = (...args: string[]): string => { + const result = spawnSync('git', args, { encoding: 'utf8' }) + return result.stdout.trim() +} + +export const gitLines = (...args: string[]): string[] => { + const out = git(...args) + return out ? out.split('\n') : [] +} diff --git a/.git-hooks/_helpers.sh b/.git-hooks/_helpers.sh deleted file mode 100644 index 15e9a4083..000000000 --- a/.git-hooks/_helpers.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -# Shared helpers for git hooks. -# Sourced by .git-hooks/commit-msg, pre-commit, pre-push. -# -# Constants -# --------- -# ALLOWED_PUBLIC_KEY Real public API key shipped in socket-lib test -# fixtures. Safe to appear in commits anywhere. -# FAKE_TOKEN_MARKER Substring marker used in fleet test fixtures. -# FAKE_TOKEN_LEGACY Legacy lib-scoped marker — accepted during the -# rename from `socket-lib-test-fake-token` to -# `socket-test-fake-token`. Drop when socket-lib's -# fixture rename PR lands. -# SOCKET_SECURITY_ENV Env var name used in shell examples; not a token. -# -# Functions -# --------- -# filter_allowed_api_keys Reads stdin, drops allowlist matches (public -# key, fake-token markers, env var name, -# `.example` paths), prints the rest. 
-# -# Colors -# ------ -# RED, GREEN, YELLOW, NC - -# shellcheck disable=SC2034 # constants sourced by other hooks -ALLOWED_PUBLIC_KEY="sktsec_t_--RAN5U4ivauy4w37-6aoKyYPDt5ZbaT5JBVMqiwKo_api" -FAKE_TOKEN_MARKER="socket-test-fake-token" -FAKE_TOKEN_LEGACY="socket-lib-test-fake-token" -SOCKET_SECURITY_ENV="SOCKET_SECURITY_API_KEY=" - -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' - -filter_allowed_api_keys() { - grep -v "$ALLOWED_PUBLIC_KEY" \ - | grep -v "$FAKE_TOKEN_MARKER" \ - | grep -v "$FAKE_TOKEN_LEGACY" \ - | grep -v "$SOCKET_SECURITY_ENV" \ - | grep -v '\.example' -} diff --git a/.git-hooks/commit-msg b/.git-hooks/commit-msg deleted file mode 100755 index 7acf4c56b..000000000 --- a/.git-hooks/commit-msg +++ /dev/null @@ -1,90 +0,0 @@ -#!/bin/bash -# Socket Security Commit-msg Hook -# Additional security layer - validates commit even if pre-commit was bypassed. - -set -e - -# shellcheck source=./_helpers.sh -. "$(dirname "$0")/_helpers.sh" - -ERRORS=0 - -# Get files in this commit (for security checks). -COMMITTED_FILES=$(git diff --cached --name-only --diff-filter=ACM 2>/dev/null || printf "\n") - -# Quick checks for critical issues in committed files. -if [ -n "$COMMITTED_FILES" ]; then - for file in $COMMITTED_FILES; do - if [ -f "$file" ]; then - # Check for Socket API keys (except allowed). - if grep -E 'sktsec_[a-zA-Z0-9_-]+' "$file" 2>/dev/null | filter_allowed_api_keys | grep -q .; then - printf "${RED}✗ SECURITY: Potential API key detected in commit!${NC}\n" - printf "File: %s\n" "$file" - ERRORS=$((ERRORS + 1)) - fi - - # Check for .env files. - if echo "$file" | grep -qE '^\.env(\.[^/]+)?$' && ! echo "$file" | grep -qE '^\.env\.(example|test)$'; then - printf "${RED}✗ SECURITY: .env file in commit!${NC}\n" - ERRORS=$((ERRORS + 1)) - fi - fi - done -fi - -# Block Linear issue references in the commit message. -# Linear tracking lives in Linear; keep commit history tool-agnostic. 
-# Team keys enumerated from the Socket workspace. PATCH listed before PAT so -# the engine matches the longer prefix first on strings like "PATCH-123". -COMMIT_MSG_FILE="$1" -LINEAR_TEAM_KEYS='ASK|AUTO|BOT|CE|CORE|DAT|DES|DEV|ENG|INFRA|LAB|MAR|MET|OPS|PAR|PATCH|PAT|PLAT|REA|SALES|SBOM|SEC|SMO|SUP|TES|TI|WEB' -if [ -f "$COMMIT_MSG_FILE" ]; then - LINEAR_HITS=$(grep -vE '^#' "$COMMIT_MSG_FILE" 2>/dev/null \ - | grep -oE "(^|[^A-Za-z0-9_])($LINEAR_TEAM_KEYS)-[0-9]+($|[^A-Za-z0-9_])|linear\.app/[A-Za-z0-9/_-]+" \ - | head -5 || true) - if [ -n "$LINEAR_HITS" ]; then - printf "${RED}✗ Commit message references Linear issue(s):${NC}\n" - printf '%s\n' "$LINEAR_HITS" | sed 's/^/ /' - printf "${RED}Linear tracking lives in Linear. Remove the reference from the commit message.${NC}\n" - ERRORS=$((ERRORS + 1)) - fi -fi - -# Auto-strip AI attribution from commit message. -if [ -f "$COMMIT_MSG_FILE" ]; then - # Create a temporary file to store the cleaned message. - TEMP_FILE=$(mktemp) || { - printf "${RED}✗ Failed to create temporary file${NC}\n" >&2 - exit 1 - } - # Ensure cleanup on exit - trap 'rm -f "$TEMP_FILE"' EXIT - REMOVED_LINES=0 - - # Read the commit message line by line and filter out AI attribution. - while IFS= read -r line || [ -n "$line" ]; do - # Check if this line contains AI attribution patterns. - if echo "$line" | grep -qiE "(Generated with|Co-Authored-By: Claude|Co-Authored-By: AI|🤖 Generated|AI generated|Claude Code|@anthropic|Assistant:|Generated by Claude|Machine generated)"; then - REMOVED_LINES=$((REMOVED_LINES + 1)) - else - # Line doesn't contain AI attribution, keep it. - printf '%s\n' "$line" >> "$TEMP_FILE" - fi - done < "$COMMIT_MSG_FILE" - - # Replace the original commit message with the cleaned version. - if [ $REMOVED_LINES -gt 0 ]; then - mv "$TEMP_FILE" "$COMMIT_MSG_FILE" - printf "${GREEN}✓ Auto-stripped${NC} $REMOVED_LINES AI attribution line(s) from commit message\n" - else - # No lines were removed, just clean up the temp file. 
- rm -f "$TEMP_FILE" - fi -fi - -if [ $ERRORS -gt 0 ]; then - printf "${RED}✗ Commit blocked by security validation${NC}\n" - exit 1 -fi - -exit 0 diff --git a/.git-hooks/commit-msg.mts b/.git-hooks/commit-msg.mts new file mode 100644 index 000000000..e080c6d34 --- /dev/null +++ b/.git-hooks/commit-msg.mts @@ -0,0 +1,111 @@ +#!/usr/bin/env node +// Socket Security Commit-msg Hook +// +// Three responsibilities: +// 1. Block commits that introduce API keys / .env files (security +// layer that runs even when pre-commit is bypassed via +// `--no-verify`). +// 2. Block commits whose message references Linear issues — Socket +// keeps Linear tracking out of git history per CLAUDE.md. +// 3. Auto-strip AI attribution lines from the commit message before +// git records the commit. +// +// Wired via .husky/commit-msg, which invokes this with the path to the +// commit message file as argv[2] (after the script path itself). + +import { existsSync, readFileSync, writeFileSync } from 'node:fs' + +import { basename } from 'node:path' +import process from 'node:process' + +import { + err, + gitLines, + green, + out, + red, + readFileForScan, + scanLinearReferences, + scanSocketApiKeys, + shouldSkipFile, + stripAiAttribution, +} from './_helpers.mts' + +const main = (): number => { + let errors = 0 + const committedFiles = gitLines( + 'diff', + '--cached', + '--name-only', + '--diff-filter=ACM', + ) + + for (const file of committedFiles) { + if (!file || shouldSkipFile(file)) { + continue + } + const text = readFileForScan(file) + if (!text) { + continue + } + + // Socket API keys (allowlist-aware). + const apiHits = scanSocketApiKeys(text) + if (apiHits.length > 0) { + out(red('✗ SECURITY: Potential API key detected in commit!')) + out(`File: ${file}`) + errors++ + } + + // .env files at any depth — allow only .env.example, .env.test, + // .env.precommit (templates / tracked placeholders). 
+ const base = basename(file) + if ( + /^\.env(\.[^/]+)?$/.test(base) && + !/^\.env\.(example|test|precommit)$/.test(base) + ) { + out(red('✗ SECURITY: .env file in commit!')) + out(`File: ${file}`) + errors++ + } + } + + const commitMsgFile = process.argv[2] + if (commitMsgFile && existsSync(commitMsgFile)) { + const original = readFileSync(commitMsgFile, 'utf8') + + // Block Linear issue references in the commit message. Socket + // keeps Linear tracking out of git history; commit messages stay + // tool-agnostic. + const linearHits = scanLinearReferences(original) + if (linearHits.length > 0) { + out(red('✗ Commit message references Linear issue(s):')) + for (const hit of linearHits) { + out(` ${hit}`) + } + out( + red( + 'Linear tracking lives in Linear. Remove the reference from the commit message.', + ), + ) + errors++ + } + + // Auto-strip AI attribution lines from the commit message. + const { cleaned, removed } = stripAiAttribution(original) + if (removed > 0) { + writeFileSync(commitMsgFile, cleaned) + out( + `${green('✓ Auto-stripped')} ${removed} AI attribution line(s) from commit message`, + ) + } + } + + if (errors > 0) { + err(red('✗ Commit blocked by security validation')) + return 1 + } + return 0 +} + +process.exit(main()) diff --git a/.git-hooks/pre-commit.mts b/.git-hooks/pre-commit.mts new file mode 100644 index 000000000..aa3898678 --- /dev/null +++ b/.git-hooks/pre-commit.mts @@ -0,0 +1,186 @@ +#!/usr/bin/env node +// Socket Security Pre-commit Hook +// +// Local-defense layer: scans staged files for sensitive content +// before git records the commit. Mandatory enforcement re-runs in +// pre-push for the final gate. +// +// Bypassable: --no-verify skips this hook entirely. Use sparingly +// (hotfixes, history operations, pre-build states). 
+ +import process from 'node:process' + +import { + err, + gitLines, + green, + out, + readFileForScan, + red, + scanAwsKeys, + scanGitHubTokens, + scanNpxDlx, + scanPersonalPaths, + scanPrivateKeys, + scanSocketApiKeys, + shouldSkipFile, + yellow, +} from './_helpers.mts' + +const main = (): number => { + out(green('Running Socket Security checks...')) + const stagedFiles = gitLines( + 'diff', + '--cached', + '--name-only', + '--diff-filter=ACM', + ) + if (stagedFiles.length === 0) { + out(green('✓ No files to check')) + return 0 + } + + let errors = 0 + + // .DS_Store files. + out('Checking for .DS_Store files...') + const dsStores = stagedFiles.filter(f => f.includes('.DS_Store')) + if (dsStores.length > 0) { + out(red('✗ ERROR: .DS_Store file detected!')) + dsStores.forEach(f => out(f)) + errors++ + } + + // Log files (ignore test logs). + out('Checking for log files...') + const logs = stagedFiles.filter( + f => f.endsWith('.log') && !/test.*\.log$/.test(f), + ) + if (logs.length > 0) { + out(red('✗ ERROR: Log file detected!')) + logs.forEach(f => out(f)) + errors++ + } + + // .env files (allowlist .env.example / .env.test / .env.precommit). + // Match commit-msg.mts allowlist — .env.precommit is a tracked file + // some repos use to disable test API tokens during pre-commit runs. + out('Checking for .env files...') + const envFiles = stagedFiles.filter( + f => + /(^|\/)\.env(\.[^/]+)?$/.test(f) && + !/(^|\/)\.env\.(example|test|precommit)$/.test(f), + ) + if (envFiles.length > 0) { + out(red('✗ ERROR: .env file detected!')) + envFiles.forEach(f => out(f)) + out( + 'These files should never be committed. Use .env.example for templates.', + ) + errors++ + } + + // Hardcoded personal paths. 
+ out('Checking for hardcoded personal paths...') + for (const file of stagedFiles) { + if (shouldSkipFile(file)) { + continue + } + const text = readFileForScan(file) + if (!text) { + continue + } + const hits = scanPersonalPaths(text) + if (hits.length > 0) { + out(red(`✗ ERROR: Hardcoded personal path found in: ${file}`)) + hits.slice(0, 3).forEach(h => out(`${h.lineNumber}:${h.line.trim()}`)) + out('Replace with relative paths or environment variables.') + errors++ + } + } + + // Socket API keys (warning, not blocking). + out('Checking for API keys...') + for (const file of stagedFiles) { + if (shouldSkipFile(file)) { + continue + } + const text = readFileForScan(file) + if (!text) { + continue + } + const hits = scanSocketApiKeys(text) + if (hits.length > 0) { + out(yellow(`⚠ WARNING: Potential API key found in: ${file}`)) + hits.slice(0, 3).forEach(h => out(`${h.lineNumber}:${h.line.trim()}`)) + out('If this is a real API key, DO NOT COMMIT IT.') + } + } + + // Other secret patterns (AWS, GitHub, private keys). + out('Checking for potential secrets...') + for (const file of stagedFiles) { + if (shouldSkipFile(file)) { + continue + } + const text = readFileForScan(file) + if (!text) { + continue + } + + const aws = scanAwsKeys(text) + if (aws.length > 0) { + out(red(`✗ ERROR: Potential AWS credentials found in: ${file}`)) + aws.slice(0, 3).forEach(h => out(`${h.lineNumber}:${h.line.trim()}`)) + errors++ + } + + const gh = scanGitHubTokens(text) + if (gh.length > 0) { + out(red(`✗ ERROR: Potential GitHub token found in: ${file}`)) + gh.slice(0, 3).forEach(h => out(`${h.lineNumber}:${h.line.trim()}`)) + errors++ + } + + const pk = scanPrivateKeys(text) + if (pk.length > 0) { + out(red(`✗ ERROR: Private key found in: ${file}`)) + errors++ + } + } + + // npx/dlx usage. 
+ out('Checking for npx/dlx usage...') + for (const file of stagedFiles) { + if ( + file.includes('node_modules/') || + file.endsWith('pnpm-lock.yaml') || + file.includes('.git-hooks/') + ) { + continue + } + const text = readFileForScan(file) + if (!text) { + continue + } + const hits = scanNpxDlx(text) + if (hits.length > 0) { + out(red(`✗ ERROR: npx/dlx usage found in: ${file}`)) + hits.slice(0, 3).forEach(h => out(`${h.lineNumber}:${h.line.trim()}`)) + out("Use 'pnpm exec ' or 'pnpm run