open-design/apps/daemon/src/agents.ts
Zakaria a46764fb1b
Some checks failed
ci / Validate workspace (push) Has been cancelled
landing-page-ci / Validate landing page (push) Has been cancelled
landing-page-deploy / Deploy landing page (push) Has been cancelled
github-metrics / Generate repository metrics SVG (push) Has been cancelled
first-commit
2026-05-04 14:58:14 -04:00

1135 lines
46 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// @ts-nocheck
import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
import { existsSync, readdirSync, statSync } from 'node:fs';
import { delimiter } from 'node:path';
import path from 'node:path';
import { homedir } from 'node:os';
import { detectAcpModels } from './acp.js';
import { parsePiModels } from './pi-rpc.js';
// Promise-returning form of `child_process.execFile`. Used in this file for
// the short-lived probe commands (`--version`, `--help`, model listings),
// always with an explicit `timeout` so a hung CLI cannot stall detection.
const execFileP = promisify(execFile);
// Capability flags detected at probe time, keyed by agent id. Each value is
// a plain object mapping a capability key (e.g. `partialMessages`, `addDir`)
// to a boolean recording whether the matching flag string appeared in the
// CLI's `--help` output. `probe` writes the entries; buildArgs consults this
// map so we only pass flags the installed CLI actually advertises. An agent
// has no entry until it has been probed, and its entry is an empty object
// when the `--help` probe failed — each buildArgs decides how to treat a
// missing key. That keeps the spawn safe across older Claude Code releases
// that pre-date a given flag (e.g. `--include-partial-messages`, added in
// 1.0.86).
const agentCapabilities = new Map();
// Per-agent model picker.
//
// - `listModels` : optional spec for fetching the model list from
// the CLI itself ({ args, parse, timeoutMs }).
// When defined we run it during agent detection
// (best-effort, with a timeout) and use the
// result. If the listing fails we fall back to
// `fallbackModels` so the UI still has something
// to show.
// - `fallbackModels` : static hint list. Used as the source of truth
// for CLIs that don't expose a listing command
// (Claude Code, Codex, Devin for Terminal, Gemini CLI, Qwen Code)
// and as the fallback for the others.
// - `reasoningOptions` : optional reasoning-effort presets (Codex and Pi
// are the adapters that expose this knob today).
// - `buildArgs(prompt, imagePaths, extraAllowedDirs, options, runtimeContext)`
// returns argv for the child process. `options = { model, reasoning }`
// carries whatever the user picked in the model menu — agents that don't
// take a model flag ignore them. `runtimeContext` currently carries
// runtime execution details like `{ cwd }` for CLIs that need an explicit
// workspace flag in addition to process cwd.
//
// Every model list is prefixed with a synthetic `'default'` entry meaning
// "let the CLI pick" — the agent runs with no `--model` flag, so the
// user's local CLI config wins.
//
// `extraAllowedDirs` is a list of absolute directories the agent must be
// permitted to read files from (skill seeds, design-system specs) that live
// outside the project cwd. Claude Code and GitHub Copilot CLI wire this
// through (`--add-dir`); other agents either inherit broader access or run
// with cwd boundaries we can't widen via flags.
//
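// `streamFormat` hints to the daemon how to interpret stdout:
// - 'claude-stream-json' : line-delimited JSON emitted by Claude Code's
// `--output-format stream-json`. Daemon parses it into typed events
// (text / thinking / tool_use / tool_result / status) for the UI.
// - 'acp-json-rpc' : ACP JSON-RPC over stdio. Daemon drives the
// initialize/session/new/session/prompt lifecycle and maps updates into
// typed UI events.
// - 'json-event-stream' : structured JSON events from the CLI; entries
// that use it also carry an `eventParser` key (codex / gemini / opencode /
// cursor-agent).
// - 'copilot-stream-json' : JSONL emitted by `copilot --output-format json`.
// - 'pi-rpc' : pi's `--mode rpc` conversation, driven over stdio.
// - 'plain' (default) : raw text, forwarded chunk-by-chunk.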
//
// Permission posture: the daemon spawns each CLI with cwd pinned to the
// project folder (`.od/projects/<id>/`), and the web app has no terminal
// to surface an interactive approve/deny prompt. So every agent runs with
// its non-interactive/auto-approve switch on — otherwise Write/Edit hangs
// or errors and the model has to hallucinate a permission button the UI
// never shows.
//
// `env` is optional per-agent process environment. Keep it limited to
// documented, non-secret runtime knobs that belong to the adapter contract.
// Synthetic "let the CLI pick" entry placed first in every model list. The
// buildArgs implementations skip the `--model` flag when this id is selected.
const DEFAULT_MODEL_OPTION = { id: 'default', label: 'Default (CLI config)' };
// Translate the reasoning effort the user picked into one the selected Codex
// model is known to accept. The Codex CLI itself takes
// `none | minimal | low | medium | high | xhigh`, but individual models allow
// narrower sets:
//   - gpt-5.2 / 5.3 / 5.4 / 5.5 families reject `minimal`  -> use `low`
//   - gpt-5.1 rejects `xhigh`                              -> use `high`
//   - gpt-5.1-codex-mini accepts only `medium` and `high`
// A missing or 'default' model id is treated like gpt-5.5 (codex's current
// default model). Ids we have no rule for pass through untouched; a server
// rejection is the cue to add a rule here.
//
// `modelId` may be provider-qualified (`provider/model`); only the last path
// segment is matched. A falsy `effort` is returned as-is.
function clampCodexReasoning(modelId, effort) {
  if (!effort) {
    return effort;
  }

  const trimmedId = String(modelId ?? '').trim();
  const bareId = trimmedId.includes('/') ? trimmedId.split('/').pop() : trimmedId;

  const lateFamilyPrefixes = ['gpt-5.2', 'gpt-5.3', 'gpt-5.4', 'gpt-5.5'];
  const rejectsMinimal =
    bareId === '' ||
    bareId === 'default' ||
    lateFamilyPrefixes.some((prefix) => bareId.startsWith(prefix));

  if (rejectsMinimal) {
    return effort === 'minimal' ? 'low' : effort;
  }
  if (bareId === 'gpt-5.1') {
    return effort === 'xhigh' ? 'high' : effort;
  }
  if (bareId === 'gpt-5.1-codex-mini') {
    return ['high', 'xhigh'].includes(effort) ? 'high' : 'medium';
  }
  return effort;
}
// Turn the one-id-per-line stdout of `<cli> models` into a model-option
// list headed by the synthetic default option. Blank lines and lines that
// start with `#` are skipped, and repeated ids are emitted once in
// first-seen order (some CLIs print near-duplicates). Used by opencode and
// cursor-agent.
function parseLineSeparatedModels(stdout) {
  const models = [DEFAULT_MODEL_OPTION];
  const emitted = new Set();
  for (const rawLine of String(stdout || '').split('\n')) {
    const id = rawLine.trim();
    const isSkippable = id.length === 0 || id.startsWith('#') || emitted.has(id);
    if (!isSkippable) {
      emitted.add(id);
      models.push({ id, label: id });
    }
  }
  return models;
}
// Registry of supported agent CLIs, one entry per adapter. Each entry
// combines probe metadata (`bin`, `fallbackBins`, `versionArgs`, `helpArgs`,
// `capabilityFlags`), model discovery (`listModels` / `fetchModels` /
// `fallbackModels`), and spawn wiring (`buildArgs`, `promptViaStdin`,
// `streamFormat`, `eventParser`, `env`, `maxPromptArgBytes`). Every
// `buildArgs` returns a fresh argv array and omits `--model` when the picked
// model is the synthetic 'default'.
export const AGENT_DEFS = [
{
id: 'claude',
name: 'Claude Code',
bin: 'claude',
// Drop-in forks that ship a CLI argv-compatible with `claude`. Tried in
// order if `claude` itself isn't on PATH, so users on a single-binary
// install (e.g. only OpenClaude — https://github.com/Gitlawb/openclaude
// — issue #235) get auto-detected without writing wrapper scripts.
fallbackBins: ['openclaude'],
versionArgs: ['--version'],
helpArgs: ['--help'],
capabilityFlags: {
// Flag string -> capability key. After probing `--help`, the probe
// stores `{ [key]: boolean }` under this agent's id in the
// `agentCapabilities` map — true for each flag substring that matched.
'--include-partial-messages': 'partialMessages',
'--add-dir': 'addDir',
},
// `claude` has no list-models subcommand; the CLI accepts both short
// aliases (sonnet/opus/haiku) and the full ids, so we ship both as
// hints. Users who want a non-shipped model can paste it via the
// Settings dialog's custom-model input.
fallbackModels: [
DEFAULT_MODEL_OPTION,
{ id: 'sonnet', label: 'Sonnet (alias)' },
{ id: 'opus', label: 'Opus (alias)' },
{ id: 'haiku', label: 'Haiku (alias)' },
{ id: 'claude-opus-4-5', label: 'claude-opus-4-5' },
{ id: 'claude-sonnet-4-5', label: 'claude-sonnet-4-5' },
{ id: 'claude-haiku-4-5', label: 'claude-haiku-4-5' },
],
// Prompt delivered via stdin to avoid both Linux `spawn E2BIG`
// (MAX_ARG_STRLEN caps a single argv entry at ~128 KB) and Windows
// `spawn ENAMETOOLONG` (CreateProcess caps the full command line at
// ~32 KB direct, ~8 KB via .cmd shim). `claude -p` with no positional
// prompt reads the prompt from stdin under `--input-format text` (the
// default), which has no length cap. Mirrors the codex/gemini/opencode/
// cursor/qwen entries below.
buildArgs: (_prompt, _imagePaths, extraAllowedDirs = [], options = {}) => {
const caps = agentCapabilities.get('claude') || {};
const args = [
'-p',
'--output-format',
'stream-json',
'--verbose',
];
// `--include-partial-messages` lands richer streaming events but only
// exists in newer Claude Code builds. Older installs reject it with
// "unknown option" and exit 1, killing the chat. Gate on the probe.
if (caps.partialMessages) {
args.push('--include-partial-messages');
}
if (options.model && options.model !== 'default') {
args.push('--model', options.model);
}
const dirs = (extraAllowedDirs || []).filter(
(d) => typeof d === 'string' && d.length > 0,
);
// `--add-dir` is older but still gate it for symmetry — old/forked
// builds may lack it. Note the gate is `!== false`: the flag is only
// dropped when the probe positively reported it missing, so an
// unprobed or failed-probe agent still receives `--add-dir`.
if (dirs.length > 0 && caps.addDir !== false) {
args.push('--add-dir', ...dirs);
}
args.push('--permission-mode', 'bypassPermissions');
return args;
},
promptViaStdin: true,
streamFormat: 'claude-stream-json',
},
{
id: 'codex',
name: 'Codex CLI',
bin: 'codex',
versionArgs: ['--version'],
// Codex doesn't have a `models` subcommand; ship the most common ids
// as a hint. Users can supply other ids via the custom-model input.
fallbackModels: [
DEFAULT_MODEL_OPTION,
{ id: 'gpt-5-codex', label: 'gpt-5-codex' },
{ id: 'gpt-5', label: 'gpt-5' },
{ id: 'o3', label: 'o3' },
{ id: 'o4-mini', label: 'o4-mini' },
],
reasoningOptions: [
{ id: 'default', label: 'Default' },
{ id: 'minimal', label: 'Minimal' },
{ id: 'low', label: 'Low' },
{ id: 'medium', label: 'Medium' },
{ id: 'high', label: 'High' },
],
// Prompt is delivered via stdin pipe (gated by `promptViaStdin: true`
// below) to avoid Windows `spawn ENAMETOOLONG` while keeping Codex on
// its structured JSON stream. Recent Codex CLI versions reject a bare
// `-` argv sentinel — passing both the pipe and `-` produces
// `error: unexpected argument '-' found` and the agent exits with
// code 2 before any prompt is read (see issue #237). The pipe alone
// is sufficient for stdin delivery.
buildArgs: (_prompt, _imagePaths, _extra, options = {}, runtimeContext = {}) => {
const args = [
'exec',
'--json',
'--skip-git-repo-check',
'--full-auto',
'-c',
'sandbox_workspace_write.network_access=true',
];
// Opt-out switch read from the daemon's own environment.
if (process.env.OD_CODEX_DISABLE_PLUGINS === '1') {
args.push('--disable', 'plugins');
}
if (runtimeContext.cwd) {
args.push('-C', runtimeContext.cwd);
}
if (options.model && options.model !== 'default') {
args.push('--model', options.model);
}
if (options.reasoning && options.reasoning !== 'default') {
// Clamp first so the effort is one the picked model accepts.
const effort = clampCodexReasoning(options.model, options.reasoning);
// Codex accepts `-c key=value` config overrides; reasoning effort
// is exposed as `model_reasoning_effort`.
args.push('-c', `model_reasoning_effort="${effort}"`);
}
return args;
},
promptViaStdin: true,
streamFormat: 'json-event-stream',
eventParser: 'codex',
},
{
id: 'devin',
name: 'Devin for Terminal',
bin: 'devin',
versionArgs: ['--version'],
// Model list is detected over ACP using the same argv that `buildArgs`
// returns for a real run.
fetchModels: async (resolvedBin) =>
detectAcpModels({
bin: resolvedBin,
args: ['--permission-mode', 'dangerous', '--respect-workspace-trust', 'false', 'acp'],
timeoutMs: 15_000,
defaultModelOption: DEFAULT_MODEL_OPTION,
}),
// Fallback aliases from Devin for Terminal docs
// (https://cli.devin.ai/docs/models): `adaptive` appears in the config example;
// `opus`, `sonnet`, `swe`, `codex`, `gemini`, and `gpt` are documented
// as short model-family names / recommended picks.
fallbackModels: [
DEFAULT_MODEL_OPTION,
{ id: 'adaptive', label: 'adaptive' },
{ id: 'swe', label: 'swe' },
{ id: 'opus', label: 'opus' },
{ id: 'sonnet', label: 'sonnet' },
{ id: 'codex', label: 'codex' },
{ id: 'gpt', label: 'gpt' },
{ id: 'gemini', label: 'gemini' },
],
buildArgs: () => ['--permission-mode', 'dangerous', '--respect-workspace-trust', 'false', 'acp'],
streamFormat: 'acp-json-rpc',
},
{
id: 'gemini',
name: 'Gemini CLI',
bin: 'gemini',
versionArgs: ['--version'],
fallbackModels: [
DEFAULT_MODEL_OPTION,
{ id: 'gemini-2.5-pro', label: 'gemini-2.5-pro' },
{ id: 'gemini-2.5-flash', label: 'gemini-2.5-flash' },
],
// Gemini reads from stdin when `-p` is omitted and stdin is a pipe.
// Passing the full composed prompt as a CLI arg causes ENAMETOOLONG on
// Windows (CreateProcess limit ~32 KB) for any non-trivial prompt.
// `--yolo` skips interactive approval prompts in the no-TTY web UI.
// Workspace trust is provided via `GEMINI_CLI_TRUST_WORKSPACE` below
// instead of `--skip-trust`; several Gemini CLI builds hide or reject the
// flag even though they accept the documented environment variable.
env: { GEMINI_CLI_TRUST_WORKSPACE: 'true' },
buildArgs: (_prompt, _imagePaths, _extra, options = {}) => {
const args = ['--output-format', 'stream-json', '--yolo'];
if (options.model && options.model !== 'default') {
args.push('--model', options.model);
}
return args;
},
promptViaStdin: true,
streamFormat: 'json-event-stream',
eventParser: 'gemini',
},
{
id: 'opencode',
name: 'OpenCode',
bin: 'opencode',
versionArgs: ['--version'],
// `opencode models` prints `provider/model` per line.
listModels: {
args: ['models'],
parse: parseLineSeparatedModels,
timeoutMs: 8000,
},
fallbackModels: [
DEFAULT_MODEL_OPTION,
{ id: 'anthropic/claude-sonnet-4-5', label: 'anthropic/claude-sonnet-4-5' },
{ id: 'openai/gpt-5', label: 'openai/gpt-5' },
{ id: 'google/gemini-2.5-pro', label: 'google/gemini-2.5-pro' },
],
// Prompt delivered via stdin (`opencode run -`) to avoid Windows
// `spawn ENAMETOOLONG` while preserving OpenCode's structured stream.
buildArgs: (_prompt, _imagePaths, _extra, options = {}) => {
const args = ['run', '--format', 'json', '--dangerously-skip-permissions'];
if (options.model && options.model !== 'default') {
args.push('--model', options.model);
}
// Trailing `-` is the stdin sentinel; it must stay the last argv entry.
args.push('-');
return args;
},
promptViaStdin: true,
streamFormat: 'json-event-stream',
eventParser: 'opencode',
},
{
id: 'hermes',
name: 'Hermes',
bin: 'hermes',
versionArgs: ['--version'],
// Model list is detected over ACP with the same argv used for real runs.
fetchModels: async (resolvedBin) =>
detectAcpModels({
bin: resolvedBin,
args: ['acp', '--accept-hooks'],
timeoutMs: 15_000,
defaultModelOption: DEFAULT_MODEL_OPTION,
}),
fallbackModels: [
DEFAULT_MODEL_OPTION,
{ id: 'openai-codex:gpt-5.5', label: 'gpt-5.5 (openai-codex:gpt-5.5)' },
{ id: 'openai-codex:gpt-5.4', label: 'gpt-5.4 (openai-codex:gpt-5.4)' },
{
id: 'openai-codex:gpt-5.4-mini',
label: 'gpt-5.4-mini (openai-codex:gpt-5.4-mini)',
},
],
buildArgs: () => ['acp', '--accept-hooks'],
streamFormat: 'acp-json-rpc',
},
{
id: 'kimi',
name: 'Kimi CLI',
bin: 'kimi',
versionArgs: ['--version'],
// Model list is detected over ACP with the same argv used for real runs.
fetchModels: async (resolvedBin) =>
detectAcpModels({
bin: resolvedBin,
args: ['acp'],
timeoutMs: 15_000,
defaultModelOption: DEFAULT_MODEL_OPTION,
}),
fallbackModels: [
DEFAULT_MODEL_OPTION,
{ id: 'kimi-k2-turbo-preview', label: 'kimi-k2-turbo-preview' },
{ id: 'moonshot-v1-8k', label: 'moonshot-v1-8k' },
{ id: 'moonshot-v1-32k', label: 'moonshot-v1-32k' },
],
buildArgs: () => ['acp'],
streamFormat: 'acp-json-rpc',
},
{
id: 'cursor-agent',
name: 'Cursor Agent',
bin: 'cursor-agent',
versionArgs: ['--version'],
// `cursor-agent models` prints account-bound model ids per line. When
// the user isn't authed it prints "No models available for this
// account." — that's not a model list, so we detect it and fall back.
listModels: {
args: ['models'],
timeoutMs: 5000,
parse: (stdout) => {
const trimmed = String(stdout || '').trim();
// Returning null makes the caller use `fallbackModels` instead.
if (!trimmed || /no models available/i.test(trimmed)) return null;
return parseLineSeparatedModels(trimmed);
},
},
fallbackModels: [
DEFAULT_MODEL_OPTION,
{ id: 'auto', label: 'auto' },
{ id: 'sonnet-4', label: 'sonnet-4' },
{ id: 'sonnet-4-thinking', label: 'sonnet-4-thinking' },
{ id: 'gpt-5', label: 'gpt-5' },
],
// Cursor Agent does not use `-` as a "read prompt from stdin" sentinel.
// Passing it makes the CLI treat the dash as the literal user prompt,
// which then surfaces as "your message only contains '-'". Keep stdin
// piped for prompt delivery, but do not append a fake prompt arg.
buildArgs: (_prompt, _imagePaths, _extra, options = {}, runtimeContext = {}) => {
const args = [];
args.push('--print', '--output-format', 'stream-json', '--stream-partial-output', '--force', '--trust');
if (runtimeContext.cwd) {
args.push('--workspace', runtimeContext.cwd);
}
if (options.model && options.model !== 'default') {
args.push('--model', options.model);
}
return args;
},
promptViaStdin: true,
streamFormat: 'json-event-stream',
eventParser: 'cursor-agent',
},
{
id: 'qwen',
name: 'Qwen Code',
bin: 'qwen',
versionArgs: ['--version'],
fallbackModels: [
DEFAULT_MODEL_OPTION,
{ id: 'qwen3-coder-plus', label: 'qwen3-coder-plus' },
{ id: 'qwen3-coder-flash', label: 'qwen3-coder-flash' },
],
// Prompt delivered via stdin (`qwen -`) to avoid Windows
// `spawn ENAMETOOLONG` for large composed prompts. Qwen Code is a
// Gemini-CLI fork and supports the same `--yolo` non-interactive mode.
buildArgs: (_prompt, _imagePaths, _extra, options = {}) => {
const args = ['--yolo'];
if (options.model && options.model !== 'default') {
args.push('--model', options.model);
}
// Trailing `-` is the stdin sentinel; it must stay the last argv entry.
args.push('-');
return args;
},
promptViaStdin: true,
streamFormat: 'plain',
},
{
id: 'copilot',
name: 'GitHub Copilot CLI',
bin: 'copilot',
versionArgs: ['--version'],
// The prompt is passed directly as the value of `-p`: `copilot -p
// "<prompt>" --allow-all-tools --output-format json`. Copilot does NOT
// treat `-` as a stdin sentinel — it reads it as a literal one-character
// prompt string — so the previous `-p -` + stdin pattern produced a
// nonsensical single-dash prompt instead of the composed prompt body.
//
// `--allow-all-tools` is required for non-interactive runs: without it
// the CLI blocks waiting for human approval on every tool call. Unlike
// Codex (where `exec` is a dedicated headless subcommand with
// auto-approve baked in) or Claude Code (which inherits its permission
// policy from the user's settings.json), Copilot's `-p` mode always
// prompts unless this flag is passed explicitly.
//
// `--output-format json` produces JSONL that copilot-stream.js parses
// into the same typed events as claude-stream.js.
//
// `--add-dir` (repeatable, same flag as Claude Code's) widens Copilot's
// path-level sandbox to skill seeds + design-system specs outside the
// project cwd.
//
// No `models` subcommand; the CLI accepts whatever the user's Copilot
// subscription exposes. Ship a small evidence-based hint list — the
// default we observed in the JSON stream and the example from
// `copilot --help`. Users can paste any other id via Settings.
//
// NOTE(review): the prompt rides argv here, yet this entry declares no
// `maxPromptArgBytes` budget (the DeepSeek TUI entry does) — confirm
// whether large composed prompts need the same fail-fast guard.
fallbackModels: [
DEFAULT_MODEL_OPTION,
{ id: 'claude-sonnet-4.6', label: 'Claude Sonnet 4.6' },
{ id: 'gpt-5.2', label: 'GPT-5.2' },
],
buildArgs: (prompt, _imagePaths, extraAllowedDirs = [], options = {}) => {
const args = [
'-p',
prompt,
'--allow-all-tools',
'--output-format',
'json',
];
if (options.model && options.model !== 'default') {
args.push('--model', options.model);
}
const dirs = (extraAllowedDirs || []).filter(
(d) => typeof d === 'string' && d.length > 0,
);
// One `--add-dir` per directory (the flag is repeatable).
for (const d of dirs) args.push('--add-dir', d);
return args;
},
promptViaStdin: false,
streamFormat: 'copilot-stream-json',
},
{
id: 'pi',
name: 'Pi',
bin: 'pi',
versionArgs: ['--version'],
// `pi --list-models` prints a TSV table to stderr (not stdout),
// so we use a custom fetchModels that reads stderr.
fetchModels: async (resolvedBin) => {
try {
const { stderr } = await execFileP(resolvedBin, ['--list-models'], {
timeout: 20_000,
maxBuffer: 8 * 1024 * 1024,
});
const parsed = parsePiModels(stderr);
if (!parsed || parsed.length === 0) return null;
return parsed;
} catch {
// Best-effort: a null result makes the caller use `fallbackModels`.
return null;
}
},
// Fallback models — the most commonly used providers/models when
// `pi --list-models` fails or times out.
fallbackModels: [
DEFAULT_MODEL_OPTION,
{ id: 'anthropic/claude-sonnet-4-5', label: 'Claude Sonnet 4.5 (anthropic)' },
{ id: 'anthropic/claude-opus-4-5', label: 'Claude Opus 4.5 (anthropic)' },
{ id: 'openai/gpt-5', label: 'GPT-5 (openai)' },
{ id: 'openai/o4-mini', label: 'o4-mini (openai)' },
{ id: 'google/gemini-2.5-pro', label: 'Gemini 2.5 Pro (google)' },
{ id: 'google/gemini-2.5-flash', label: 'Gemini 2.5 Flash (google)' },
],
// Thinking level presets mapped to pi's --thinking flag.
reasoningOptions: [
{ id: 'default', label: 'Default' },
{ id: 'off', label: 'Off' },
{ id: 'minimal', label: 'Minimal' },
{ id: 'low', label: 'Low' },
{ id: 'medium', label: 'Medium' },
{ id: 'high', label: 'High' },
{ id: 'xhigh', label: 'XHigh' },
],
// pi's RPC mode drives the entire conversation over stdio JSON-RPC.
// The daemon sends a `prompt` command and pi streams back typed events.
// No prompt in argv — avoids ENAMETOOLONG and keeps the protocol clean.
buildArgs: (_prompt, _imagePaths, _extra, options = {}, runtimeContext = {}) => {
const args = ['--mode', 'rpc', '--no-session'];
if (options.model && options.model !== 'default') {
// pi --model accepts patterns ("sonnet", "anthropic/claude-sonnet-4-5",
// "openai/gpt-5:high") so we pass the value through as-is.
args.push('--model', options.model);
}
if (options.reasoning && options.reasoning !== 'default') {
args.push('--thinking', options.reasoning);
}
// pi supports --append-system-prompt for cwd and extra context.
// For now we rely on the composed prompt containing the cwd hint
// (same pattern as other agents) rather than using system-prompt flags.
// (`runtimeContext` is accepted but currently unused for that reason.)
return args;
},
// Prompt is sent via RPC `prompt` command on stdin, not as a CLI arg.
promptViaStdin: true,
streamFormat: 'pi-rpc',
},
{
id: 'kiro',
name: 'Kiro CLI',
bin: 'kiro-cli',
versionArgs: ['--version'],
// Model list is detected over ACP with the same argv used for real runs.
fetchModels: async (resolvedBin) =>
detectAcpModels({
bin: resolvedBin,
args: ['acp'],
timeoutMs: 15_000,
defaultModelOption: DEFAULT_MODEL_OPTION,
}),
// No static hints shipped: only the default option when detection fails.
fallbackModels: [
DEFAULT_MODEL_OPTION,
],
buildArgs: () => ['acp'],
streamFormat: 'acp-json-rpc',
},
{
id: 'vibe',
name: 'Mistral Vibe CLI',
bin: 'vibe-acp',
versionArgs: ['--version'],
// The `vibe-acp` binary is launched with no arguments, both for model
// detection and for real runs.
fetchModels: async (resolvedBin) =>
detectAcpModels({
bin: resolvedBin,
args: [],
timeoutMs: 15_000,
defaultModelOption: DEFAULT_MODEL_OPTION,
}),
// No static hints shipped: only the default option when detection fails.
fallbackModels: [
DEFAULT_MODEL_OPTION,
],
buildArgs: () => [],
streamFormat: 'acp-json-rpc',
},
{
id: 'deepseek',
name: 'DeepSeek TUI',
// The `deepseek` dispatcher owns the `exec` / `--auto` subcommands and
// delegates to a sibling `deepseek-tui` runtime binary at exec time.
// Upstream documents both binaries as required (npm and cargo paths
// install them together), so a host with only `deepseek-tui` on PATH
// isn't a supported install — and `deepseek-tui` itself doesn't accept
// the argv shape `buildArgs` produces (`exec --auto <prompt>`). We only
// probe the dispatcher; advertising availability via a `deepseek-tui`
// fallback would surface the agent as runnable but make `/api/chat`
// exit immediately on the first prompt.
bin: 'deepseek',
versionArgs: ['--version'],
// No `models` subcommand that prints a clean id-per-line list; the
// canonical model ids for DeepSeek V4 are documented in the README,
// and the CLI accepts arbitrary provider/model strings via `--model`,
// so users can paste anything else through the custom-model input.
fallbackModels: [
DEFAULT_MODEL_OPTION,
{ id: 'deepseek-v4-pro', label: 'deepseek-v4-pro' },
{ id: 'deepseek-v4-flash', label: 'deepseek-v4-flash' },
],
// DeepSeek's exec mode requires the prompt as a positional argument
// (no `-` stdin sentinel; `prompt: String` is a required clap field).
// `--auto` enables agentic mode with auto-approval — the daemon runs
// every CLI without a TTY, so the interactive approval prompt would
// hang the run. Streaming is plain text on stdout (tool calls go to
// stderr); skipping `--json` keeps deltas streaming live instead of
// batched into one trailing summary object at end-of-turn.
buildArgs: (prompt, _imagePaths, _extra, options = {}) => {
const args = ['exec', '--auto'];
if (options.model && options.model !== 'default') {
args.push('--model', options.model);
}
// The prompt is the positional argument and goes last.
args.push(prompt);
return args;
},
// Guard against prompts that would blow Windows' ~32 KB CreateProcess
// limit (or Linux MAX_ARG_STRLEN on extreme edges) before spawn. Most
// other adapters set `promptViaStdin: true` to dodge this; DeepSeek's
// CLI doesn't accept `-` as a stdin sentinel yet, so we have to ship the
// prompt as argv. The /api/chat spawn path checks
// this byte budget against the composed prompt and emits an actionable
// SSE error ("reduce skills/design-system context, or use an adapter
// with stdin support") instead of letting the spawn fail with a
// generic ENAMETOOLONG/E2BIG message. 30_000 bytes leaves ~2.7 KB of
// argv headroom under the Windows command-line limit for `exec
// --auto --model <id>` and any internal quoting.
maxPromptArgBytes: 30_000,
streamFormat: 'plain',
},
];
// For every immediate sub-directory of `root`, build
// `<root>/<child>/<...segments>` and keep the paths that exist on disk.
// Returns an empty array when `root` cannot be listed (missing, unreadable).
// Used to discover per-version `bin` folders of Node version managers.
function existingDirsUnder(root, segments = []) {
  let children;
  try {
    children = readdirSync(root, { withFileTypes: true });
  } catch {
    return [];
  }
  return children
    .filter((child) => child.isDirectory())
    .map((child) => path.join(root, child.name, ...segments))
    .filter((candidate) => existsSync(candidate));
}
// How long a computed directory list is reused before the filesystem is
// scanned again.
const TOOLCHAIN_DIR_CACHE_TTL_MS = 5000;
// Memo of the last scan: the home it was computed for, the list, and when.
let cachedToolchainHome = null;
let cachedToolchainDirs = null;
let cachedToolchainDirsAt = 0;

// List user-level directories where CLI tools are commonly installed
// (per-user bin folders plus per-version Node manager bins). The home
// directory is `OD_AGENT_HOME` when set, otherwise the OS home. When the
// override is set — or on Windows — the Homebrew / `/usr/local` system
// folders are left out. Results are memoized per home for
// TOOLCHAIN_DIR_CACHE_TTL_MS; a cache hit returns the same array instance.
function userToolchainDirs() {
  const homeOverride = process.env.OD_AGENT_HOME;
  const home = homeOverride || homedir();
  const now = Date.now();

  const cacheIsFresh =
    cachedToolchainHome === home &&
    Boolean(cachedToolchainDirs) &&
    now - cachedToolchainDirsAt < TOOLCHAIN_DIR_CACHE_TTL_MS;
  if (cacheIsFresh) {
    return cachedToolchainDirs;
  }

  cachedToolchainHome = home;
  cachedToolchainDirsAt = now;

  const underHome = (...parts) => path.join(home, ...parts);

  const fixedDirs = [
    ['.local', 'bin'],
    ['.opencode', 'bin'],
    ['.bun', 'bin'],
    ['.volta', 'bin'],
    ['.asdf', 'shims'],
    ['Library', 'pnpm'],
    ['.cargo', 'bin'],
  ].map((parts) => underHome(...parts));

  const includeSystemDirs = process.platform !== 'win32' && !homeOverride;
  const systemDirs = includeSystemDirs ? ['/opt/homebrew/bin', '/usr/local/bin'] : [];

  const miseDirs = existingDirsUnder(
    underHome('.local', 'share', 'mise', 'installs', 'node'),
    ['bin'],
  );
  const nvmDirs = existingDirsUnder(underHome('.nvm', 'versions', 'node'), ['bin']);
  const fnmDirs = existingDirsUnder(
    underHome('.local', 'share', 'fnm', 'node-versions'),
    ['installation', 'bin'],
  );

  cachedToolchainDirs = [...fixedDirs, ...systemDirs, ...miseDirs, ...nvmDirs, ...fnmDirs];
  return cachedToolchainDirs;
}
// Ordered, de-duplicated list of directories to search for agent binaries:
// the entries of `PATH` first, then the user-level toolchain folders.
// GUI launchers (macOS .app bundles, Linux .desktop files) often start with
// a minimal PATH, so the toolchain folders make detection match what the
// user's shell would find — especially for Node version managers. Empty
// entries are dropped.
function resolvePathDirs() {
  const envDirs = (process.env.PATH || '').split(delimiter);
  const candidates = [...envDirs, ...userToolchainDirs()];
  const nonEmpty = candidates.filter((dir) => Boolean(dir));
  // A Set keeps first-seen order, which preserves PATH precedence.
  return [...new Set(nonEmpty)];
}
// Locate `bin` in the search directories and return its full path, or null
// when no candidate exists.
//
// On Windows each directory is tried with every `PATHEXT` extension in
// order. Empty `PATHEXT` entries (leading / trailing / doubled `;`) are
// ignored, so an extensionless sibling such as npm's sh shim can never
// shadow the real `.cmd` / `.exe`; if nothing usable remains, the built-in
// default list is used. On other platforms the bare name is tried.
//
// A candidate that is a directory is skipped: a folder that happens to
// share the binary's name must not be reported as an installed CLI. If the
// entry exists but cannot be stat'ed, it is still accepted, so detection is
// never stricter than the plain existence check for unusual file types.
export function resolveOnPath(bin) {
  let exts = [''];
  if (process.platform === 'win32') {
    const declared = (process.env.PATHEXT || '.EXE;.CMD;.BAT')
      .split(';')
      .map((ext) => ext.trim())
      .filter((ext) => ext.length > 0);
    exts = declared.length > 0 ? declared : ['.EXE', '.CMD', '.BAT'];
  }

  for (const dir of resolvePathDirs()) {
    for (const ext of exts) {
      const full = path.join(dir, bin + ext);
      if (!existsSync(full)) continue;
      let isDirectory = false;
      try {
        isDirectory = statSync(full).isDirectory();
      } catch {
        // Exists but cannot be inspected: keep the original permissive result.
      }
      if (!isDirectory) return full;
    }
  }
  return null;
}
// Find the executable for an agent definition. `def.bin` is looked up
// first, followed by each name in `def.fallbackBins` (ignored unless it is
// an array), and the first one found wins. This covers forks that ship the
// same CLI under another binary name (Claude Code → OpenClaude, issue #235).
// Returns the resolved path, or null when the definition has no `bin` or no
// candidate can be found.
export function resolveAgentExecutable(def) {
  const primary = def?.bin;
  if (!primary) {
    return null;
  }
  const alternates = Array.isArray(def.fallbackBins) ? def.fallbackBins : [];
  let located = null;
  // `some` stops at the first hit, so later candidates are not probed.
  [primary, ...alternates].some((name) => {
    located = resolveOnPath(name) || null;
    return located !== null;
  });
  return located;
}
// Load the model list for a detected agent, best-effort.
//   - `def.fetchModels` (function) takes precedence and receives the
//     resolved binary path.
//   - otherwise `def.listModels` ({ args, parse, timeoutMs }) is run through
//     the CLI and its stdout handed to `parse`.
//   - with neither, or when the lookup throws, times out, or yields a
//     null / empty result (e.g. cursor-agent's "No models available"),
//     `def.fallbackModels` is returned so the picker isn't stuck on
//     Default-only.
async function fetchModels(def, resolvedBin) {
  const usesCustomFetcher = typeof def.fetchModels === 'function';
  if (!usesCustomFetcher && !def.listModels) {
    return def.fallbackModels;
  }
  try {
    let listed;
    if (usesCustomFetcher) {
      listed = await def.fetchModels(resolvedBin);
    } else {
      const spec = def.listModels;
      const { stdout } = await execFileP(resolvedBin, spec.args, {
        timeout: spec.timeoutMs ?? 5000,
        // Listings from popular CLIs (e.g. opencode with every openrouter
        // model) easily exceed the default 1MB buffer; raise it so the
        // output isn't truncated.
        maxBuffer: 8 * 1024 * 1024,
      });
      listed = spec.parse(stdout);
    }
    if (!listed || listed.length === 0) {
      return def.fallbackModels;
    }
    return listed;
  } catch {
    return def.fallbackModels;
  }
}
// Detect one agent: locate its binary, read its version, record which
// optional flags it advertises, and load its model list.
//
// Returns the definition's declarative metadata (via `stripFns`) plus:
//   - `available: false` and the static fallback model list when no binary
//     is found;
//   - otherwise `available: true`, `path` (resolved binary), `version`
//     (first line of the version output, or null when the version command
//     failed) and `models`.
// Side effect: for definitions that declare both `helpArgs` and
// `capabilityFlags`, the detected capabilities are stored in
// `agentCapabilities` under the agent id.
async function probe(def) {
  const resolved = resolveAgentExecutable(def);
  if (!resolved) {
    return {
      ...stripFns(def),
      models: def.fallbackModels ?? [DEFAULT_MODEL_OPTION],
      available: false,
    };
  }

  let version = null;
  try {
    const { stdout } = await execFileP(resolved, def.versionArgs, { timeout: 3000 });
    // Split on CRLF as well as LF: Windows `.cmd` shims emit `\r\n`, and a
    // plain '\n' split would leave a stray `\r` on the reported version
    // whenever the output has more than one line.
    version = stdout.trim().split(/\r?\n/)[0];
  } catch {
    // binary exists but --version failed; still mark available
  }

  // Probe `--help` once per agent and record which flags the installed CLI
  // advertises. Cached on `agentCapabilities` for buildArgs to consult.
  if (def.helpArgs && def.capabilityFlags) {
    const caps = {};
    try {
      const { stdout } = await execFileP(resolved, def.helpArgs, {
        timeout: 5000,
        maxBuffer: 4 * 1024 * 1024,
      });
      for (const [flag, key] of Object.entries(def.capabilityFlags)) {
        caps[key] = stdout.includes(flag);
      }
    } catch {
      // If --help fails, leave caps empty: every capability is then
      // "unknown" rather than false, and each buildArgs decides how to
      // treat a missing key (Claude Code skips `--include-partial-messages`
      // but still passes `--add-dir`).
    }
    agentCapabilities.set(def.id, caps);
  }

  const models = await fetchModels(def, resolved);
  return {
    ...stripFns(def),
    models,
    available: true,
    path: resolved,
    version,
  };
}
// Produce the API-facing copy of an agent definition. Declarative metadata
// (name, bin, reasoningOptions, streamFormat, ...) is kept, while everything
// that is only meaningful to probing or spawning is left out:
//   - the `buildArgs` / `listModels` / `fetchModels` closures and specs,
//   - `fallbackModels` (the response carries a separately computed `models`),
//   - `helpArgs`, `capabilityFlags`, `fallbackBins`, `maxPromptArgBytes`
//     and `env`.
// The input object is not modified.
function stripFns(def) {
  const internalKeys = new Set([
    'buildArgs',
    'listModels',
    'fetchModels',
    'fallbackModels',
    'helpArgs',
    'capabilityFlags',
    'fallbackBins',
    'maxPromptArgBytes',
    'env',
  ]);
  const publicEntries = Object.entries(def).filter(([key]) => !internalKeys.has(key));
  return Object.fromEntries(publicEntries);
}
// Probe every registered agent concurrently and return the results in
// registry order. Each result's model list is also handed to
// `rememberLiveModels`, refreshing the validation cache so /api/chat accepts
// any model the UI was just shown — including ones that only appeared after
// a CLI re-auth.
export async function detectAgents() {
  const detected = await Promise.all(AGENT_DEFS.map(probe));
  detected.forEach((agent) => {
    rememberLiveModels(agent.id, agent.models);
  });
  return detected;
}
// Look up an agent definition by its `id`. Returns the registry entry
// itself (not a copy), or null when no agent has that id.
export function getAgentDef(id) {
  for (const candidate of AGENT_DEFS) {
    if (candidate.id === id) {
      return candidate;
    }
  }
  return null;
}
// Fail-fast size check for adapters that must pass the prompt as a
// positional argv entry (no stdin sentinel upstream). Such adapters declare
// a `maxPromptArgBytes` budget so the daemon can report an actionable,
// adapter-named error before `spawn` surfaces a generic ENAMETOOLONG / E2BIG
// (Linux MAX_ARG_STRLEN) or a Windows command-line-too-long (~32 KB) failure.
//
// Returns null when `def` is missing, declares no numeric budget (i.e. the
// adapter uses stdin), or the prompt fits. Otherwise returns
// `{ code, message, bytes, limit }`, where `bytes` is the UTF-8 size of
// `composed`. A non-string `composed` is measured as the empty string.
// Pure, so both the oversized and short-prompt paths are unit-testable
// without an HTTP server or a real spawn.
export function checkPromptArgvBudget(def, composed) {
  const hasBudget = Boolean(def) && typeof def.maxPromptArgBytes === 'number';
  if (!hasBudget) {
    return null;
  }

  const promptText = typeof composed === 'string' ? composed : '';
  const bytes = Buffer.byteLength(promptText, 'utf8');
  const limit = def.maxPromptArgBytes;
  if (bytes <= limit) {
    return null;
  }

  const explanation = `${def.name} requires the prompt as a command-line argument and this run's composed prompt exceeds the safe size (${bytes} > ${limit} bytes). `;
  const remedy =
    'Reduce the selected skills/design-system context, shorten the conversation, or pick an adapter with stdin support.';
  return {
    code: 'AGENT_PROMPT_TOO_LARGE',
    message: explanation + remedy,
    bytes,
    limit,
  };
}
// Mirror of packages/platform's `quoteWindowsCommandArg`, kept local so
// `checkWindowsCmdShimCommandLineBudget` can run on macOS/Linux against
// a fake `.cmd` path in tests without forking on `process.platform`.
// Must stay byte-for-byte identical to the platform copy — the helper's
// whole point is to compute the exact `cmd.exe /d /s /c "<inner>"` line
// the spawn path will produce on Windows. The `%` → `"^%"` substitution
// neutralizes cmd.exe's percent-expansion for prompts that ride argv
// (DeepSeek TUI today): `%name%` pairs would otherwise be expanded from
// the daemon environment before the child reads them, leaking secrets
// like `%DEEPSEEK_API_KEY%` whenever the prompt mentions an env-var name.
function quoteForWindowsCmdShim(value) {
  // null / undefined are treated as the empty string; everything else is
  // stringified.
  const text = String(value ?? '');
  // Args free of whitespace and cmd.exe metacharacters go through as-is.
  const needsQuoting = /[\s"&<>|^%]/.test(text);
  if (!needsQuoting) return text;
  // Single pass over the two characters that need rewriting: `"` is
  // doubled and `%` becomes `"^%"`. Replacements are never re-scanned, so
  // this yields the same text as doubling quotes first and then
  // substituting percents.
  const body = text.replace(/["%]/g, (ch) => (ch === '"' ? '""' : '"^%"'));
  return `"${body}"`;
}
// Mirror of libuv's `quote_cmd_arg` (src/win/process.c), the exact rule
// Node uses on Windows when it composes a CreateProcess command line for
// a direct executable spawn (not a `.cmd` / `.bat` shim, which goes
// through `quoteForWindowsCmdShim` above). Each embedded `"` becomes
// `\"`, every backslash that ends up adjacent to a quote (or to the
// closing wrap quote) gets doubled, and an arg with whitespace or a
// quote is wrapped in outer `"..."`. Kept local so the budget check
// works on macOS/Linux test hosts against a fake `C:\…\foo.exe` path.
function quoteForWindowsDirectExe(value) {
  // Quote one argv entry the way libuv's `quote_cmd_arg` does when Node
  // assembles a CreateProcess command line for a direct executable spawn.
  // `value` is stringified (null / undefined become ''); the return value
  // is the text that entry contributes to the command line.
  const str = String(value ?? '');
  // libuv emits a literal `""` for an empty argv entry so it survives
  // CommandLineToArgvW round-tripping; mirror that.
  if (str.length === 0) return '""';
  // Fast path: libuv tests exactly `wcspbrk(source, L" \t\"")` — space,
  // tab and double quote. Other whitespace (newlines, carriage returns,
  // ...) does NOT trigger quoting, so the check must not use `\s`;
  // otherwise a newline-only argument is measured with wrap quotes and
  // doubled trailing backslashes that the real command line never has.
  if (!/[ \t"]/.test(str)) return str;
  // No quote, no backslash: simple wrap, no per-char escaping needed.
  if (!/[\\"]/.test(str)) return `"${str}"`;
  // Slow path: walk the string, tracking the current run of consecutive
  // backslashes, and apply the documented Windows convention:
  //   - n backslashes followed by `"`      → 2n backslashes, then `\"`
  //   - n backslashes followed by anything
  //     else                               → n backslashes, unchanged
  //   - n trailing backslashes (just
  //     before the closing wrap quote)     → 2n backslashes
  let result = '"';
  let backslashes = 0;
  for (const ch of str) {
    if (ch === '\\') {
      // Defer emission until we know what follows the run.
      backslashes++;
      continue;
    }
    if (ch === '"') {
      // Double the pending run and add one more backslash to escape the quote.
      result += '\\'.repeat(2 * backslashes + 1) + '"';
    } else {
      result += '\\'.repeat(backslashes) + ch;
    }
    backslashes = 0;
  }
  // Double any trailing run so it cannot escape the closing wrap quote.
  result += '\\'.repeat(2 * backslashes) + '"';
  return result;
}
// Windows' CreateProcess caps `lpCommandLine` at 32_767 chars. Going
// through a `.cmd` / `.bat` shim adds a `cmd.exe /d /s /c "<inner>"`
// wrapper, and `quoteForWindowsCmdShim` doubles every embedded `"` plus
// wraps any whitespace/special-char arg in outer quotes — so a prompt
// well under `maxPromptArgBytes` can still expand past the kernel cap
// once it's run through the shim. Leave headroom for any per-CLI flag
// the adapter might tack on at exec time and for cmd.exe's own framing.
// Upper bound, in characters, that the budget checks in this file assume
// for a Windows CreateProcess command line.
const WINDOWS_CREATE_PROCESS_LIMIT = 32_767;
// Safety margin, in characters. Both Windows budget checks compare the
// assembled command-line length against LIMIT - HEADROOM rather than
// against the raw limit.
const WINDOWS_CREATE_PROCESS_HEADROOM = 256;
// Post-buildArgs guard for argv-bound adapters whose binary resolves to
// a Windows `.cmd` / `.bat` shim. Computes the exact command line shape
// `createCommandInvocation` (in packages/platform) hands to `spawn` —
// `cmd.exe /d /s /c "<quoted command + quoted args>"` — and refuses the
// run when that line would exceed the CreateProcess limit (less a small
// headroom). Returns the same `AGENT_PROMPT_TOO_LARGE` shape as
// `checkPromptArgvBudget` so the SSE error path in `/api/chat` doesn't
// have to special-case it.
//
// No-op when:
// - the adapter doesn't declare `maxPromptArgBytes` (stdin adapters
// never go through this path);
// - the resolved binary isn't a `.cmd` / `.bat` (POSIX hosts and
// direct `.exe` resolutions on Windows skip the cmd.exe wrap);
// - the assembled line fits comfortably under the kernel cap.
//
// Pure: takes `resolvedBin` explicitly so a test on macOS can pass a
// fake `C:\\…\\deepseek.cmd` path and exercise the same math the daemon
// would run on Windows.
export function checkWindowsCmdShimCommandLineBudget(def, resolvedBin, args) {
  // Only adapters that declare a numeric argv budget are guarded.
  const hasBudget = Boolean(def) && typeof def.maxPromptArgBytes === 'number';
  if (!hasBudget) return null;
  // Only `.cmd` / `.bat` targets (case-insensitive) get the cmd.exe wrap.
  const isShim = typeof resolvedBin === 'string' && /\.(bat|cmd)$/i.test(resolvedBin);
  if (!isShim) return null;

  // Quote the command and every arg individually, then join with single
  // spaces to form the text that sits between the outer quotes of
  // `cmd.exe /d /s /c "<inner>"`.
  const quotedParts = [];
  for (const part of [resolvedBin, ...(Array.isArray(args) ? args : [])]) {
    quotedParts.push(quoteForWindowsCmdShim(part));
  }
  const inner = quotedParts.join(' ');

  // Fixed framing: the `cmd.exe /d /s /c ` prefix plus the two outer quotes.
  const framingLength = 'cmd.exe /d /s /c '.length + 2;
  const commandLineLength = framingLength + inner.length;
  const safeLimit = WINDOWS_CREATE_PROCESS_LIMIT - WINDOWS_CREATE_PROCESS_HEADROOM;
  if (commandLineLength <= safeLimit) return null;

  const message = [
    `${def.name} on Windows runs through a .cmd shim and this run's prompt would expand past the CreateProcess command-line limit `,
    `after cmd.exe quote-doubling (${commandLineLength} > ${safeLimit} chars). `,
    'Reduce quote-heavy content in the selected skills/design-system context, shorten the conversation, or pick an adapter with stdin support.',
  ].join('');
  return {
    code: 'AGENT_PROMPT_TOO_LARGE',
    message,
    commandLineLength,
    limit: safeLimit,
  };
}
// Heuristic: does `resolvedBin` look like a Windows path? Used by the
// direct-exe guard so a test on a POSIX host can drive a fake
// `C:\…\foo.exe` path through the same math the daemon would run on
// Windows, while still skipping POSIX-shaped paths (which never go
// through CreateProcess).
function looksLikeWindowsPath(p) {
  // Anything that is not a non-empty string cannot be a path at all.
  if (typeof p !== 'string') return false;
  if (p === '') return false;
  // Drive-letter form: `C:\…` or `C:/…`.
  const hasDrivePrefix = /^[a-zA-Z]:[\\/]/.test(p);
  // UNC form: `\\server\share\…`.
  const hasUncPrefix = p.slice(0, 2) === '\\\\';
  return hasDrivePrefix || hasUncPrefix;
}
// Companion to `checkWindowsCmdShimCommandLineBudget` for argv-bound
// adapters whose binary resolves directly to a Windows executable
// (a cargo-installed `deepseek.exe`, a hand-built release, or any other
// non-shim install path). `createCommandInvocation` does *not* wrap the
// call in `cmd.exe /d /s /c "<inner>"` for those — but Node/libuv still
// composes a CreateProcess `lpCommandLine` by walking each argv entry
// through `quote_cmd_arg`, which doubles backslashes adjacent to quotes
// and escapes every embedded `"` as `\"`. A quote-heavy prompt that fits
// under the raw `maxPromptArgBytes` budget can therefore still expand
// past the kernel's 32_767-char `lpCommandLine` cap on a direct `.exe`
// spawn, surfacing as a generic `spawn ENAMETOOLONG` instead of the
// adapter-named `AGENT_PROMPT_TOO_LARGE` the budget guard exists to
// emit. Returns the same error shape as the cmd-shim guard so the SSE
// error path in `/api/chat` doesn't have to special-case it.
//
// No-op when:
// - the adapter doesn't declare `maxPromptArgBytes` (stdin adapters
// never go through this path);
// - the resolved binary is a `.cmd` / `.bat` shim — that's handled by
// `checkWindowsCmdShimCommandLineBudget` so we don't double-emit;
// - the resolved binary is a POSIX path on a POSIX host (no
// CreateProcess in play);
// - the assembled command line fits under the safe limit.
//
// Pure: takes `resolvedBin` and `args` explicitly so a test on macOS can
// pass a fake `C:\…\deepseek.exe` and exercise the same math the daemon
// would run on Windows. The libuv quoting math lives in
// `quoteForWindowsDirectExe` above.
export function checkWindowsDirectExeCommandLineBudget(def, resolvedBin, args) {
  // Only adapters that declare a numeric argv budget are guarded.
  const hasBudget = Boolean(def) && typeof def.maxPromptArgBytes === 'number';
  if (!hasBudget) return null;
  if (typeof resolvedBin !== 'string' || resolvedBin === '') return null;
  // `.bat` / `.cmd` targets belong to the cmd-shim guard; bail out here so
  // one oversized prompt is reported once, not twice.
  const isShim = /\.(bat|cmd)$/i.test(resolvedBin);
  if (isShim) return null;
  // The check applies on a win32 host, or when the path is Windows-shaped
  // (which lets tests on POSIX hosts drive a fake `C:\…\foo.exe`). A
  // POSIX-shaped path on a POSIX host is skipped.
  const viaCreateProcess = process.platform === 'win32' || looksLikeWindowsPath(resolvedBin);
  if (!viaCreateProcess) return null;

  // Quote command + args one by one and join with single spaces; only the
  // resulting length matters for the budget.
  const quotedParts = [];
  for (const part of [resolvedBin, ...(Array.isArray(args) ? args : [])]) {
    quotedParts.push(quoteForWindowsDirectExe(part));
  }
  const commandLineLength = quotedParts.join(' ').length;
  const safeLimit = WINDOWS_CREATE_PROCESS_LIMIT - WINDOWS_CREATE_PROCESS_HEADROOM;
  if (commandLineLength <= safeLimit) return null;

  const message = [
    `${def.name} on Windows builds a CreateProcess command line and this run's prompt would expand past the limit `,
    `after libuv quote-escaping (${commandLineLength} > ${safeLimit} chars). `,
    'Reduce quote-heavy content in the selected skills/design-system context, shorten the conversation, or pick an adapter with stdin support.',
  ].join('');
  return {
    code: 'AGENT_PROMPT_TOO_LARGE',
    message,
    commandLineLength,
    limit: safeLimit,
  };
}
// Resolve the absolute path of an agent's binary on the current PATH.
// Used by the chat handler so spawn() gets the same executable that
// detection reported as available — fixes Windows ENOENT when the bare
// bin name isn't on the child process's PATH (issue #10).
export function resolveAgentBin(id) {
  const agentDef = getAgentDef(id);
  // Unknown agent id, or a definition with no `bin` declared: nothing to
  // resolve.
  const hasBin = Boolean(agentDef?.bin);
  return hasBin ? resolveAgentExecutable(agentDef) : null;
}
// Build the env passed to spawn() for a given agent adapter.
//
// The claude adapter strips ANTHROPIC_API_KEY so Claude Code's own auth
// resolution (claude login / Pro/Max plan) wins instead of silently
// falling back to API-key billing whenever the daemon happened to be
// launched from a shell that exported the key for SDK or scripting use.
// See issue #398.
//
// Windows env-var names are case-insensitive at the kernel level
// (`GetEnvironmentVariable`), but spreading `process.env` into a plain
// object loses Node's case-insensitive accessor — `Anthropic_Api_Key`
// would survive a literal `delete env.ANTHROPIC_API_KEY` and still reach
// the child. Iterate keys and compare case-insensitively to close that.
export function spawnEnvForAgent(agentId, baseEnv) {
  // Always work on a shallow copy so the caller's env object is untouched.
  const childEnv = { ...baseEnv };
  if (agentId === 'claude') {
    // Drop every casing of ANTHROPIC_API_KEY: once the env is a plain
    // object, differently-cased keys are distinct properties, so a single
    // literal `delete` would miss them.
    Object.keys(childEnv)
      .filter((name) => name.toUpperCase() === 'ANTHROPIC_API_KEY')
      .forEach((name) => {
        delete childEnv[name];
      });
  }
  return childEnv;
}
// Daemon's /api/chat needs to validate the user's model pick against the
// list we last surfaced to the UI. We keep a per-agent cache of the most
// recent live list (refreshed every detectAgents() call) and additionally
// trust any value present in the static fallback. A model that's neither
// gets rejected so a stale or hostile value can't smuggle arbitrary flags.
// agent id -> Set of model ids from the most recent list recorded for it.
const liveModelCache = new Map();
export function rememberLiveModels(agentId, models) {
  // A non-array listing leaves any previously cached entry untouched.
  if (!Array.isArray(models)) return;
  // Keep only string ids; null entries, entries without an id, and
  // non-string ids are skipped. The Set replaces whatever was cached for
  // this agent before.
  const ids = new Set();
  for (const model of models) {
    const modelId = model && model.id;
    if (typeof modelId === 'string') ids.add(modelId);
  }
  liveModelCache.set(agentId, ids);
}
export function isKnownModel(def, modelId) {
  // Empty / missing model ids are never "known".
  if (!modelId) return false;
  // First choice: the live list cached for this agent, if any.
  const cached = liveModelCache.get(def.id);
  if (cached?.has(modelId)) return true;
  // Otherwise fall back to the adapter's static model list, when it has one.
  const { fallbackModels } = def;
  return Array.isArray(fallbackModels) && fallbackModels.some((entry) => entry.id === modelId);
}
// Permit user-typed model ids that didn't appear in either the live
// listing or the static fallback (e.g. the user is on a brand-new model
// the CLI's `models` command hasn't surfaced yet). The CLI gets the value
// as a child-process arg — not a shell string — so injection isn't a
// concern, but we still reject anything that could be misread as a flag
// by a downstream CLI or that contains whitespace / control chars.
export function sanitizeCustomModel(id) {
  if (typeof id !== 'string') return null;
  const candidate = id.trim();
  // Between 1 and 200 characters after trimming.
  const lengthOk = candidate.length >= 1 && candidate.length <= 200;
  // Must start with an alphanumeric (so it can never begin with `-`) and
  // may continue only with alphanumerics or `. _ / : @ -`; this excludes
  // whitespace and control characters.
  const shapeOk = /^[A-Za-z0-9][A-Za-z0-9._/:@-]*$/.test(candidate);
  return lengthOk && shapeOk ? candidate : null;
}