d1c5be112e
The reconnect + events.list backfill in c283d88 is correct but never ran:
the previous v2 Node Function was killed at ~27 s (well before the 20 min
reconnect budget could matter), so streams always died after the first MCP
tool batch.
Move the proxy to a Netlify Edge Function (Deno runtime) which has no
streaming-duration cap as long as we keep writing to the response body.
Same reconnect / backfill / dedupe-by-event-id pattern; same NDJSON wire
protocol to the browser. Implemented with plain fetch() against the
Anthropic REST API (npm packages on Edge are beta) so we have no SDK
runtime dependency.
Frontend now POSTs to /api/agent-proxy. The Anthropic SDK is removed
from the package; @netlify/edge-functions is added for ambient types.
Co-Authored-By: alex <alex@semipublic.co>
579 lines
18 KiB
TypeScript
579 lines
18 KiB
TypeScript
import type { Config, Context } from '@netlify/edge-functions'
|
|
|
|
/**
 * Netlify Edge Function (Deno runtime).
 *
 * Proxies a request from the React frontend to a Claude Console-defined
 * Managed Agent via the `/v1/sessions` endpoints. The agent's model,
 * system prompt, tools, MCP servers, and skills are configured in the
 * Console and referenced here by ID.
 *
 * This replaces the v2 Node Function at `netlify/functions/agent-proxy.js`.
 * The Node Function process was killed at ~27 s by the platform — well
 * before any SSE reconnect could fire — so the brief always stopped after
 * the first MCP tool batch. Edge Functions run on Deno Deploy with a
 * 40 s response-header timeout but no streaming-duration cap as long as
 * we keep writing to the body, so the 20-minute reconnect budget below
 * is actually reachable.
 *
 * Wire protocol (downstream to the browser): newline-delimited JSON
 * (`application/x-ndjson`). One JSON object per line; the React side
 * parses incrementally as bytes arrive. Object shapes:
 *
 *   {"type":"text","text":"...markdown..."}   // append to brief
 *   {"type":"status","kind":"thinking"}
 *   {"type":"status","kind":"tool_use","name":"...","label":"..."}
 *   {"type":"status","kind":"tool_result","ok":true}
 *   {"type":"status","kind":"tool_result","ok":false,"message":"..."}
 *   {"type":"status","kind":"session_error","message":"..."}
 *   {"type":"heartbeat"}                      // keep-alive only
 *   {"type":"done"}                           // session terminated cleanly
 *   {"type":"error","message":"..."}          // unrecoverable
 *
 * Reconnect strategy: the upstream Anthropic SSE stream
 * (`GET /v1/sessions/{id}/events/stream`) is not guaranteed to stay open
 * for the full life of a multi-turn session — long inter-turn gaps (e.g.
 * while MCP tools or the next model request are running) can trigger
 * upstream / proxy read timeouts. When the upstream stream ends without
 * a terminal `session.status_*` event we treat it as a drop, fetch any
 * events we missed via the events-list endpoint (deduped by event id),
 * reopen the live stream, and keep going — up to a wall-clock budget.
 *
 * We deliberately use plain `fetch()` against the Anthropic REST API
 * rather than the `@anthropic-ai/sdk` npm package because npm support in
 * Netlify Edge Functions is still in beta, and the SSE / list endpoints
 * we need are easy to call directly.
 */
|
|
|
|
// All Managed Agents endpoints require this beta header.
const MANAGED_AGENTS_BETA = 'managed-agents-2026-04-01'
const ANTHROPIC_VERSION = '2023-06-01'
const ANTHROPIC_API_BASE = 'https://api.anthropic.com'

// Downstream keep-alive cadence. The browser's fetch reader will time
// out / appear stuck if no bytes flow for too long; we emit a
// `{"type":"heartbeat"}` line when the upstream is quiet.
const HEARTBEAT_MS = 10_000

// Hard ceiling on total wall-clock time the function will keep
// reconnecting upstream on behalf of a single browser request. Past this
// we emit an `error` line and close the stream so the user can retry
// rather than having the request hang indefinitely.
const RECONNECT_BUDGET_MS = 20 * 60 * 1000 // 20 minutes

// Short backoff between an upstream drop and the next reconnect attempt,
// so a tight error loop can't hammer the API.
const RECONNECT_BACKOFF_MS = 500
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Types (kept loose — Anthropic Managed Agents events are large and we
|
|
// only inspect a small subset of fields).
|
|
// ---------------------------------------------------------------------------
|
|
/** One block of an event's `content` array; only text blocks are inspected. */
type ContentBlock = { type?: string; text?: string }

/**
 * The subset of a Managed Agents session event that this proxy reads.
 * Every field is optional because events are decoded from untrusted JSON
 * and different event types populate different fields.
 */
type AgentEvent = {
  /** Event id; used to dedupe between the live stream and the backfill. */
  id?: string
  /** Event type, e.g. `agent.message` or `session.status_idle`. */
  type?: string
  /** Message / tool-result content blocks. */
  content?: ContentBlock[]
  /** Tool name on tool-use events. */
  name?: string
  /** Tool arguments on tool-use events. */
  input?: Record<string, unknown>
  /** Set on tool-result events whose tool call failed. */
  is_error?: boolean
  /** Why the session went idle (read on `session.status_idle`). */
  stop_reason?: { type?: string }
  /** Error details (read on `session.error`). */
  error?: { message?: string }
}

/** One page of the events-list endpoint response. */
type EventsListPage = {
  data?: AgentEvent[]
  has_more?: boolean
  last_id?: string
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Helpers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Builds the headers attached to every Anthropic request made by this
 * proxy: the API key, the pinned API version, and the Managed Agents
 * beta flag.
 *
 * @param apiKey - Anthropic API key sent as `x-api-key`.
 * @returns A fresh plain header map (safe for callers to extend).
 */
function authHeaders(apiKey: string): Record<string, string> {
  return {
    'x-api-key': apiKey,
    'anthropic-version': ANTHROPIC_VERSION,
    'anthropic-beta': MANAGED_AGENTS_BETA,
  }
}
|
|
|
|
/**
 * Reports whether `event` marks the end of the session as far as this
 * proxy is concerned.
 *
 * Terminal events are `session.status_terminated`, and
 * `session.status_idle` whose stop reason is `end_turn` or
 * `retries_exhausted`.
 *
 * A `session.status_idle` with `stop_reason: requires_action` means the
 * agent is waiting on a client-side response (custom tool result, tool
 * confirmation). For this app we don't expose custom tools, so it should
 * never fire — but if it ever does we explicitly do NOT treat it as
 * terminal and let the loop keep tailing.
 *
 * @param event - Event to classify; `undefined` is never terminal.
 * @returns `true` when the caller should stop tailing the session.
 */
function isTerminal(event: AgentEvent | undefined): boolean {
  if (!event) return false
  if (event.type === 'session.status_terminated') return true
  if (event.type === 'session.status_idle') {
    const reason = event.stop_reason?.type
    return reason === 'end_turn' || reason === 'retries_exhausted'
  }
  return false
}
|
|
|
|
/**
 * Produces the short human-readable label shown in the UI for a tool-use
 * event.
 *
 * - `web_search` with a string `query` → `Searching the web: <query>`
 * - `web_fetch` with a string `url`    → `Fetching: <url>`
 * - MCP tool use                       → `<name> (k=v, …)` built from up to
 *   three arguments, skipping `limit`; object values are elided as `…` and
 *   other values are truncated to 40 characters.
 * - anything else                      → the tool name (or `tool`).
 *
 * @param event - A tool-use event; only `type`, `name` and `input` are read.
 * @returns The label text.
 */
function describeToolUse(event: AgentEvent): string {
  const name = event.name || 'tool'
  const input: Record<string, unknown> = event.input || {}

  if (name === 'web_search' && typeof input.query === 'string') {
    return `Searching the web: ${input.query}`
  }
  if (name === 'web_fetch' && typeof input.url === 'string') {
    return `Fetching: ${input.url}`
  }

  if (event.type === 'agent.mcp_tool_use') {
    const args = Object.entries(input)
      // `limit` is paging noise, not something the user cares about.
      .filter(([key]) => key !== 'limit')
      .slice(0, 3)
      .map(([key, value]) =>
        typeof value === 'object' && value !== null
          ? `${key}=…`
          : `${key}=${String(value).slice(0, 40)}`,
      )
      .join(', ')
    return `${name}${args ? ` (${args})` : ''}`
  }

  return name
}
|
|
|
|
/**
 * `fetch()` wrapper for the Anthropic REST API: prefixes `path` with the
 * API base URL and adds the auth / version / beta headers.
 *
 * Caller-supplied headers win over the defaults. They are merged through
 * `Headers`, so any `HeadersInit` shape works (plain object, `Headers`
 * instance, or tuple list) and header names are matched
 * case-insensitively — spreading a `Headers` instance into an object
 * literal would silently drop every entry.
 *
 * @param path - Path and query, starting with `/` (e.g. `/v1/sessions`).
 * @param apiKey - Anthropic API key.
 * @param init - Extra `fetch` options (method, body, signal, headers…).
 * @returns The raw `Response`; the status is NOT checked here.
 */
async function anthropicFetch(
  path: string,
  apiKey: string,
  init: RequestInit = {},
): Promise<Response> {
  const headers = new Headers(authHeaders(apiKey))
  new Headers(init.headers).forEach((value, name) => {
    headers.set(name, value)
  })
  return await fetch(`${ANTHROPIC_API_BASE}${path}`, { ...init, headers })
}
|
|
|
|
/**
 * Decodes one raw SSE message (the text between two blank lines) into an
 * `AgentEvent`.
 *
 * Only `event:` and `data:` fields are read; comment lines (`: …`) and
 * other fields are ignored. Multiple `data:` lines are joined with `\n`
 * as the SSE spec requires.
 *
 * @param raw - Message text with line endings already normalised to `\n`.
 * @returns The decoded event, or `undefined` when the message carries no
 *   data, the data is not valid JSON, or the JSON is not a plain object.
 */
function decodeSseMessage(raw: string): AgentEvent | undefined {
  let eventType: string | undefined
  const dataLines: string[] = []
  for (const line of raw.split('\n')) {
    if (line.startsWith('event:')) {
      eventType = line.slice(6).trim()
    } else if (line.startsWith('data:')) {
      dataLines.push(line.slice(5).trim())
    }
  }

  const payload = dataLines.join('\n')
  if (!payload) return undefined

  let parsed: unknown
  try {
    parsed = JSON.parse(payload)
  } catch {
    // Malformed event — skip.
    return undefined
  }
  // `42`, `null`, `"str"` and arrays are valid JSON but not events.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return undefined
  }

  const event = parsed as AgentEvent
  // The `type` inside `data` wins; the `event:` header is only a
  // fallback for payloads that omit it.
  if (eventType && !event.type) event.type = eventType
  return event
}

/**
 * Parses an SSE byte stream into an async iterable of decoded
 * `AgentEvent` objects. Each SSE message is
 * `event: <type>\ndata: <json>\n\n`; CRLF and bare-CR line endings are
 * accepted too.
 *
 * An unterminated message at end-of-stream is discarded, as the SSE spec
 * prescribes. Undecodable messages are skipped rather than thrown.
 *
 * The stream's reader is always cancelled and released when iteration
 * stops — including when the consumer `break`s out early or a read
 * fails — so the underlying connection is not left open.
 *
 * @param stream - Response body of the events-stream request.
 * @throws Whatever the underlying stream's `read()` rejects with.
 */
async function* parseSse(
  stream: ReadableStream<Uint8Array>,
): AsyncGenerator<AgentEvent> {
  const reader = stream.getReader()
  const decoder = new TextDecoder('utf-8')
  let buffer = ''

  try {
    while (true) {
      const { value, done } = await reader.read()
      if (done) break
      buffer += decoder.decode(value, { stream: true })

      // Normalise CRLF / CR to LF. A CR at the very end of the buffer is
      // held back: it may be the first half of a CRLF split across chunks.
      buffer = buffer.replace(/\r\n|\r(?!$)/g, '\n')

      // SSE events are separated by a blank line (`\n\n`). Process each
      // complete event.
      let sep: number
      while ((sep = buffer.indexOf('\n\n')) !== -1) {
        const raw = buffer.slice(0, sep)
        buffer = buffer.slice(sep + 2)
        const event = decodeSseMessage(raw)
        if (event) yield event
      }
    }
  } finally {
    // Runs on normal end, on read error, and when the consumer stops
    // iterating early (generator `return()`).
    try {
      await reader.cancel()
    } catch {
      /* stream already errored — nothing left to cancel */
    }
    try {
      reader.releaseLock()
    } catch {
      /* ignore */
    }
  }
}
|
|
|
|
/**
 * Pages through `GET /v1/sessions/{id}/events` (ascending, 100 per page)
 * to backfill anything we missed while disconnected. The endpoint returns
 * `{data, has_more, last_id}`; we keep paging while `has_more` is true.
 *
 * Paging stops early when the server gives no usable cursor, or when the
 * cursor fails to advance — otherwise a misbehaving response could pin
 * this generator in an endless request loop.
 *
 * @param sessionId - Session whose events are listed.
 * @param apiKey - Anthropic API key.
 * @throws Error when a page request returns a non-2xx status.
 */
async function* listAllEvents(
  sessionId: string,
  apiKey: string,
): AsyncGenerator<AgentEvent> {
  let after: string | undefined
  for (;;) {
    const params = new URLSearchParams({ order: 'asc', limit: '100' })
    if (after) params.set('after_id', after)
    const res = await anthropicFetch(
      `/v1/sessions/${sessionId}/events?${params.toString()}`,
      apiKey,
    )
    if (!res.ok) {
      // Release the unread error body so the connection isn't held open.
      await res.body?.cancel().catch(() => {})
      throw new Error(
        `events.list returned ${res.status} ${res.statusText}`,
      )
    }
    const page = (await res.json()) as EventsListPage
    const items = Array.isArray(page?.data) ? page.data : []
    for (const ev of items) yield ev
    if (!page?.has_more) break
    const lastId = page.last_id ?? items[items.length - 1]?.id
    // Can't paginate without an anchor, and must not re-request the same
    // page forever if the anchor didn't move.
    if (!lastId || lastId === after) break
    after = lastId
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Handler
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Loose shape check for the station website: optional `http(s)://`, a
// dotted host name, and an optional path / query / fragment. This is a
// sanity filter on user input, not a full URL parser.
const URL_REGEX =
  /^(https?:\/\/)?([\w-]+\.)+[\w-]{2,}(\/[\w\-._~:/?#[\]@!$&'()*+,;=%]*)?$/i
|
|
|
|
/**
 * Edge Function entry point.
 *
 * Flow:
 *  1. Validate the POSTed intake (`stationName`, `stationLocation`,
 *     `stationWebsite`) and the required env vars.
 *  2. Create a Managed Agent session, open its SSE event stream, then send
 *     the kickoff `user.message` (stream first, so no events are missed).
 *  3. Return an NDJSON streaming response that relays agent events,
 *     emits heartbeats while upstream is quiet, and — when the upstream
 *     stream drops without a terminal event — backfills via the events
 *     list and reconnects, up to `RECONNECT_BUDGET_MS`.
 *
 * Failures before streaming starts are returned as plain-text 4xx/5xx
 * responses; failures after that are reported in-band as `error` /
 * `session_error` lines.
 *
 * Every upstream SSE request shares one `AbortController`. It is aborted
 * when the browser goes away (the response stream's `cancel()`), when the
 * kickoff fails, and when the relay loop finishes, so no upstream
 * connection outlives the browser request.
 *
 * @param req - Incoming browser request (must be a JSON POST).
 * @param _context - Netlify context (unused).
 * @returns The streaming NDJSON response, or a plain-text error response.
 */
export default async (req: Request, _context: Context) => {
  if (req.method !== 'POST') {
    return new Response('Method Not Allowed', { status: 405 })
  }

  let payload: {
    stationName?: string
    stationLocation?: string
    stationWebsite?: string
  }
  try {
    payload = await req.json()
  } catch {
    return new Response('Invalid JSON body', { status: 400 })
  }

  const {
    stationName = '',
    stationLocation = '',
    stationWebsite = '',
  } = payload || {}

  if (!URL_REGEX.test(stationWebsite)) {
    return new Response('Invalid station website URL', { status: 400 })
  }

  const apiKey = Netlify.env.get('ANTHROPIC_API_KEY')
  const AGENT_ID = Netlify.env.get('ANTHROPIC_AGENT_ID')
  const ENVIRONMENT_ID = Netlify.env.get('ANTHROPIC_ENVIRONMENT_ID')
  // Optional comma-separated list; blanks are dropped.
  const VAULT_IDS = (Netlify.env.get('ANTHROPIC_VAULT_IDS') || '')
    .split(',')
    .map((s) => s.trim())
    .filter(Boolean)

  if (!apiKey) {
    return new Response('Server is missing ANTHROPIC_API_KEY', { status: 500 })
  }
  const missing: string[] = []
  if (!AGENT_ID) missing.push('ANTHROPIC_AGENT_ID')
  if (!ENVIRONMENT_ID) missing.push('ANTHROPIC_ENVIRONMENT_ID')
  if (missing.length) {
    return new Response(`Server is missing env var(s): ${missing.join(', ')}`, {
      status: 500,
    })
  }

  const userMessage = `Here is a new public media station intake:

- Station Name: ${stationName}
- Station Location: ${stationLocation}
- Station Website: ${stationWebsite}

Please follow your instructions to produce the funding outlook brief.`

  // ----- Create the session -----
  const createBody: Record<string, unknown> = {
    agent: AGENT_ID,
    environment_id: ENVIRONMENT_ID,
  }
  if (VAULT_IDS.length) createBody.vault_ids = VAULT_IDS

  let sessionId: string
  try {
    const createRes = await anthropicFetch('/v1/sessions', apiKey, {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify(createBody),
    })
    if (!createRes.ok) {
      const text = await createRes.text()
      return new Response(
        `Failed to create session: ${createRes.status} ${text}`,
        { status: 500 },
      )
    }
    const created = (await createRes.json()) as { id?: string }
    if (!created.id) {
      return new Response('Failed to create session: no id returned', {
        status: 500,
      })
    }
    sessionId = created.id
  } catch (err) {
    return new Response(
      `Failed to create session: ${(err as Error)?.message || err}`,
      { status: 500 },
    )
  }

  // Aborting this tears down whichever upstream SSE connection is open
  // (or being opened) for this browser request.
  const upstreamAbort = new AbortController()

  // ----- Open the event stream BEFORE sending the user message -----
  // (Stream-first ensures we don't miss any events the agent emits.)
  const openStream = async (): Promise<ReadableStream<Uint8Array>> => {
    const res = await anthropicFetch(
      `/v1/sessions/${sessionId}/events/stream`,
      apiKey,
      {
        method: 'GET',
        headers: { accept: 'text/event-stream' },
        signal: upstreamAbort.signal,
      },
    )
    if (!res.ok || !res.body) {
      // Release the unread error body before giving up on this response.
      await res.body?.cancel().catch(() => {})
      throw new Error(
        `events.stream returned ${res.status} ${res.statusText}`,
      )
    }
    return res.body
  }

  let upstream: ReadableStream<Uint8Array>
  try {
    upstream = await openStream()
  } catch (err) {
    return new Response(
      `Failed to open session event stream: ${(err as Error)?.message || err}`,
      { status: 500 },
    )
  }

  // ----- Send the kickoff user.message event -----
  try {
    const sendRes = await anthropicFetch(
      `/v1/sessions/${sessionId}/events`,
      apiKey,
      {
        method: 'POST',
        headers: { 'content-type': 'application/json' },
        body: JSON.stringify({
          events: [
            {
              type: 'user.message',
              content: [{ type: 'text', text: userMessage }],
            },
          ],
        }),
      },
    )
    if (!sendRes.ok) {
      const text = await sendRes.text()
      // We are bailing out: don't leave the already-open SSE stream dangling.
      upstreamAbort.abort()
      return new Response(
        `Failed to send user message: ${sendRes.status} ${text}`,
        { status: 500 },
      )
    }
    // The acknowledgement body is not needed; release it.
    await sendRes.body?.cancel().catch(() => {})
  } catch (err) {
    upstreamAbort.abort()
    return new Response(
      `Failed to send user message: ${(err as Error)?.message || err}`,
      { status: 500 },
    )
  }

  const encoder = new TextEncoder()
  const seenEventIds = new Set<string>()
  const deadline = Date.now() + RECONNECT_BUDGET_MS

  const body = new ReadableStream<Uint8Array>({
    async start(controller) {
      let lastSendAt = Date.now()

      // Writes one NDJSON line. Enqueue throws once the browser has gone
      // away or the stream is closed; that is not an error worth surfacing.
      const writeJson = (obj: unknown) => {
        try {
          controller.enqueue(encoder.encode(JSON.stringify(obj) + '\n'))
          lastSendAt = Date.now()
        } catch {
          /* controller closed */
        }
      }

      // Poll at twice the heartbeat rate; only emit when nothing else has
      // been written for a full HEARTBEAT_MS.
      const heartbeat = setInterval(() => {
        if (Date.now() - lastSendAt >= HEARTBEAT_MS) {
          writeJson({ type: 'heartbeat' })
        }
      }, HEARTBEAT_MS / 2)

      // Translates one upstream event into zero or more downstream lines.
      // Event types not listed here are intentionally dropped.
      const handle = (event: AgentEvent) => {
        switch (event.type) {
          case 'agent.message': {
            if (Array.isArray(event.content)) {
              for (const block of event.content) {
                if (block?.type === 'text' && block.text) {
                  writeJson({ type: 'text', text: block.text + '\n\n' })
                }
              }
            }
            break
          }

          case 'agent.thinking': {
            writeJson({ type: 'status', kind: 'thinking' })
            break
          }

          case 'agent.tool_use':
          case 'agent.mcp_tool_use': {
            writeJson({
              type: 'status',
              kind: 'tool_use',
              name: event.name || 'tool',
              label: describeToolUse(event),
            })
            break
          }

          case 'agent.tool_result':
          case 'agent.mcp_tool_result': {
            if (event.is_error) {
              const msg =
                (Array.isArray(event.content) && event.content[0]?.text) ||
                'tool error'
              writeJson({
                type: 'status',
                kind: 'tool_result',
                ok: false,
                message: String(msg).slice(0, 300),
              })
            } else {
              writeJson({ type: 'status', kind: 'tool_result', ok: true })
            }
            break
          }

          case 'session.error': {
            const msg = event.error?.message || 'unknown session error'
            writeJson({
              type: 'status',
              kind: 'session_error',
              message: msg,
            })
            break
          }
        }
      }

      let done = false
      let currentStream: ReadableStream<Uint8Array> = upstream
      let iteration = 0

      try {
        // Stop as soon as the session ends OR the browser disconnects.
        while (!done && !upstreamAbort.signal.aborted) {
          iteration++

          // On reconnect (every iteration after the first), reopen the
          // live stream and backfill anything we missed during the gap.
          if (iteration > 1) {
            if (Date.now() >= deadline) {
              writeJson({
                type: 'error',
                message: `Stream reconnect budget (${Math.round(
                  RECONNECT_BUDGET_MS / 60_000,
                )} min) exhausted. The session may still be running — try again or check the Console.`,
              })
              break
            }

            await new Promise((r) => setTimeout(r, RECONNECT_BACKOFF_MS))

            // The browser may have left while we were backing off.
            if (upstreamAbort.signal.aborted) break

            try {
              currentStream = await openStream()
            } catch (err) {
              writeJson({
                type: 'error',
                message: `Failed to reopen event stream: ${(err as Error)?.message || err}`,
              })
              break
            }

            // Backfill: pull everything the session has emitted so far
            // and dedupe by event.id.
            try {
              for await (const event of listAllEvents(sessionId, apiKey)) {
                if (event.id && !seenEventIds.has(event.id)) {
                  seenEventIds.add(event.id)
                  handle(event)
                }
                // Terminal checks must run even for already-seen events,
                // or a terminal event that came in via the backfill gets
                // skipped and the loop never exits.
                if (isTerminal(event)) {
                  done = true
                  break
                }
              }
            } catch (err) {
              writeJson({
                type: 'status',
                kind: 'session_error',
                message: `Backfill failed: ${(err as Error)?.message || err}`,
              })
              // Don't break — fall through and try the live stream.
            }

            // The just-reopened stream is released by the abort in `finally`.
            if (done) break
          }

          // Tail the currently-open live stream until it ends, errors,
          // or hits a terminal session event.
          try {
            for await (const event of parseSse(currentStream)) {
              if (event.id && !seenEventIds.has(event.id)) {
                seenEventIds.add(event.id)
                handle(event)
              }
              if (isTerminal(event)) {
                done = true
                break
              }
            }
          } catch {
            // Treat as transient. If it's actually persistent, the
            // reopen + backfill on the next iteration will surface it
            // (or burn down the reconnect budget cleanly). An abort
            // caused by the browser leaving also lands here; the loop
            // condition then ends the relay.
          }
        }

        if (done) {
          writeJson({ type: 'done' })
        }
      } catch (err) {
        writeJson({
          type: 'error',
          message: (err as Error)?.message || String(err),
        })
      } finally {
        clearInterval(heartbeat)
        // Release any upstream connection that is still open (e.g. the
        // stream reopened just before backfill found the terminal event).
        upstreamAbort.abort()
        try {
          controller.close()
        } catch {
          /* ignore */
        }
      }
    },

    // Called when the browser disconnects / stops reading. Without this
    // the relay loop would keep tailing and reconnecting upstream for up
    // to RECONNECT_BUDGET_MS on behalf of nobody.
    cancel() {
      upstreamAbort.abort()
    },
  })

  return new Response(body, {
    status: 200,
    headers: {
      'Content-Type': 'application/x-ndjson; charset=utf-8',
      'Cache-Control': 'no-cache, no-transform',
      'X-Accel-Buffering': 'no',
    },
  })
}
|
|
|
|
// Netlify Edge Function routing: the frontend POSTs to this path.
export const config: Config = {
  path: '/api/agent-proxy',
}
|