{
  "version": 3,
  "sources": ["../../src/app/ai/agents/evals2/scenarios/checklist/checklistBehavior.eval.ts"],
  "sourcesContent": ["import { assert } from \"@framerjs/shared\"\nimport { isAssistantToolCallPart } from \"../../../messages.ts\"\nimport type { ChecklistTask } from \"../../../tools/checklist.ts\"\nimport {\n\tCHECKLIST_COMPLETE_TASK_TOOL_NAME,\n\tCHECKLIST_CREATE_TOOL_NAME,\n\tCREATE_COMPONENT_FROM_FRAME_TOOL_NAME,\n} from \"../../../tools/toolNames.ts\"\nimport { agentEvalAsset } from \"../../harness/asset.ts\"\nimport { createEvalExportZipFixture } from \"../../harness/fixture.ts\"\nimport type { AgentEvalPartPosition, AgentEvalStepsHelper, AgentEvalToolCallName } from \"../../harness/helpers.ts\"\n\nconst checklistBehaviorFixture = createEvalExportZipFixture(\n\t\"checklist-behavior\",\n\tagentEvalAsset(\"./checklist-behavior.fixture.zip\"),\n\t{ runtimeTarget: \"cli\" },\n)\n\nfunction getToolCallPositions(\n\tsteps: AgentEvalStepsHelper,\n\ttoolName: AgentEvalToolCallName,\n\tstartStepIndex = 0,\n): AgentEvalPartPosition[] {\n\tconst positions: AgentEvalPartPosition[] = []\n\tfor (const { part, position } of steps.parts()) {\n\t\tif (position.stepIndex < startStepIndex) continue\n\t\tif (!isAssistantToolCallPart(part)) continue\n\t\tif (part.content.toolName !== toolName) continue\n\t\tpositions.push(position)\n\t}\n\treturn positions\n}\n\nfunction getCompletedTaskIdsAtOrAfter(steps: AgentEvalStepsHelper, startStepIndex: number): string[] {\n\tconst completedTaskIds: string[] = []\n\tfor (const { part, position } of steps.parts()) {\n\t\tif (position.stepIndex < startStepIndex) continue\n\t\tif (!isAssistantToolCallPart(part)) continue\n\t\tif (part.content.toolName !== CHECKLIST_COMPLETE_TASK_TOOL_NAME) continue\n\t\tcompletedTaskIds.push(part.content.input.taskId)\n\t}\n\treturn completedTaskIds\n}\n\nfunction getCompletedTaskIdsBefore(steps: AgentEvalStepsHelper, startStepIndex: number): string[] {\n\tconst completedTaskIds: string[] = []\n\tfor (const { part, position } of steps.parts()) {\n\t\tif (position.stepIndex >= startStepIndex) continue\n\t\tif (!isAssistantToolCallPart(part)) continue\n\t\tif (part.content.toolName !== CHECKLIST_COMPLETE_TASK_TOOL_NAME) continue\n\t\tcompletedTaskIds.push(part.content.input.taskId)\n\t}\n\treturn completedTaskIds\n}\n\nfunction expectChecklistTasksAreFocused(tasks: readonly ChecklistTask[]): void {\n\texpect(tasks.length).toBeGreaterThanOrEqual(3)\n\texpect(tasks.length).toBeLessThanOrEqual(8)\n\texpect(new Set(tasks.map(task => task.id)).size).toBe(tasks.length)\n\texpect(tasks.every(task => /^[a-z0-9-]+$/u.test(task.id))).toBe(true)\n\texpect(tasks.every(task => task.title.trim().split(/\\s+/u).length <= 5)).toBe(true)\n}\n\nevaluation(\n\t\"Checklist: Large Build Creates Plan\",\n\tchecklistBehaviorFixture,\n\t{\n\t\tid: \"checklist-large-build-creates-plan\",\n\t\trequestId: \"gGnZJUMl8\",\n\t\tstepIndex: 2,\n\t\tmaxSteps: 3,\n\t\trunOnStepFinish: true,\n\t\tstopWhenPassed: true,\n\t},\n\t({ agent, report, steps, tools }) => {\n\t\tconst request = agent.chatMessages.at(-1)\n\t\tconst designPlanPosition = steps.firstDesignPlanPosition()\n\t\tconst checklistCreatePosition = steps.firstToolCallPosition(CHECKLIST_CREATE_TOOL_NAME)\n\t\tconst firstProjectMutationPosition = steps.firstProjectMutationPosition()\n\t\tconst checklistCreate = tools.firstCall(CHECKLIST_CREATE_TOOL_NAME)\n\n\t\treport.correctness.required(\"continues captured website request\", () => {\n\t\t\texpect(request?.id).toBe(\"gGnZJUMl8\")\n\t\t\texpect(request?.parts).toEqual([\"Cr\u00E9e moi un site internet\"])\n\t\t})\n\t\treport.correctness.required(\"creates a checklist after the design plan\", () => {\n\t\t\tsteps.expectBefore(designPlanPosition, checklistCreatePosition)\n\t\t})\n\t\treport.correctness.required(\"creates the checklist before mutating the project\", () => {\n\t\t\texpect(checklistCreatePosition).toBeDefined()\n\t\t\tif (firstProjectMutationPosition === undefined) return\n\t\t\tsteps.expectBefore(checklistCreatePosition, firstProjectMutationPosition)\n\t\t})\n\t\treport.correctness.scored(\"creates focused checklist tasks\", () => {\n\t\t\tassert(checklistCreate !== undefined, \"Expected checklist_create to be called.\")\n\t\t\texpectChecklistTasksAreFocused(checklistCreate.input.tasks)\n\t\t})\n\t\treport.correctness.scored(\"does not create multiple checklists\", () => {\n\t\t\texpect(tools.calls(CHECKLIST_CREATE_TOOL_NAME)).toHaveLength(1)\n\t\t})\n\t},\n)\n\nevaluation(\n\t\"Checklist: Responsive Resume Keeps State\",\n\tchecklistBehaviorFixture,\n\t{\n\t\tid: \"checklist-responsive-resume-keeps-state\",\n\t\trequestId: \"ni0ztB7xN\",\n\t\tstepIndex: 5,\n\t\tmaxSteps: 4,\n\t\trunOnStepFinish: true,\n\t\tstopWhenPassed: true,\n\t},\n\t({ report, steps, tools }) => {\n\t\tconst replayStepCount = 5\n\t\tconst completedBeforeReplay = new Set(getCompletedTaskIdsBefore(steps, replayStepCount))\n\t\tconst completedAfterReplay = getCompletedTaskIdsAtOrAfter(steps, replayStepCount)\n\t\tconst repeatedCompletions = completedAfterReplay.filter(taskId => completedBeforeReplay.has(taskId))\n\n\t\ttools.reportReplayContinued(report, {\n\t\t\trequestId: \"ni0ztB7xN\",\n\t\t\tstepIndex: replayStepCount,\n\t\t})\n\t\treport.correctness.required(\"does not recreate the checklist after resume\", () => {\n\t\t\texpect(getToolCallPositions(steps, CHECKLIST_CREATE_TOOL_NAME, replayStepCount)).toHaveLength(0)\n\t\t})\n\t\treport.correctness.required(\"continues by completing a new remaining task\", () => {\n\t\t\texpect(completedAfterReplay.length).toBeGreaterThan(0)\n\t\t\texpect(repeatedCompletions).toHaveLength(0)\n\t\t})\n\t\treport.correctness.scored(\"does not produce command errors after resume\", () => {\n\t\t\texpect(tools.commandErrors()).toHaveLength(0)\n\t\t})\n\t},\n)\n\nevaluation(\n\t\"Checklist: Scoped Component Skips Checklist\",\n\tchecklistBehaviorFixture,\n\t{\n\t\tid: \"checklist-scoped-component-skips-checklist\",\n\t\trequestId: \"NUucNrr1d\",\n\t\tstepIndex: 0,\n\t\tmaxSteps: 3,\n\t},\n\t({ agent, report, tools }) => {\n\t\tconst request = agent.chatMessages.at(-1)\n\n\t\treport.correctness.required(\"continues captured component request\", () => {\n\t\t\texpect(request?.id).toBe(\"NUucNrr1d\")\n\t\t})\n\t\treport.correctness.required(\"uses the component creation tool\", () => {\n\t\t\texpect(tools.calls(CREATE_COMPONENT_FROM_FRAME_TOOL_NAME).length).toBeGreaterThan(0)\n\t\t})\n\t\treport.efficiency.required(\"does not create checklist for scoped component conversion\", () => {\n\t\t\texpect(tools.calls(CHECKLIST_CREATE_TOOL_NAME)).toHaveLength(0)\n\t\t\texpect(tools.calls(CHECKLIST_COMPLETE_TASK_TOOL_NAME)).toHaveLength(0)\n\t\t})\n\t},\n)\n"],
  "mappings": ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAYA,IAAM,2BAA2B;AAAA,EAChC;AAAA,EACA,eAAe,kCAAkC;AAAA,EACjD,EAAE,eAAe,MAAM;AACxB;AAEA,SAAS,qBACR,OACA,UACA,iBAAiB,GACS;AAC1B,QAAM,YAAqC,CAAC;AAC5C,aAAW,EAAE,MAAM,SAAS,KAAK,MAAM,MAAM,GAAG;AAC/C,QAAI,SAAS,YAAY,eAAgB;AACzC,QAAI,CAAC,wBAAwB,IAAI,EAAG;AACpC,QAAI,KAAK,QAAQ,aAAa,SAAU;AACxC,cAAU,KAAK,QAAQ;AAAA,EACxB;AACA,SAAO;AACR;AAEA,SAAS,6BAA6B,OAA6B,gBAAkC;AACpG,QAAM,mBAA6B,CAAC;AACpC,aAAW,EAAE,MAAM,SAAS,KAAK,MAAM,MAAM,GAAG;AAC/C,QAAI,SAAS,YAAY,eAAgB;AACzC,QAAI,CAAC,wBAAwB,IAAI,EAAG;AACpC,QAAI,KAAK,QAAQ,aAAa,kCAAmC;AACjE,qBAAiB,KAAK,KAAK,QAAQ,MAAM,MAAM;AAAA,EAChD;AACA,SAAO;AACR;AAEA,SAAS,0BAA0B,OAA6B,gBAAkC;AACjG,QAAM,mBAA6B,CAAC;AACpC,aAAW,EAAE,MAAM,SAAS,KAAK,MAAM,MAAM,GAAG;AAC/C,QAAI,SAAS,aAAa,eAAgB;AAC1C,QAAI,CAAC,wBAAwB,IAAI,EAAG;AACpC,QAAI,KAAK,QAAQ,aAAa,kCAAmC;AACjE,qBAAiB,KAAK,KAAK,QAAQ,MAAM,MAAM;AAAA,EAChD;AACA,SAAO;AACR;AAEA,SAAS,+BAA+B,OAAuC;AAC9E,SAAO,MAAM,MAAM,EAAE,uBAAuB,CAAC;AAC7C,SAAO,MAAM,MAAM,EAAE,oBAAoB,CAAC;AAC1C,SAAO,IAAI,IAAI,MAAM,IAAI,UAAQ,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,KAAK,MAAM,MAAM;AAClE,SAAO,MAAM,MAAM,UAAQ,gBAAgB,KAAK,KAAK,EAAE,CAAC,CAAC,EAAE,KAAK,IAAI;AACpE,SAAO,MAAM,MAAM,UAAQ,KAAK,MAAM,KAAK,EAAE,MAAM,MAAM,EAAE,UAAU,CAAC,CAAC,EAAE,KAAK,IAAI;AACnF;AAEA;AAAA,EACC;AAAA,EACA;AAAA,EACA;AAAA,IACC,IAAI;AAAA,IACJ,WAAW;AAAA,IACX,WAAW;AAAA,IACX,UAAU;AAAA,IACV,iBAAiB;AAAA,IACjB,gBAAgB;AAAA,EACjB;AAAA,EACA,CAAC,EAAE,OAAO,QAAQ,OAAO,MAAM,MAAM;AACpC,UAAM,UAAU,MAAM,aAAa,GAAG,EAAE;AACxC,UAAM,qBAAqB,MAAM,wBAAwB;AACzD,UAAM,0BAA0B,MAAM,sBAAsB,0BAA0B;AACtF,UAAM,+BAA+B,MAAM,6BAA6B;AACxE,UAAM,kBAAkB,MAAM,UAAU,0BAA0B;AAElE,WAAO,YAAY,SAAS,sCAAsC,MAAM;AACvE,aAAO,SAAS,EAAE,EAAE,KAAK,WAAW;AACpC,aAAO,SAAS,KAAK,EAAE,QAAQ,CAAC,8BAA2B,CAAC;AAAA,IAC7D,CAAC;AACD,WAAO,YAAY,SAAS,6CAA6C,MAAM;AAC9E,YAAM,aAAa,oBAAoB,uBAAuB;AAAA,IAC/D,CAAC;AACD,WAAO,YAAY,SAAS,qDAAqD,MAAM;AACtF,aAAO,uBAAuB,EAAE,YAAY;AAC5C,UAAI,iCAAiC,OAAW;AAChD,YAAM,aAAa,yBAAyB,4BAA4B;AAAA,IACzE,CAAC;AACD,WAAO,YAAY,OAAO,mCAAmC,MAAM;AAClE,aAAO,oBAAoB,QAAW,yCAAyC;AAC/E,qCAA+B,gBAAgB,MAAM,KAAK;AAAA,IAC3D,CAAC;AACD,WAAO,YAAY,OAAO,uCAAuC,MAAM;AACtE,aAAO,MAAM,MAAM,0BAA0B,CAAC,EAAE,aAAa,CAAC;AAAA,IAC/D,CAAC;AAAA,EACF;AACD;AAEA;AAAA,EACC;AAAA,EACA;AAAA,EACA;AAAA,IACC,IAAI;AAAA,IACJ,WAAW;AAAA,IACX,WAAW;AAAA,IACX,UAAU;AAAA,IACV,iBAAiB;AAAA,IACjB,gBAAgB;AAAA,EACjB;AAAA,EACA,CAAC,EAAE,QAAQ,OAAO,MAAM,MAAM;AAC7B,UAAM,kBAAkB;AACxB,UAAM,wBAAwB,IAAI,IAAI,0BAA0B,OAAO,eAAe,CAAC;AACvF,UAAM,uBAAuB,6BAA6B,OAAO,eAAe;AAChF,UAAM,sBAAsB,qBAAqB,OAAO,YAAU,sBAAsB,IAAI,MAAM,CAAC;AAEnG,UAAM,sBAAsB,QAAQ;AAAA,MACnC,WAAW;AAAA,MACX,WAAW;AAAA,IACZ,CAAC;AACD,WAAO,YAAY,SAAS,gDAAgD,MAAM;AACjF,aAAO,qBAAqB,OAAO,4BAA4B,eAAe,CAAC,EAAE,aAAa,CAAC;AAAA,IAChG,CAAC;AACD,WAAO,YAAY,SAAS,gDAAgD,MAAM;AACjF,aAAO,qBAAqB,MAAM,EAAE,gBAAgB,CAAC;AACrD,aAAO,mBAAmB,EAAE,aAAa,CAAC;AAAA,IAC3C,CAAC;AACD,WAAO,YAAY,OAAO,gDAAgD,MAAM;AAC/E,aAAO,MAAM,cAAc,CAAC,EAAE,aAAa,CAAC;AAAA,IAC7C,CAAC;AAAA,EACF;AACD;AAEA;AAAA,EACC;AAAA,EACA;AAAA,EACA;AAAA,IACC,IAAI;AAAA,IACJ,WAAW;AAAA,IACX,WAAW;AAAA,IACX,UAAU;AAAA,EACX;AAAA,EACA,CAAC,EAAE,OAAO,QAAQ,MAAM,MAAM;AAC7B,UAAM,UAAU,MAAM,aAAa,GAAG,EAAE;AAExC,WAAO,YAAY,SAAS,wCAAwC,MAAM;AACzE,aAAO,SAAS,EAAE,EAAE,KAAK,WAAW;AAAA,IACrC,CAAC;AACD,WAAO,YAAY,SAAS,oCAAoC,MAAM;AACrE,aAAO,MAAM,MAAM,qCAAqC,EAAE,MAAM,EAAE,gBAAgB,CAAC;AAAA,IACpF,CAAC;AACD,WAAO,WAAW,SAAS,6DAA6D,MAAM;AAC7F,aAAO,MAAM,MAAM,0BAA0B,CAAC,EAAE,aAAa,CAAC;AAC9D,aAAO,MAAM,MAAM,iCAAiC,CAAC,EAAE,aAAa,CAAC;AAAA,IACtE,CAAC;AAAA,EACF;AACD;",
  "names": []
}
