From de8956b9dd0c810c637150b1f582b8495a79712e Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Fri, 16 Jan 2026 11:55:09 -0700 Subject: [PATCH 1/6] Add meta-command architecture for job entry points Each job now generates a single user-facing meta-command (e.g., /deepwork_jobs) that interprets user intent and routes to appropriate hidden step commands. Key changes: - Add `exposed` field to job.yml steps (default: false = hidden) - Step commands are hidden by default (underscore prefix: _job.step.md) - Steps with `exposed: true` remain visible (e.g., deepwork_jobs.learn.md) - New meta-command templates for Claude and Gemini - Generator creates meta-command first, then step commands User experience: - Before: /deepwork_jobs.define - After: /deepwork_jobs define a new job Co-Authored-By: Claude Opus 4.5 --- ...s.md => _add_platform.add_capabilities.md} | 0 ...mplement.md => _add_platform.implement.md} | 0 ....research.md => _add_platform.research.md} | 0 ...form.verify.md => _add_platform.verify.md} | 0 ...obs.define.md => _deepwork_jobs.define.md} | 0 ...plement.md => _deepwork_jobs.implement.md} | 0 .../{update.job.md => _update.job.md} | 0 .claude/commands/add_platform.md | 72 +++ .claude/commands/deepwork_jobs.md | 60 ++ .claude/commands/deepwork_policy.md | 56 ++ .claude/commands/update.md | 54 ++ .deepwork/jobs/deepwork_jobs/job.yml | 1 + ...pabilities.toml => _add_capabilities.toml} | 0 .../{implement.toml => _implement.toml} | 0 .../{research.toml => _research.toml} | 0 .../{verify.toml => _verify.toml} | 0 .gemini/commands/add_platform/index.toml | 79 +++ .../{define.toml => _define.toml} | 0 .../{implement.toml => _implement.toml} | 0 .gemini/commands/deepwork_jobs/index.toml | 66 +++ .gemini/commands/deepwork_jobs/refine.toml | 538 ------------------ .gemini/commands/deepwork_policy/index.toml | 60 ++ .../commands/update/{job.toml => _job.toml} | 0 .gemini/commands/update/index.toml | 58 ++ CHANGELOG.md | 14 + README.md | 41 +- doc/architecture.md | 246 
++++---- src/deepwork/core/adapters.py | 56 +- src/deepwork/core/generator.py | 117 +++- src/deepwork/core/parser.py | 4 + src/deepwork/schemas/job_schema.py | 5 + .../standard_jobs/deepwork_jobs/job.yml | 1 + .../claude/command-job-meta.md.jinja | 50 ++ .../gemini/command-job-meta.toml.jinja | 56 ++ tests/e2e/test_claude_code_integration.py | 18 +- tests/fixtures/jobs/exposed_step_job/job.yml | 29 + .../exposed_step_job/steps/exposed_step.md | 7 + .../exposed_step_job/steps/hidden_step.md | 7 + tests/integration/test_fruits_workflow.py | 15 +- tests/integration/test_full_workflow.py | 38 +- tests/integration/test_install_flow.py | 27 +- tests/unit/test_adapters.py | 55 +- tests/unit/test_generator.py | 82 ++- tests/unit/test_parser.py | 40 ++ 44 files changed, 1226 insertions(+), 726 deletions(-) rename .claude/commands/{add_platform.add_capabilities.md => _add_platform.add_capabilities.md} (100%) rename .claude/commands/{add_platform.implement.md => _add_platform.implement.md} (100%) rename .claude/commands/{add_platform.research.md => _add_platform.research.md} (100%) rename .claude/commands/{add_platform.verify.md => _add_platform.verify.md} (100%) rename .claude/commands/{deepwork_jobs.define.md => _deepwork_jobs.define.md} (100%) rename .claude/commands/{deepwork_jobs.implement.md => _deepwork_jobs.implement.md} (100%) rename .claude/commands/{update.job.md => _update.job.md} (100%) create mode 100644 .claude/commands/add_platform.md create mode 100644 .claude/commands/deepwork_jobs.md create mode 100644 .claude/commands/deepwork_policy.md create mode 100644 .claude/commands/update.md rename .gemini/commands/add_platform/{add_capabilities.toml => _add_capabilities.toml} (100%) rename .gemini/commands/add_platform/{implement.toml => _implement.toml} (100%) rename .gemini/commands/add_platform/{research.toml => _research.toml} (100%) rename .gemini/commands/add_platform/{verify.toml => _verify.toml} (100%) create mode 100644 
.gemini/commands/add_platform/index.toml rename .gemini/commands/deepwork_jobs/{define.toml => _define.toml} (100%) rename .gemini/commands/deepwork_jobs/{implement.toml => _implement.toml} (100%) create mode 100644 .gemini/commands/deepwork_jobs/index.toml delete mode 100644 .gemini/commands/deepwork_jobs/refine.toml create mode 100644 .gemini/commands/deepwork_policy/index.toml rename .gemini/commands/update/{job.toml => _job.toml} (100%) create mode 100644 .gemini/commands/update/index.toml create mode 100644 src/deepwork/templates/claude/command-job-meta.md.jinja create mode 100644 src/deepwork/templates/gemini/command-job-meta.toml.jinja create mode 100644 tests/fixtures/jobs/exposed_step_job/job.yml create mode 100644 tests/fixtures/jobs/exposed_step_job/steps/exposed_step.md create mode 100644 tests/fixtures/jobs/exposed_step_job/steps/hidden_step.md diff --git a/.claude/commands/add_platform.add_capabilities.md b/.claude/commands/_add_platform.add_capabilities.md similarity index 100% rename from .claude/commands/add_platform.add_capabilities.md rename to .claude/commands/_add_platform.add_capabilities.md diff --git a/.claude/commands/add_platform.implement.md b/.claude/commands/_add_platform.implement.md similarity index 100% rename from .claude/commands/add_platform.implement.md rename to .claude/commands/_add_platform.implement.md diff --git a/.claude/commands/add_platform.research.md b/.claude/commands/_add_platform.research.md similarity index 100% rename from .claude/commands/add_platform.research.md rename to .claude/commands/_add_platform.research.md diff --git a/.claude/commands/add_platform.verify.md b/.claude/commands/_add_platform.verify.md similarity index 100% rename from .claude/commands/add_platform.verify.md rename to .claude/commands/_add_platform.verify.md diff --git a/.claude/commands/deepwork_jobs.define.md b/.claude/commands/_deepwork_jobs.define.md similarity index 100% rename from .claude/commands/deepwork_jobs.define.md rename to 
.claude/commands/_deepwork_jobs.define.md diff --git a/.claude/commands/deepwork_jobs.implement.md b/.claude/commands/_deepwork_jobs.implement.md similarity index 100% rename from .claude/commands/deepwork_jobs.implement.md rename to .claude/commands/_deepwork_jobs.implement.md diff --git a/.claude/commands/update.job.md b/.claude/commands/_update.job.md similarity index 100% rename from .claude/commands/update.job.md rename to .claude/commands/_update.job.md diff --git a/.claude/commands/add_platform.md b/.claude/commands/add_platform.md new file mode 100644 index 0000000..42cd87b --- /dev/null +++ b/.claude/commands/add_platform.md @@ -0,0 +1,72 @@ +--- +description: Add a new AI platform to DeepWork with adapter, templates, and tests +--- + +# add_platform + +You are executing the **add_platform** job. Add a new AI platform to DeepWork with adapter, templates, and tests + +A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork. + +This job guides you through four phases: +1. **Research**: Capture the platform's CLI configuration and hooks system documentation +2. **Add Capabilities**: Update the job schema and adapters with any new hook events +3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates +4. **Verify**: Ensure installation works correctly and produces expected files + +The workflow ensures consistency across all supported platforms and maintains +comprehensive test coverage for new functionality. 
+ +**Important Notes**: +- Only hooks available on slash command definitions should be captured +- Each existing adapter must be updated when new hooks are added (typically with null values) +- Tests must achieve 100% coverage for any new functionality +- Installation verification confirms the platform integrates correctly with existing jobs + + +## Available Steps + +This job has 4 step(s): + +### research +**Research Platform Documentation**: Capture CLI configuration and hooks system documentation for the new platform +- Command: `_add_platform.research` +### add_capabilities +**Add Hook Capabilities**: Update job schema and adapters with any new hook events the platform supports +- Command: `_add_platform.add_capabilities` +- Requires: research +### implement +**Implement Platform Support**: Add platform adapter, templates, tests with 100% coverage, and README documentation +- Command: `_add_platform.implement` +- Requires: research, add_capabilities +### verify +**Verify Installation**: Set up platform directories and verify deepwork install works correctly +- Command: `_add_platform.verify` +- Requires: implement + +## Instructions + +Determine what the user wants to do and route to the appropriate step. + +1. **Analyze user intent** from the text that follows `/add_platform` + +2. **Match intent to a step**: + - research: Capture CLI configuration and hooks system documentation for the new platform + - add_capabilities: Update job schema and adapters with any new hook events the platform supports + - implement: Add platform adapter, templates, tests with 100% coverage, and README documentation + - verify: Set up platform directories and verify deepwork install works correctly + +3. **Invoke the matched step** using the Skill tool: + ``` + Skill: + ``` + +4. 
**If intent is ambiguous**, ask the user which step they want: + - Present the available steps as numbered options + - Use AskUserQuestion to let them choose + +**Critical**: You MUST invoke the step using the Skill tool. Do not copy/paste the step's instructions. The Skill tool invocation ensures the step's quality validation hooks fire. + +## Context Files + +- Job definition: `.deepwork/jobs/add_platform/job.yml` \ No newline at end of file diff --git a/.claude/commands/deepwork_jobs.md b/.claude/commands/deepwork_jobs.md new file mode 100644 index 0000000..ba4e639 --- /dev/null +++ b/.claude/commands/deepwork_jobs.md @@ -0,0 +1,60 @@ +--- +description: DeepWork job management commands +--- + +# deepwork_jobs + +You are executing the **deepwork_jobs** job. DeepWork job management commands + +Core commands for managing DeepWork jobs. These commands help you define new multi-step +workflows and learn from running them. + +The `define` command guides you through an interactive process to create a new job by +asking structured questions about your workflow, understanding each step's inputs and outputs, +and generating all necessary files. + +The `learn` command reflects on conversations where DeepWork jobs were run, identifies +confusion or inefficiencies, and improves job instructions. It also captures bespoke +learnings specific to the current run into AGENTS.md files in the working folder. 
+ + +## Available Steps + +This job has 3 step(s): + +### define +**Define Job Specification**: Create the job.yml specification file by understanding workflow requirements +- Command: `_deepwork_jobs.define` +### implement +**Implement Job Steps**: Generate instruction files for each step based on the job.yml specification +- Command: `_deepwork_jobs.implement` +- Requires: define +### learn +**Learn from Job Execution**: Reflect on conversation to improve job instructions and capture learnings +- Command: `deepwork_jobs.learn` + +## Instructions + +Determine what the user wants to do and route to the appropriate step. + +1. **Analyze user intent** from the text that follows `/deepwork_jobs` + +2. **Match intent to a step**: + - define: Create the job.yml specification file by understanding workflow requirements + - implement: Generate instruction files for each step based on the job.yml specification + - learn: Reflect on conversation to improve job instructions and capture learnings + +3. **Invoke the matched step** using the Skill tool: + ``` + Skill: + ``` + +4. **If intent is ambiguous**, ask the user which step they want: + - Present the available steps as numbered options + - Use AskUserQuestion to let them choose + +**Critical**: You MUST invoke the step using the Skill tool. Do not copy/paste the step's instructions. The Skill tool invocation ensures the step's quality validation hooks fire. + +## Context Files + +- Job definition: `.deepwork/jobs/deepwork_jobs/job.yml` \ No newline at end of file diff --git a/.claude/commands/deepwork_policy.md b/.claude/commands/deepwork_policy.md new file mode 100644 index 0000000..01dfba9 --- /dev/null +++ b/.claude/commands/deepwork_policy.md @@ -0,0 +1,56 @@ +--- +description: Policy enforcement for AI agent sessions +--- + +# deepwork_policy + +You are executing the **deepwork_policy** job. 
Policy enforcement for AI agent sessions + +Manages policies that automatically trigger when certain files change during an AI agent session. +Policies help ensure that code changes follow team guidelines, documentation is updated, +and architectural decisions are respected. + +Policies are defined in a `.deepwork.policy.yml` file at the root of your project. Each policy +specifies: +- Trigger patterns: Glob patterns for files that, when changed, should trigger the policy +- Safety patterns: Glob patterns for files that, if also changed, mean the policy doesn't need to fire +- Instructions: What the agent should do when the policy triggers + +Example use cases: +- Update installation docs when configuration files change +- Require security review when authentication code is modified +- Ensure API documentation stays in sync with API code +- Remind developers to update changelogs + + +## Available Steps + +This job has 1 step(s): + +### define +**Define Policy**: Create or update policy entries in .deepwork.policy.yml +- Command: `_deepwork_policy.define` + +## Instructions + +Determine what the user wants to do and route to the appropriate step. + +1. **Analyze user intent** from the text that follows `/deepwork_policy` + +2. **Match intent to a step**: + - define: Create or update policy entries in .deepwork.policy.yml + +3. **Invoke the matched step** using the Skill tool: + ``` + Skill: + ``` + +4. **If intent is ambiguous**, ask the user which step they want: + - Present the available steps as numbered options + - Use AskUserQuestion to let them choose + +**Critical**: You MUST invoke the step using the Skill tool. Do not copy/paste the step's instructions. The Skill tool invocation ensures the step's quality validation hooks fire. 
+ +## Context Files + +- Job definition: `.deepwork/jobs/deepwork_policy/job.yml` \ No newline at end of file diff --git a/.claude/commands/update.md b/.claude/commands/update.md new file mode 100644 index 0000000..30c86a6 --- /dev/null +++ b/.claude/commands/update.md @@ -0,0 +1,54 @@ +--- +description: Update standard jobs in src/ and sync to installed locations +--- + +# update + +You are executing the **update** job. Update standard jobs in src/ and sync to installed locations + +A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs +(like `deepwork_jobs` and `deepwork_policy`) are source-controlled in +`src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/` +or `.claude/commands/` directly. + +This job guides you through: +1. Identifying which standard job(s) to update from conversation context +2. Making changes in the correct source location (`src/deepwork/standard_jobs/[job_name]/`) +3. Running `deepwork install` to propagate changes to `.deepwork/` and command directories +4. Verifying the sync completed successfully + +Use this job whenever you need to modify job.yml files, step instructions, or hooks +for any standard job in the DeepWork repository. + + +## Available Steps + +This job has 1 step(s): + +### job +**Update Standard Job**: Edit standard job source files and sync to installed locations +- Command: `_update.job` + +## Instructions + +Determine what the user wants to do and route to the appropriate step. + +1. **Analyze user intent** from the text that follows `/update` + +2. **Match intent to a step**: + - job: Edit standard job source files and sync to installed locations + +3. **Invoke the matched step** using the Skill tool: + ``` + Skill: + ``` + +4. **If intent is ambiguous**, ask the user which step they want: + - Present the available steps as numbered options + - Use AskUserQuestion to let them choose + +**Critical**: You MUST invoke the step using the Skill tool. 
Do not copy/paste the step's instructions. The Skill tool invocation ensures the step's quality validation hooks fire. + +## Context Files + +- Job definition: `.deepwork/jobs/update/job.yml` \ No newline at end of file diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index e95aa2c..c5c5a13 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -88,6 +88,7 @@ steps: name: "Learn from Job Execution" description: "Reflect on conversation to improve job instructions and capture learnings" instructions_file: steps/learn.md + exposed: true inputs: - name: job_name description: "Name of the job that was run (optional - will auto-detect from conversation)" diff --git a/.gemini/commands/add_platform/add_capabilities.toml b/.gemini/commands/add_platform/_add_capabilities.toml similarity index 100% rename from .gemini/commands/add_platform/add_capabilities.toml rename to .gemini/commands/add_platform/_add_capabilities.toml diff --git a/.gemini/commands/add_platform/implement.toml b/.gemini/commands/add_platform/_implement.toml similarity index 100% rename from .gemini/commands/add_platform/implement.toml rename to .gemini/commands/add_platform/_implement.toml diff --git a/.gemini/commands/add_platform/research.toml b/.gemini/commands/add_platform/_research.toml similarity index 100% rename from .gemini/commands/add_platform/research.toml rename to .gemini/commands/add_platform/_research.toml diff --git a/.gemini/commands/add_platform/verify.toml b/.gemini/commands/add_platform/_verify.toml similarity index 100% rename from .gemini/commands/add_platform/verify.toml rename to .gemini/commands/add_platform/_verify.toml diff --git a/.gemini/commands/add_platform/index.toml b/.gemini/commands/add_platform/index.toml new file mode 100644 index 0000000..787b56d --- /dev/null +++ b/.gemini/commands/add_platform/index.toml @@ -0,0 +1,79 @@ +# add_platform +# +# Add a new AI platform to DeepWork with 
adapter, templates, and tests +# +# Generated by DeepWork - do not edit manually + +description = "Add a new AI platform to DeepWork with adapter, templates, and tests" + +prompt = """ +# add_platform + +You are executing the **add_platform** job. Add a new AI platform to DeepWork with adapter, templates, and tests + +A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork. + +This job guides you through four phases: +1. **Research**: Capture the platform's CLI configuration and hooks system documentation +2. **Add Capabilities**: Update the job schema and adapters with any new hook events +3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates +4. **Verify**: Ensure installation works correctly and produces expected files + +The workflow ensures consistency across all supported platforms and maintains +comprehensive test coverage for new functionality. + +**Important Notes**: +- Only hooks available on slash command definitions should be captured +- Each existing adapter must be updated when new hooks are added (typically with null values) +- Tests must achieve 100% coverage for any new functionality +- Installation verification confirms the platform integrates correctly with existing jobs + + +## Available Steps + +This job has 4 step(s): + +### research +**Research Platform Documentation**: Capture CLI configuration and hooks system documentation for the new platform +- Command: `/add_platform:_research` +### add_capabilities +**Add Hook Capabilities**: Update job schema and adapters with any new hook events the platform supports +- Command: `/add_platform:_add_capabilities` +- Requires: research +### implement +**Implement Platform Support**: Add platform adapter, templates, tests with 100% coverage, and README documentation +- Command: `/add_platform:_implement` +- Requires: research, add_capabilities +### verify +**Verify Installation**: Set up platform directories and verify 
deepwork install works correctly +- Command: `/add_platform:_verify` +- Requires: implement + +## Instructions + +Determine what the user wants to do and route to the appropriate step. + +1. **Analyze user intent** from the text that follows `/add_platform` + +2. **Match intent to a step**: + - research: Capture CLI configuration and hooks system documentation for the new platform + - add_capabilities: Update job schema and adapters with any new hook events the platform supports + - implement: Add platform adapter, templates, tests with 100% coverage, and README documentation + - verify: Set up platform directories and verify deepwork install works correctly + +3. **Invoke the matched step**: Tell the user to run the appropriate command: + - For research: `/add_platform:_research` + - For add_capabilities: `/add_platform:_add_capabilities` + - For implement: `/add_platform:_implement` + - For verify: `/add_platform:_verify` + +4. **If intent is ambiguous**, ask the user which step they want: + - Present the available steps as numbered options + - Let them choose + +**Note**: Gemini CLI doesn't support programmatic command invocation, so guide the user to run the appropriate step command. 
+ +## Context Files + +- Job definition: `.deepwork/jobs/add_platform/job.yml` +""" \ No newline at end of file diff --git a/.gemini/commands/deepwork_jobs/define.toml b/.gemini/commands/deepwork_jobs/_define.toml similarity index 100% rename from .gemini/commands/deepwork_jobs/define.toml rename to .gemini/commands/deepwork_jobs/_define.toml diff --git a/.gemini/commands/deepwork_jobs/implement.toml b/.gemini/commands/deepwork_jobs/_implement.toml similarity index 100% rename from .gemini/commands/deepwork_jobs/implement.toml rename to .gemini/commands/deepwork_jobs/_implement.toml diff --git a/.gemini/commands/deepwork_jobs/index.toml b/.gemini/commands/deepwork_jobs/index.toml new file mode 100644 index 0000000..357ca56 --- /dev/null +++ b/.gemini/commands/deepwork_jobs/index.toml @@ -0,0 +1,66 @@ +# deepwork_jobs +# +# DeepWork job management commands +# +# Generated by DeepWork - do not edit manually + +description = "DeepWork job management commands" + +prompt = """ +# deepwork_jobs + +You are executing the **deepwork_jobs** job. DeepWork job management commands + +Core commands for managing DeepWork jobs. These commands help you define new multi-step +workflows and learn from running them. + +The `define` command guides you through an interactive process to create a new job by +asking structured questions about your workflow, understanding each step's inputs and outputs, +and generating all necessary files. + +The `learn` command reflects on conversations where DeepWork jobs were run, identifies +confusion or inefficiencies, and improves job instructions. It also captures bespoke +learnings specific to the current run into AGENTS.md files in the working folder. 
+ + +## Available Steps + +This job has 3 step(s): + +### define +**Define Job Specification**: Create the job.yml specification file by understanding workflow requirements +- Command: `/deepwork_jobs:_define` +### implement +**Implement Job Steps**: Generate instruction files for each step based on the job.yml specification +- Command: `/deepwork_jobs:_implement` +- Requires: define +### learn +**Learn from Job Execution**: Reflect on conversation to improve job instructions and capture learnings +- Command: `/deepwork_jobs:learn` + +## Instructions + +Determine what the user wants to do and route to the appropriate step. + +1. **Analyze user intent** from the text that follows `/deepwork_jobs` + +2. **Match intent to a step**: + - define: Create the job.yml specification file by understanding workflow requirements + - implement: Generate instruction files for each step based on the job.yml specification + - learn: Reflect on conversation to improve job instructions and capture learnings + +3. **Invoke the matched step**: Tell the user to run the appropriate command: + - For define: `/deepwork_jobs:_define` + - For implement: `/deepwork_jobs:_implement` + - For learn: `/deepwork_jobs:learn` + +4. **If intent is ambiguous**, ask the user which step they want: + - Present the available steps as numbered options + - Let them choose + +**Note**: Gemini CLI doesn't support programmatic command invocation, so guide the user to run the appropriate step command. 
+ +## Context Files + +- Job definition: `.deepwork/jobs/deepwork_jobs/job.yml` +""" \ No newline at end of file diff --git a/.gemini/commands/deepwork_jobs/refine.toml b/.gemini/commands/deepwork_jobs/refine.toml deleted file mode 100644 index 1952328..0000000 --- a/.gemini/commands/deepwork_jobs/refine.toml +++ /dev/null @@ -1,538 +0,0 @@ -# deepwork_jobs:refine -# -# Modify an existing job definition -# -# Generated by DeepWork - do not edit manually - -description = "Modify an existing job definition" - -prompt = """ -# deepwork_jobs:refine - -**Standalone command** in the **deepwork_jobs** job - can be run anytime - -**Summary**: DeepWork job management commands - -## Job Overview - -Core commands for managing DeepWork jobs. These commands help you define new multi-step -workflows and refine existing ones. - -The `define` command guides you through an interactive process to create a new job by -asking detailed questions about your workflow, understanding each step's inputs and outputs, -and generating all necessary files. - -The `refine` command helps you modify existing jobs safely by understanding what you want -to change, validating the impact, and ensuring consistency across your workflow. - - - -## Instructions - -# Refine Existing DeepWork Job - -## Objective - -Help the user modify an existing DeepWork job definition by understanding what they want to change and ensuring the modifications maintain consistency and validity. - -## Task - -Guide the user through refining a job by first understanding their existing job, then what they want to change, and finally implementing those changes safely. - -### Step 1: Select and Load Job - -1. **List available jobs** - - Scan `.deepwork/jobs/` directory for installed jobs - - Display installed jobs with versions and descriptions - - Ask which job to refine - -2. **Load job definition** - - Read `.deepwork/jobs/[job_name]/job.yml` - - Parse and understand the current structure - -3. 
**Show current structure** - - Display all steps with their names and descriptions - - Show the dependency flow - - Highlight key inputs and outputs - -### Step 2: Understand Desired Changes - -Ask the user what they want to change: - -**Change Options:** -1. Add a new step to the workflow -2. Modify step instructions -3. Change step inputs/outputs -4. Update dependencies between steps -5. Update job metadata (description, version) -6. Remove a step -7. Add or modify stop hooks (quality validation) - -**For each change, ask clarifying questions:** -- Why do they want to make this change? -- How will it improve the workflow? -- Are there dependencies or side effects to consider? - -### Step 3: Make Changes Safely - -Based on the user's selection: - -#### Adding a Step - -1. **Gather step details** (same as define command) - - What does this step accomplish? - - What are the inputs? User parameters or file inputs? - - What outputs does it produce? - - What are the dependencies? - -2. **Determine placement** - - Where should it go in the workflow? - - Before which existing step? - - Or at the end? - -3. **Validate placement** - - Can't depend on later steps if inserted early - - File inputs must come from dependencies - - Check for circular dependencies - -4. **Update files** - - Update `job.yml` with new step - - Create step instructions file in `steps/[step_id].md` - - Prepare changelog entry describing the addition - -#### Modifying Step Instructions - -1. **Select step to modify** - - Show list of steps - - Ask which one to modify - -2. **Understand the change** - - What's not working with current instructions? - - What should be different? - - Show current instructions - -3. **Update instructions** - - Modify `.deepwork/jobs/[job_name]/steps/[step_id].md` - - Keep the same structure (Objective, Task, Process, Output Format, Quality Criteria) - - Prepare changelog entry describing the modification - -#### Changing Inputs/Outputs - -1. **Select step to modify** -2. 
**Show current inputs and outputs** -3. **Understand the change** - - Adding or removing? - - Why is this needed? - -4. **Validate impact** - - If removing output: check if other steps depend on it (BREAKING CHANGE) - - If adding file input: ensure from_step is in dependencies - - If removing input: ensure it's not critical - -5. **Update job.yml** - - Prepare changelog entry describing the input/output changes - -#### Updating Dependencies - -1. **Select step to modify** -2. **Show current dependency graph** -3. **Understand the change** - - Adding or removing dependency? - - Why? - -4. **Validate** - - Check for circular dependencies - - Ensure all file inputs have matching dependencies - - Ensure dependency chain makes logical sense - -5. **Update job.yml** - - Prepare changelog entry describing the dependency changes - -#### Updating Metadata - -1. **Ask what to change** - - Description? - - Version? - -2. **If version change, explain semantic versioning** - - Major (x.0.0): Breaking changes (removing steps, removing outputs) - - Minor (0.x.0): New features, backwards compatible (adding steps) - - Patch (0.0.x): Bug fixes, improvements - -3. **Update job.yml** - - Prepare changelog entry describing the metadata changes - -#### Removing a Step - -1. **Select step to remove** - -2. **CRITICAL: Validate safety** - - Check if other steps depend on this step - - Check if other steps use outputs from this step - - If dependencies exist: **WARN** and suggest updating dependents first - - This is a BREAKING CHANGE - requires major version bump - -3. **If safe to remove** - - Remove from `job.yml` - - Delete step instructions file - - Suggest version bump - - Prepare changelog entry describing the removal - -#### Adding or Modifying Stop Hooks - -Stop hooks provide quality validation loops that ensure step outputs meet criteria before completing. - -1. **Select step to modify** - - Show list of steps - - Ask which one to add/modify hooks for - -2. 
**Understand the need** - - What quality criteria should be validated? - - Is the output subjective (use prompt hook) or objective (use script hook)? - - Should validation happen automatically or only on specific conditions? - -3. **Choose hook type** - - **Prompt hooks** (recommended for most cases): - - Best for subjective quality criteria - - AI evaluates the output against criteria - - Example: "Verify the report is comprehensive and well-organized" - ```yaml - stop_hooks: - - prompt: | - Verify the output meets criteria: - 1. Contains all required sections - 2. Analysis is thorough - 3. Recommendations are actionable - ``` - - **Prompt file hooks**: - - For reusable or complex validation criteria - - Stores criteria in a separate markdown file - ```yaml - stop_hooks: - - prompt_file: hooks/quality_check.md - ``` - - **Script hooks**: - - For objective, programmatic validation - - Best for tests, linting, format checking - ```yaml - stop_hooks: - - script: hooks/run_tests.sh - ``` - -4. **Multiple hooks can be combined** - ```yaml - stop_hooks: - - script: hooks/lint.sh # First: objective checks - - prompt: "Verify content quality" # Then: subjective review - ``` - -5. **Update files** - - Add/modify `stop_hooks` array in job.yml - - Create hook files if using prompt_file or script types - - Update step instructions to match quality criteria - - Prepare changelog entry - -6. **Encourage prompt-based hooks** - - They leverage the AI's ability to understand context - - More flexible than rigid script checks - - Can evaluate nuanced quality criteria - -### Step 4: Update Changelog - -After making changes to the job.yml: - -1. **Add a changelog entry** - - Add a new entry to the `changelog` array in the job.yml - - Use the new version number - - List all changes made in this refinement - -2. **Changelog entry format**: - ```yaml - - version: "[new_version]" - changes: "[Description of all changes in this version]" - ``` - -3. 
**Example changelog entries**: - - "Added step: validate_positioning" - - "Modified step instructions for research_competitors to improve clarity and add quality criteria" - - "Removed step: duplicate_analysis (consolidated into comparative_analysis)" - - "Updated dependencies: positioning_recommendations now depends on validate_positioning" - - "Changed output filename: comparison_matrix.md → comparison_table.md" - - "Added step: validate_positioning; Updated dependencies for positioning_recommendations" - -### Step 5: Validate Changes - -After updating the changelog: - -1. **Review the updated structure** - - Show the complete updated workflow - - Highlight what changed - - Check for consistency - -2. **Validate job definition** - - No circular dependencies - - All file inputs match dependencies - - All step IDs unique - - All outputs defined - -3. **Confirm version update** - - Ensure version was bumped appropriately - - Breaking changes? Major version - - New features? Minor version - - Improvements? Patch version - -### Step 6: Sync and Reload - -1. **Run sync** - ```bash - deepwork sync - ``` - -2. **Reload commands** - - Instruct user to follow the reload instructions shown by `deepwork sync` - -3. 
**Provide summary** - - Show what changed - - List updated files - - Display the changelog entry - - Explain next steps - -## Safe Modification Patterns - -Guide users toward safe changes: - -### Safe Changes (Always OK) -- Adding steps at the end with new outputs -- Updating step instructions (content only) -- Changing job description -- Adding user inputs to a step -- Adding outputs (not removing) -- Patch version bumps - -### Potentially Breaking Changes (Warn User) -- Removing steps -- Removing outputs -- Removing inputs -- Changing dependencies -- Renaming outputs (other steps may reference them) -- Requires major or minor version bump - -### Always Validate -- Circular dependencies after any dependency change -- File inputs match dependencies after any change -- Step IDs remain unique -- At least one output per step - -## Example Dialog - -``` -Claude: I'll help you refine an existing job. Here are your installed jobs: - -1. competitive_research v1.0.0 - Systematic competitive analysis workflow -2. blog_post v1.0.0 - Blog post creation workflow - -Which job would you like to refine? - -User: 1 - -Claude: Loading competitive_research v1.0.0... - -Current structure: -- Step 1: identify_competitors - Identify 5-7 competitors in target market -- Step 2: research_competitors - Deep research on each competitor -- Step 3: comparative_analysis - Side-by-side comparison matrix -- Step 4: positioning_recommendations - Strategic positioning report - -What would you like to change? -1. Add a new step -2. Modify step instructions -3. Change inputs/outputs -4. Update dependencies -5. Update job metadata -6. Remove a step - -User: 1 - -Claude: Great! Let's add a new step. First, help me understand: - -1. What should this new step accomplish? -2. Where should it fit in the workflow? - - Should it go between existing steps or at the end? - -User: I want to add a step that validates our product positioning before the final recommendations. 
It should go between step 3 and step 4. - -Claude: Excellent idea. So this would be: -- Step 4: validate_positioning -- Step 5: positioning_recommendations (renumbered) - -For this validation step: -1. What inputs does it need? I assume it needs the comparison_matrix.md from step 3? -2. What should it output? -3. What exactly should this validation check for? - -User: Yes, it needs the comparison matrix. It should output a validation_report.md that identifies gaps in our positioning. - -Claude: Perfect. Let me confirm the details: - -**New Step: validate_positioning** -- Name: "Validate Product Positioning" -- Description: "Identify gaps and opportunities in current positioning" -- Inputs: comparison_matrix.md (from comparative_analysis step) -- Outputs: validation_report.md -- Dependencies: comparative_analysis -- Position: Between step 3 and 4 - -Should the positioning_recommendations step also use the validation_report.md as input? - -User: Yes, definitely - -Claude: Great! So I'll also update the positioning_recommendations step to include validation_report.md as an input and add validate_positioning as a dependency. - -This is a new feature, so I'll bump the version to 1.1.0 (minor version). - -Creating the new step... ✓ - -Updated files: -- .deepwork/jobs/competitive_research/job.yml (added step, updated dependencies, version → 1.1.0, updated changelog) -- .deepwork/jobs/competitive_research/steps/validate_positioning.md (created) - -Changelog entry added: -```yaml -- version: "1.1.0" - changes: "Added step: validate_positioning between comparative_analysis and positioning_recommendations; Updated dependencies for positioning_recommendations" -``` - -Now run: -```bash -deepwork sync -``` - -Then follow the reload instructions shown by `deepwork sync` and you'll have: -- /competitive_research.validate_positioning (new!) 
-- All other commands updated -``` - -## Validation Rules - -Enforce the same rules as job definition: -- Job name: lowercase, underscores -- Version: semantic versioning -- Step IDs: unique within job -- Dependencies: must reference existing step IDs -- File inputs: `from_step` must be in dependencies -- At least one output per step -- No circular dependencies - -## Error Handling - -If issues arise, provide clear guidance: -- **Dependency conflict**: "Step X depends on step Y which you're trying to remove. You must update step X first, or remove both steps." -- **Circular dependency**: "Adding this dependency would create a cycle: A → B → C → A. Please choose a different dependency structure." -- **Missing file input**: "Step X requires file.md from step Y, but Y is not in its dependencies. I'll add Y to the dependencies." -- **Breaking change**: "Removing this output is a breaking change. Other steps depend on it. I recommend against this change unless you update the dependent steps first." - -## Changelog Entry Format - -Instead of creating a separate refinement_summary.md file, add the changes directly to the job.yml changelog section. This creates a permanent version history within the job definition itself. 
- -**Location**: `.deepwork/jobs/[job_name]/job.yml` - -**Add to the `changelog` array**: - -```yaml -changelog: - - version: "1.0.0" - changes: "Initial job creation" - - version: "[new_version]" - changes: "[Concise description of all changes in this version]" -``` - -**Guidelines for changelog entries**: -- Be concise but descriptive -- Use action verbs (Added, Modified, Removed, Updated, Changed, Fixed) -- Reference specific step names when relevant -- For breaking changes, prefix with "BREAKING:" -- If multiple changes, separate with semicolons or use clear phrasing - -**Examples**: -- "Added step: validate_positioning between comparative_analysis and positioning_recommendations" -- "Modified step instructions for research_competitors to improve clarity and add quality criteria" -- "Removed step: duplicate_analysis (consolidated into comparative_analysis)" -- "Updated dependencies: positioning_recommendations now depends on validate_positioning" -- "Changed output filename: comparison_matrix.md → comparison_table.md" -- "BREAKING: Removed output file shared_data.json from identify_competitors step" -- "Fixed circular dependency between steps A and B" -- "Updated job description to reflect new validation phase" -- "Added validate_positioning step; Updated dependencies for positioning_recommendations" - -## Quality Criteria - -- Changes maintain job consistency -- Dependencies are logically valid -- Version bump follows semantic versioning -- No circular dependencies introduced -- User understands impact of changes -- Breaking changes are clearly communicated - - -## Inputs - -### User Parameters - -Please gather the following information from the user: -- **job_name**: Name of the job to refine - - -## Work Branch Management - -All work for this job should be done on a dedicated work branch: - -1. 
**Check current branch**: - - If already on a work branch for this job (format: `deepwork/deepwork_jobs-[instance]-[date]`), continue using it - - If on main/master, create a new work branch - -2. **Create work branch** (if needed): - ```bash - git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d) - ``` - Replace `[instance]` with a descriptive identifier (e.g., `acme`, `q1-launch`, etc.) - -## Output Requirements - -Create the following output(s): -- `job.yml` - -Ensure all outputs are: -- Well-formatted and complete -- Ready for review or use by subsequent steps - -## Completion - -After completing this step: - -1. **Verify outputs**: Confirm all required files have been created - -2. **Inform the user**: - - The refine command is complete - - Outputs created: job.yml - - This command can be run again anytime to make further changes - -## Command Complete - -This is a standalone command that can be run anytime. The outputs are ready for use. - -Consider: -- Reviewing the outputs -- Running `deepwork sync` if job definitions were changed -- Re-running this command later if further changes are needed - ---- - -## Context Files - -- Job definition: `.deepwork/jobs/deepwork_jobs/job.yml` -- Step instructions: `.deepwork/jobs/deepwork_jobs/steps/refine.md` -""" \ No newline at end of file diff --git a/.gemini/commands/deepwork_policy/index.toml b/.gemini/commands/deepwork_policy/index.toml new file mode 100644 index 0000000..e415dca --- /dev/null +++ b/.gemini/commands/deepwork_policy/index.toml @@ -0,0 +1,60 @@ +# deepwork_policy +# +# Policy enforcement for AI agent sessions +# +# Generated by DeepWork - do not edit manually + +description = "Policy enforcement for AI agent sessions" + +prompt = """ +# deepwork_policy + +You are executing the **deepwork_policy** job. Policy enforcement for AI agent sessions + +Manages policies that automatically trigger when certain files change during an AI agent session. 
+Policies help ensure that code changes follow team guidelines, documentation is updated, +and architectural decisions are respected. + +Policies are defined in a `.deepwork.policy.yml` file at the root of your project. Each policy +specifies: +- Trigger patterns: Glob patterns for files that, when changed, should trigger the policy +- Safety patterns: Glob patterns for files that, if also changed, mean the policy doesn't need to fire +- Instructions: What the agent should do when the policy triggers + +Example use cases: +- Update installation docs when configuration files change +- Require security review when authentication code is modified +- Ensure API documentation stays in sync with API code +- Remind developers to update changelogs + + +## Available Steps + +This job has 1 step(s): + +### define +**Define Policy**: Create or update policy entries in .deepwork.policy.yml +- Command: `/deepwork_policy:_define` + +## Instructions + +Determine what the user wants to do and route to the appropriate step. + +1. **Analyze user intent** from the text that follows `/deepwork_policy` + +2. **Match intent to a step**: + - define: Create or update policy entries in .deepwork.policy.yml + +3. **Invoke the matched step**: Tell the user to run the appropriate command: + - For define: `/deepwork_policy:_define` + +4. **If intent is ambiguous**, ask the user which step they want: + - Present the available steps as numbered options + - Let them choose + +**Note**: Gemini CLI doesn't support programmatic command invocation, so guide the user to run the appropriate step command. 
+ +## Context Files + +- Job definition: `.deepwork/jobs/deepwork_policy/job.yml` +""" \ No newline at end of file diff --git a/.gemini/commands/update/job.toml b/.gemini/commands/update/_job.toml similarity index 100% rename from .gemini/commands/update/job.toml rename to .gemini/commands/update/_job.toml diff --git a/.gemini/commands/update/index.toml b/.gemini/commands/update/index.toml new file mode 100644 index 0000000..d291922 --- /dev/null +++ b/.gemini/commands/update/index.toml @@ -0,0 +1,58 @@ +# update +# +# Update standard jobs in src/ and sync to installed locations +# +# Generated by DeepWork - do not edit manually + +description = "Update standard jobs in src/ and sync to installed locations" + +prompt = """ +# update + +You are executing the **update** job. Update standard jobs in src/ and sync to installed locations + +A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs +(like `deepwork_jobs` and `deepwork_policy`) are source-controlled in +`src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/` +or `.claude/commands/` directly. + +This job guides you through: +1. Identifying which standard job(s) to update from conversation context +2. Making changes in the correct source location (`src/deepwork/standard_jobs/[job_name]/`) +3. Running `deepwork install` to propagate changes to `.deepwork/` and command directories +4. Verifying the sync completed successfully + +Use this job whenever you need to modify job.yml files, step instructions, or hooks +for any standard job in the DeepWork repository. + + +## Available Steps + +This job has 1 step(s): + +### job +**Update Standard Job**: Edit standard job source files and sync to installed locations +- Command: `/update:_job` + +## Instructions + +Determine what the user wants to do and route to the appropriate step. + +1. **Analyze user intent** from the text that follows `/update` + +2. 
**Match intent to a step**: + - job: Edit standard job source files and sync to installed locations + +3. **Invoke the matched step**: Tell the user to run the appropriate command: + - For job: `/update:_job` + +4. **If intent is ambiguous**, ask the user which step they want: + - Present the available steps as numbered options + - Let them choose + +**Note**: Gemini CLI doesn't support programmatic command invocation, so guide the user to run the appropriate step command. + +## Context Files + +- Job definition: `.deepwork/jobs/update/job.yml` +""" \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 4124344..7f15e4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,13 +18,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `rules_queue.py`: Queue system for rule state persistence - `command_executor.py`: Command action execution with variable substitution - Updated `rules_check.py` hook to use v2 system with queue-based deduplication +- **Meta-command architecture**: Each job now generates a single user-facing meta-command (e.g., `/deepwork_jobs`) that interprets user intent and routes to appropriate steps +- New `exposed` field in job.yml steps to control visibility (steps are hidden by default) +- New templates: `command-job-meta.md.jinja` for Claude, `command-job-meta.toml.jinja` for Gemini +- `generate_meta_command()` method in CommandGenerator +- `get_meta_command_filename()` and `get_step_command_filename()` methods in adapters ### Changed - Documentation updated with v2 rules examples and configuration +- Step commands are now hidden by default (underscore prefix, e.g., `_deepwork_jobs.define.md`) +- Steps with `exposed: true` in job.yml remain visible (e.g., `deepwork_jobs.learn.md`) +- Renamed `get_command_filename()` to `get_step_command_filename()` with new `exposed` parameter +- `generate_all_commands()` now generates meta-command first, then step commands +- Updated deepwork_jobs job: 
`learn` step marked as `exposed: true` ### Removed - v1 rules format (`.deepwork.rules.yml`) - now only v2 frontmatter markdown format is supported +### Migration +- Users should invoke jobs via meta-commands: `/deepwork_jobs define a new job` instead of `/deepwork_jobs.define` +- Exposed steps remain directly accessible: `/deepwork_jobs.learn` + ## [0.3.0] - 2026-01-16 ### Added diff --git a/README.md b/README.md index 9681667..d3ce435 100644 --- a/README.md +++ b/README.md @@ -70,39 +70,45 @@ Jobs are multi-step workflows where each Step has clear input and output artifac The process of defining a job itself is actually a DeepWork job. You can see it at `.deepwork/jobs/deepwork_jobs/`. -To start the process, just run the first Step in the job: +To start the process, just run the job's meta-command: ``` -/deepwork_jobs.define +/deepwork_jobs define a new job ``` -Follow the interactive prompts to: +The meta-command routes to the appropriate step. Follow the interactive prompts to: - Name your job - Define steps with inputs/outputs - Specify dependencies between steps It will also prompt you to go on to the next Step in the job. -### 2. Execute Steps +### 2. Execute Jobs -Run individual steps of your job: +Run your job using its meta-command: ``` -/your_job_name.step_1 +/your_job_name start the workflow ``` -The AI will: +The meta-command analyzes your intent and routes to the appropriate step. The AI will: - Create a work branch - Execute the step's instructions - Generate required outputs - Guide you to the next step -### 3. Manage Workflows +You can also run specific steps directly if they're exposed: -Use the refine skill to update existing jobs: +``` +/your_job_name.exposed_step +``` + +### 3.
Learn from Execution + +After running a job, use the learn command to improve it: ``` -/deepwork_jobs.refine +/deepwork_jobs.learn ``` ## Example: Competitive Research Workflow @@ -153,11 +159,13 @@ steps: Usage: ``` -/competitive_research.identify_competitors +/competitive_research identify competitors +# Meta-command routes to identify_competitors step # AI creates work branch and asks for market_segment, product_category # Generates competitors.md -/competitive_research.primary_research +/competitive_research continue with primary research +# Meta-command routes to primary_research step # AI reads competitors.md # Generates primary_research.md and competitor_profiles/ ``` @@ -187,12 +195,15 @@ your-project/ │ └── steps/ # Step instructions ├── .claude/ # Claude Code commands (auto-generated) │ └── commands/ -│ ├── deepwork_jobs.define.md -│ └── job_name.step_name.md +│ ├── deepwork_jobs.md # Meta-command (user-facing) +│ ├── _deepwork_jobs.define.md # Hidden step (underscore prefix) +│ ├── deepwork_jobs.learn.md # Exposed step (no underscore) +│ └── job_name.md # Job meta-commands └── .gemini/ # Gemini CLI commands (auto-generated) └── commands/ └── job_name/ - └── step_name.toml + ├── index.toml # Meta-command + └── _step_name.toml # Hidden steps ``` **Note**: Work outputs are created on dedicated Git branches (e.g., `deepwork/job_name-instance-date`), not in a separate directory. 
diff --git a/doc/architecture.md b/doc/architecture.md index 2940097..a6c892b 100644 --- a/doc/architecture.md +++ b/doc/architecture.md @@ -59,8 +59,11 @@ deepwork/ # DeepWork tool repository │ │ └── rules_check.py # Cross-platform rule evaluation hook │ ├── templates/ # Command templates for each platform │ │ ├── claude/ -│ │ │ └── command-job-step.md.jinja +│ │ │ ├── command-job-meta.md.jinja # Meta-command template +│ │ │ └── command-job-step.md.jinja # Step command template │ │ ├── gemini/ +│ │ │ ├── command-job-meta.toml.jinja +│ │ │ └── command-job-step.toml.jinja │ │ └── copilot/ │ ├── standard_jobs/ # Built-in job definitions │ │ ├── deepwork_jobs/ @@ -166,10 +169,21 @@ class AgentAdapter(ABC): display_name: ClassVar[str] # "Claude Code" config_dir: ClassVar[str] # ".claude" commands_dir: ClassVar[str] = "commands" + command_template: ClassVar[str] = "command-job-step.md.jinja" + meta_command_template: ClassVar[str] = "command-job-meta.md.jinja" # Mapping from generic hook names to platform-specific names hook_name_mapping: ClassVar[dict[CommandLifecycleHook, str]] = {} + def get_meta_command_filename(self, job_name: str) -> str: + """Get filename for job's meta-command.""" + return f"{job_name}.md" + + def get_step_command_filename(self, job_name: str, step_id: str, exposed: bool = False) -> str: + """Get filename for step command. Hidden by default (underscore prefix).""" + prefix = "" if exposed else "_" + return f"{prefix}{job_name}.{step_id}.md" + def detect(self, project_root: Path) -> bool: """Check if this platform is available in the project.""" @@ -224,46 +238,56 @@ Generates AI-platform-specific command files from job definitions. This component is called by the `sync` command to regenerate all commands: 1. Reads the job definition from `.deepwork/jobs/[job-name]/job.yml` 2. Loads platform-specific templates -3. Generates command files for each step in the job -4. Writes commands to the AI platform's commands directory +3. 
Generates the meta-command (job-level entry point) +4. Generates step command files (hidden by default, exposed if `exposed: true`) +5. Writes commands to the AI platform's commands directory + +**Meta-Command Architecture**: + +Each job gets a single user-facing meta-command (e.g., `/deepwork_jobs`) that interprets user intent and routes to appropriate hidden step commands. Steps are hidden by default (underscore prefix) unless marked `exposed: true` in job.yml. + +```yaml +# In job.yml +steps: + - id: define + name: "Define Job" + # ... (hidden by default → _deepwork_jobs.define.md) + + - id: learn + name: "Learn from Execution" + exposed: true # Visible to users → deepwork_jobs.learn.md + # ... +``` **Example Generation Flow**: ```python class CommandGenerator: def generate_all_commands(self, job: JobDefinition, - platform: PlatformConfig, + adapter: AgentAdapter, output_dir: Path) -> list[Path]: - """Generate command files for all steps in a job.""" + """Generate all command files: meta-command and step commands.""" command_paths = [] - for step_index, step in enumerate(job.steps): - # Load step instructions - instructions = read_file(job.job_dir / step.instructions_file) - - # Build template context - context = { - "job_name": job.name, - "step_id": step.id, - "step_name": step.name, - "step_number": step_index + 1, - "total_steps": len(job.steps), - "instructions_content": instructions, - "user_inputs": [inp for inp in step.inputs if inp.is_user_input()], - "file_inputs": [inp for inp in step.inputs if inp.is_file_input()], - "outputs": step.outputs, - "dependencies": step.dependencies, - } - - # Render template - template = env.get_template("command-job-step.md.jinja") - rendered = template.render(**context) - - # Write to platform's commands directory - command_path = output_dir / platform.config_dir / platform.commands_dir / f"{job.name}.{step.id}.md" - write_file(command_path, rendered) - command_paths.append(command_path) + # 1. 
Generate meta-command (job-level entry point) + meta_path = self.generate_meta_command(job, adapter, output_dir) + command_paths.append(meta_path) + + # 2. Generate step commands (hidden by default) + for step in job.steps: + step_path = self.generate_step_command(job, step, adapter, output_dir) + command_paths.append(step_path) return command_paths + + def generate_step_command(self, job, step, adapter, output_dir): + # ... build context ... + + # Write to platform's commands directory + # Hidden by default (underscore prefix) unless step.exposed is True + command_filename = adapter.get_step_command_filename(job.name, step.id, step.exposed) + command_path = output_dir / adapter.config_dir / adapter.commands_dir / command_filename + write_file(command_path, rendered) + return command_path ``` --- @@ -280,11 +304,14 @@ my-project/ # User's project (target) ├── .claude/ # Claude Code directory │ ├── settings.json # Includes installed hooks │ └── commands/ # Command files -│ ├── deepwork_jobs.define.md # Core DeepWork commands -│ ├── deepwork_jobs.implement.md -│ ├── deepwork_jobs.refine.md -│ ├── deepwork_rules.define.md # Rule management -│ ├── competitive_research.identify_competitors.md +│ ├── deepwork_jobs.md # Meta-command (user-facing entry point) +│ ├── _deepwork_jobs.define.md # Hidden step (underscore prefix) +│ ├── _deepwork_jobs.implement.md # Hidden step +│ ├── deepwork_jobs.learn.md # Exposed step (no underscore) +│ ├── deepwork_rules.md # Rules meta-command +│ ├── _deepwork_rules.define.md # Hidden step +│ ├── competitive_research.md # User job meta-command +│ ├── _competitive_research.identify_competitors.md # Hidden steps │ └── ... ├── .deepwork/ # DeepWork configuration │ ├── config.yml # Platform config @@ -531,7 +558,12 @@ Create `competitors.md` with this structure: When the job is defined and `sync` is run, DeepWork generates command files. 
Example for Claude Code: -`.deepwork/jobs/competitive_research` a step called `identify_competitors` will generate a command file at `.claude/commands/competitive_research.identify_competitors.md`: +`.deepwork/jobs/competitive_research` generates: +- Meta-command: `.claude/commands/competitive_research.md` (user-facing entry point) +- Hidden step commands: `.claude/commands/_competitive_research.identify_competitors.md` (prefixed with underscore) +- Exposed step commands: `.claude/commands/competitive_research.step_name.md` (if `exposed: true` in job.yml) + +The meta-command routes user intent to the appropriate step command via the Skill tool. # Part 3: Runtime Execution Model @@ -552,56 +584,56 @@ This section describes how AI agents (like Claude Code) actually execute jobs us 2. **Define a Job** (once per job type): ``` # In Claude Code - User: /deepwork_jobs.define + User: /deepwork_jobs define a competitive research workflow - Claude: I'll help you define a new job. What type of work do you want to define? - - User: Competitive research + Claude: [Meta-command routes to /_deepwork_jobs.define] + I'll help you define a new job. What type of work do you want to define? [Interactive dialog to define all the steps] Claude: ✓ Job 'competitive_research' created with 5 steps - Run /deepwork_jobs.implement to generate command files - Then run 'deepwork sync' to install commands + Run /deepwork_jobs implement to generate command files - User: /deepwork_jobs.implement + User: /deepwork_jobs implement the job - Claude: [Generates step instruction files] + Claude: [Meta-command routes to /_deepwork_jobs.implement] + [Generates step instruction files] [Runs deepwork sync] ✓ Commands installed to .claude/commands/ - Run /competitive_research.identify_competitors to start + Run /competitive_research to start ``` 3. 
**Execute a Job Instance** (each time you need to do the work): ``` # In Claude Code - User: /competitive_research.identify_competitors + User: /competitive_research identify competitors in enterprise SaaS - Claude: Starting competitive research job... + Claude: [Meta-command routes to /_competitive_research.identify_competitors] + Starting competitive research job... Created branch: deepwork/competitive_research-acme-2026-01-11 Please provide: - - Market segment: ? + - Market segment: Enterprise SaaS - Product category: ? - User: Market segment: Enterprise SaaS - Product category: Project Management + User: Product category: Project Management Claude: [Performs research using web tools, analysis, etc.] ✓ Created competitors.md Found 8 direct competitors and 4 indirect competitors. - Review the file and run /competitive_research.primary_research when ready. + Review the file and run /competitive_research continue to primary research when ready. User: [Reviews competitors.md, maybe edits it] - /competitive_research.primary_research + /competitive_research continue to primary research - Claude: Continuing competitive research (step 2/5)... + Claude: [Meta-command routes to /_competitive_research.primary_research] + Continuing competitive research (step 2/5)... [Reads competitors.md] [Performs primary research on each competitor] ✓ Created primary_research.md and competitor_profiles/ - Next: /competitive_research.secondary_research + Next step: secondary_research [Continue through all steps...] ``` @@ -616,20 +648,22 @@ This section describes how AI agents (like Claude Code) actually execute jobs us ## How Claude Code Executes Commands -When user types `/competitive_research.identify_competitors`: +When user types `/competitive_research identify competitors`: -1. **Command Discovery**: +1. **Meta-Command Discovery**: - Claude Code scans `.claude/commands/` directory - - Finds `competitive_research.identify_competitors.md` - - Loads the command definition - -2. 
**Context Loading**: - - Command file contains embedded instructions - - References to job definition and step files - - Claude reads these files to understand the full context - -3. **Execution**: - - Claude follows the instructions in the command + - Finds `competitive_research.md` (the meta-command) + - Loads the meta-command definition + +2. **Intent Routing**: + - Meta-command analyzes user text ("identify competitors") + - Matches intent to appropriate step (identify_competitors) + - Invokes hidden step command via Skill tool: `/_competitive_research.identify_competitors` + +3. **Step Execution**: + - Claude Code loads `_competitive_research.identify_competitors.md` + - Step hooks (Stop, PreToolUse, etc.) are active + - Claude follows the instructions in the step command - Uses its tools (Read, Write, WebSearch, WebFetch, etc.) - Creates outputs in the specified format @@ -744,17 +778,18 @@ When all steps are done, remind the user they should: ### Standard Job: `deepwork_jobs` -DeepWork includes a built-in job called `deepwork_jobs` with three commands for managing jobs: +DeepWork includes a built-in job called `deepwork_jobs` with a meta-command and step commands for managing jobs: -1. **`/deepwork_jobs.define`** - Interactive job definition wizard -2. **`/deepwork_jobs.implement`** - Generates step instruction files from job.yml -3. **`/deepwork_jobs.refine`** - Modifies existing job definitions +- **`/deepwork_jobs`** - Meta-command (routes to appropriate step) +- **`/_deepwork_jobs.define`** - Interactive job definition wizard (hidden) +- **`/_deepwork_jobs.implement`** - Generates step instruction files from job.yml (hidden) +- **`/deepwork_jobs.learn`** - Learn from job execution (exposed via `exposed: true`) -These commands are installed automatically when you run `deepwork install`. +These commands are installed automatically when you run `deepwork install`. Users typically interact via the meta-command. 
-### The `/deepwork_jobs.define` Command +### The `/deepwork_jobs define` Command -When a user runs `/deepwork_jobs.define` in Claude Code: +When a user runs `/deepwork_jobs define a new job` in Claude Code, the meta-command routes to `/_deepwork_jobs.define`: **What Happens**: 1. Claude engages in interactive dialog to gather: @@ -769,25 +804,26 @@ When a user runs `/deepwork_jobs.define` in Claude Code: └── job.yml # Job metadata only ``` -3. User then runs `/deepwork_jobs.implement` to: +3. User then runs `/deepwork_jobs implement` to: - Generate step instruction files (steps/*.md) - Run `deepwork sync` to generate command files - Install commands to `.claude/commands/` 4. The workflow is now: ``` - /deepwork_jobs.define → Creates job.yml - /deepwork_jobs.implement → Creates steps/*.md and syncs commands + /deepwork_jobs define → Routes to /_deepwork_jobs.define → Creates job.yml + /deepwork_jobs implement → Routes to /_deepwork_jobs.implement → Creates steps/*.md and syncs commands ``` -5. The `/deepwork_jobs.define` command contains: +5. The `/_deepwork_jobs.define` step command contains: - The job definition YAML schema - Interactive question flow - Job.yml creation logic **Command File Structure**: -The actual command file `.claude/commands/deepwork_jobs.define.md` contains: +The meta-command `.claude/commands/deepwork_jobs.md` routes intent to steps. +The step command `.claude/commands/_deepwork_jobs.define.md` contains: ```markdown --- @@ -808,14 +844,15 @@ When creating job.yml, use this structure: [YAML schema embedded here...] ``` -### The `/deepwork_jobs.implement` Command +### The `/deepwork_jobs implement` Command Generates step instruction files from job.yml and syncs commands: ``` -User: /deepwork_jobs.implement +User: /deepwork_jobs implement the competitive_research job -Claude: Reading job definition from .deepwork/jobs/competitive_research/job.yml... 
+Claude: [Meta-command routes to /_deepwork_jobs.implement] + Reading job definition from .deepwork/jobs/competitive_research/job.yml... Generating step instruction files... ✓ Created steps/identify_competitors.md ✓ Created steps/primary_research.md @@ -824,46 +861,33 @@ Claude: Reading job definition from .deepwork/jobs/competitive_research/job.yml. ✓ Created steps/positioning.md Running deepwork sync... - ✓ Generated 5 command files in .claude/commands/ - - New commands available: - - /competitive_research.identify_competitors - - /competitive_research.primary_research - - /competitive_research.secondary_research - - /competitive_research.comparative_report - - /competitive_research.positioning + ✓ Generated meta-command: competitive_research.md + ✓ Generated 5 hidden step commands in .claude/commands/ + + Use /competitive_research to start the workflow ``` -### The `/deepwork_jobs.refine` Command +### The `/deepwork_jobs.learn` Command -Allows updating existing job definitions: +After running a job, use the learn command to improve instructions based on the conversation: ``` -User: /deepwork_jobs.refine +User: /deepwork_jobs.learn -Claude: Which job would you like to refine? - Available jobs: - - competitive_research - - deepwork_jobs +Claude: I'll analyze this conversation to improve the job instructions. -User: competitive_research + Analyzing what went well and what could be improved... -Claude: Loading competitive_research job definition... - What would you like to update? - 1. Add a new step - 2. Modify existing step - 3. Remove a step - 4. Update metadata + Learnings identified: + - The identify_competitors step needed more guidance on research sources + - Users often asked about handling international competitors -User: Add a new step between primary_research and secondary_research - -Claude: [Interactive dialog...] 
- ✓ Added step 'social_media_analysis' - ✓ Updated dependencies in job.yml - ✓ Updated changelog with version 1.1.0 - ✓ Please run /deepwork_jobs.implement to generate the new step file + ✓ Updated steps/identify_competitors.md with additional guidance + ✓ Created learning_summary.md documenting changes ``` +Note: `/deepwork_jobs.learn` is exposed (visible) because it's a standalone utility that users may want to invoke directly. + ### Template System Templates are Markdown files with variable interpolation: diff --git a/src/deepwork/core/adapters.py b/src/deepwork/core/adapters.py index 9e8e27c..69d20e6 100644 --- a/src/deepwork/core/adapters.py +++ b/src/deepwork/core/adapters.py @@ -56,6 +56,7 @@ class AgentAdapter(ABC): config_dir: ClassVar[str] commands_dir: ClassVar[str] = "commands" command_template: ClassVar[str] = "command-job-step.md.jinja" + meta_command_template: ClassVar[str] = "command-job-meta.md.jinja" # Instructions for reloading commands after sync (shown to users) # Subclasses should override with platform-specific instructions. @@ -153,20 +154,38 @@ def get_commands_dir(self, project_root: Path | None = None) -> Path: raise AdapterError("No project root specified") return root / self.config_dir / self.commands_dir - def get_command_filename(self, job_name: str, step_id: str) -> str: + def get_meta_command_filename(self, job_name: str) -> str: """ - Get the filename for a command. + Get the filename for a job's meta-command. + The meta-command is the primary user interface for a job. + Can be overridden for different file formats. + + Args: + job_name: Name of the job + + Returns: + Meta-command filename (e.g., "job_name.md") + """ + return f"{job_name}.md" + + def get_step_command_filename(self, job_name: str, step_id: str, exposed: bool = False) -> str: + """ + Get the filename for a step command. + + Step commands are hidden by default (underscore prefix) unless exposed=True. 
Can be overridden for different file formats (e.g., TOML for Gemini). Args: job_name: Name of the job step_id: ID of the step + exposed: If True, command is visible (no underscore prefix). Default: False. Returns: - Command filename (e.g., "job_name.step_id.md") + Command filename (e.g., "_job_name.step_id.md" or "job_name.step_id.md" if exposed) """ - return f"{job_name}.{step_id}.md" + prefix = "" if exposed else "_" + return f"{prefix}{job_name}.{step_id}.md" def detect(self, project_root: Path | None = None) -> bool: """ @@ -346,6 +365,7 @@ class GeminiAdapter(AgentAdapter): display_name = "Gemini CLI" config_dir = ".gemini" command_template = "command-job-step.toml.jinja" + meta_command_template = "command-job-meta.toml.jinja" # Gemini CLI can reload with /memory refresh reload_instructions: ClassVar[str] = ( @@ -356,21 +376,39 @@ class GeminiAdapter(AgentAdapter): # Hooks are global/project-level in settings.json, not per-command hook_name_mapping: ClassVar[dict[CommandLifecycleHook, str]] = {} - def get_command_filename(self, job_name: str, step_id: str) -> str: + def get_meta_command_filename(self, job_name: str) -> str: + """ + Get the filename for a Gemini job's meta-command. + + Gemini uses TOML files and colon namespacing via subdirectories. + For job "my_job", creates: my_job/index.toml + + Args: + job_name: Name of the job + + Returns: + Meta-command filename path (e.g., "my_job/index.toml") + """ + return f"{job_name}/index.toml" + + def get_step_command_filename(self, job_name: str, step_id: str, exposed: bool = False) -> str: """ - Get the filename for a Gemini command. + Get the filename for a Gemini step command. Gemini uses TOML files and colon namespacing via subdirectories. - For job "my_job" and step "step_one", creates: my_job/step_one.toml + Step commands are hidden by default (underscore prefix) unless exposed=True. 
+ For job "my_job" and step "step_one", creates: my_job/_step_one.toml Args: job_name: Name of the job step_id: ID of the step + exposed: If True, command is visible (no underscore prefix). Default: False. Returns: - Command filename path (e.g., "my_job/step_one.toml") + Command filename path (e.g., "my_job/_step_one.toml" or "my_job/step_one.toml" if exposed) """ - return f"{job_name}/{step_id}.toml" + prefix = "" if exposed else "_" + return f"{job_name}/{prefix}{step_id}.toml" def sync_hooks(self, project_path: Path, hooks: dict[str, list[dict[str, Any]]]) -> int: """ diff --git a/src/deepwork/core/generator.py b/src/deepwork/core/generator.py index 380ab5b..235897b 100644 --- a/src/deepwork/core/generator.py +++ b/src/deepwork/core/generator.py @@ -201,6 +201,110 @@ def _build_step_context( "stop_hooks": stop_hooks, # Backward compat: after_agent hooks only } + def _build_meta_command_context( + self, job: JobDefinition, adapter: AgentAdapter + ) -> dict[str, Any]: + """ + Build template context for a job's meta-command. 
+ + Args: + job: Job definition + adapter: Agent adapter for platform-specific configuration + + Returns: + Template context dictionary + """ + # Build step info for the meta-command + steps_info = [] + for step in job.steps: + command_filename = adapter.get_step_command_filename(job.name, step.id, step.exposed) + # Extract just the command name (without path and extension) + # For Claude: _job_name.step_id.md -> _job_name.step_id + # For Gemini: job_name/_step_id.toml -> job_name:_step_id + if adapter.name == "gemini": + # Gemini uses colon for namespacing: job_name:step_id or job_name:_step_id + parts = command_filename.replace(".toml", "").split("/") + command_name = ":".join(parts) + else: + # Claude uses dot for namespacing: _job_name.step_id + command_name = command_filename.replace(".md", "") + + steps_info.append({ + "id": step.id, + "name": step.name, + "description": step.description, + "command_name": command_name, + "dependencies": step.dependencies, + "exposed": step.exposed, + }) + + return { + "job_name": job.name, + "job_version": job.version, + "job_summary": job.summary, + "job_description": job.description, + "total_steps": len(job.steps), + "steps": steps_info, + } + + def generate_meta_command( + self, + job: JobDefinition, + adapter: AgentAdapter, + output_dir: Path | str, + ) -> Path: + """ + Generate the meta-command file for a job. + + The meta-command is the primary user interface for a job, routing + user intent to the appropriate step. 
+ + Args: + job: Job definition + adapter: Agent adapter for the target platform + output_dir: Directory to write command file to + + Returns: + Path to generated meta-command file + + Raises: + GeneratorError: If generation fails + """ + output_dir = Path(output_dir) + + # Create commands subdirectory if needed + commands_dir = output_dir / adapter.commands_dir + commands_dir.mkdir(parents=True, exist_ok=True) + + # Build context + context = self._build_meta_command_context(job, adapter) + + # Load and render template + env = self._get_jinja_env(adapter) + try: + template = env.get_template(adapter.meta_command_template) + except TemplateNotFound as e: + raise GeneratorError(f"Meta-command template not found: {e}") from e + + try: + rendered = template.render(**context) + except Exception as e: + raise GeneratorError(f"Meta-command template rendering failed: {e}") from e + + # Write meta-command file + command_filename = adapter.get_meta_command_filename(job.name) + command_path = commands_dir / command_filename + + # Ensure parent directories exist (for Gemini's job_name/index.toml structure) + command_path.parent.mkdir(parents=True, exist_ok=True) + + try: + safe_write(command_path, rendered) + except Exception as e: + raise GeneratorError(f"Failed to write meta-command file: {e}") from e + + return command_path + def generate_step_command( self, job: JobDefinition, @@ -250,8 +354,8 @@ def generate_step_command( except Exception as e: raise GeneratorError(f"Template rendering failed: {e}") from e - # Write command file - command_filename = adapter.get_command_filename(job.name, step.id) + # Write command file (hidden by default unless step.exposed is True) + command_filename = adapter.get_step_command_filename(job.name, step.id, step.exposed) command_path = commands_dir / command_filename try: @@ -268,7 +372,7 @@ def generate_all_commands( output_dir: Path | str, ) -> list[Path]: """ - Generate slash-command files for all steps in a job. 
+ Generate all command files for a job: meta-command and step commands. Args: job: Job definition @@ -276,13 +380,18 @@ def generate_all_commands( output_dir: Directory to write command files to Returns: - List of paths to generated command files + List of paths to generated command files (meta-command first, then steps) Raises: GeneratorError: If generation fails """ command_paths = [] + # Generate meta-command first (job-level entry point) + meta_command_path = self.generate_meta_command(job, adapter, output_dir) + command_paths.append(meta_command_path) + + # Generate step commands (hidden by default unless step.exposed is True) for step in job.steps: command_path = self.generate_step_command(job, step, adapter, output_dir) command_paths.append(command_path) diff --git a/src/deepwork/core/parser.py b/src/deepwork/core/parser.py index 42fba81..e41c9c3 100644 --- a/src/deepwork/core/parser.py +++ b/src/deepwork/core/parser.py @@ -108,6 +108,9 @@ class Step: # Event names: after_agent, before_tool, before_prompt hooks: dict[str, list[HookAction]] = field(default_factory=dict) + # If true, step command is visible (no underscore prefix). Default: false (hidden). + exposed: bool = False + @property def stop_hooks(self) -> list[HookAction]: """ @@ -144,6 +147,7 @@ def from_dict(cls, data: dict[str, Any]) -> "Step": outputs=data["outputs"], dependencies=data.get("dependencies", []), hooks=hooks, + exposed=data.get("exposed", False), ) diff --git a/src/deepwork/schemas/job_schema.py b/src/deepwork/schemas/job_schema.py index 4127abb..0d229b6 100644 --- a/src/deepwork/schemas/job_schema.py +++ b/src/deepwork/schemas/job_schema.py @@ -203,6 +203,11 @@ "description": "DEPRECATED: Use hooks.after_agent instead. Stop hooks for quality validation loops.", "items": HOOK_ACTION_SCHEMA, }, + "exposed": { + "type": "boolean", + "description": "If true, step command is visible (no underscore prefix). 
Default: false (hidden).", + "default": False, + }, }, "additionalProperties": False, }, diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index e95aa2c..c5c5a13 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -88,6 +88,7 @@ steps: name: "Learn from Job Execution" description: "Reflect on conversation to improve job instructions and capture learnings" instructions_file: steps/learn.md + exposed: true inputs: - name: job_name description: "Name of the job that was run (optional - will auto-detect from conversation)" diff --git a/src/deepwork/templates/claude/command-job-meta.md.jinja b/src/deepwork/templates/claude/command-job-meta.md.jinja new file mode 100644 index 0000000..8faf7ed --- /dev/null +++ b/src/deepwork/templates/claude/command-job-meta.md.jinja @@ -0,0 +1,50 @@ +--- +description: {{ job_summary }} +--- + +# {{ job_name }} + +You are executing the **{{ job_name }}** job. {{ job_summary }} + +{% if job_description %} +{{ job_description }} +{% endif %} + +## Available Steps + +This job has {{ total_steps }} step(s): + +{% for step in steps %} +### {{ step.id }} +**{{ step.name }}**: {{ step.description }} +- Command: `{{ step.command_name }}` +{% if step.dependencies %} +- Requires: {{ step.dependencies | join(', ') }} +{% endif %} +{% endfor %} + +## Instructions + +Determine what the user wants to do and route to the appropriate step. + +1. **Analyze user intent** from the text that follows `/{{ job_name }}` + +2. **Match intent to a step**: +{% for step in steps %} + - {{ step.id }}: {{ step.description }} +{% endfor %} + +3. **Invoke the matched step** using the Skill tool: + ``` + Skill: + ``` + +4. **If intent is ambiguous**, ask the user which step they want: + - Present the available steps as numbered options + - Use AskUserQuestion to let them choose + +**Critical**: You MUST invoke the step using the Skill tool. 
Do not copy/paste the step's instructions. The Skill tool invocation ensures the step's quality validation hooks fire. + +## Context Files + +- Job definition: `.deepwork/jobs/{{ job_name }}/job.yml` diff --git a/src/deepwork/templates/gemini/command-job-meta.toml.jinja b/src/deepwork/templates/gemini/command-job-meta.toml.jinja new file mode 100644 index 0000000..24c09ef --- /dev/null +++ b/src/deepwork/templates/gemini/command-job-meta.toml.jinja @@ -0,0 +1,56 @@ +# {{ job_name }} +# +# {{ job_summary }} +# +# Generated by DeepWork - do not edit manually + +description = "{{ job_summary | replace('"', '\\"') }}" + +prompt = """ +# {{ job_name }} + +You are executing the **{{ job_name }}** job. {{ job_summary }} + +{% if job_description %} +{{ job_description }} +{% endif %} + +## Available Steps + +This job has {{ total_steps }} step(s): + +{% for step in steps %} +### {{ step.id }} +**{{ step.name }}**: {{ step.description }} +- Command: `/{{ step.command_name }}` +{% if step.dependencies %} +- Requires: {{ step.dependencies | join(', ') }} +{% endif %} +{% endfor %} + +## Instructions + +Determine what the user wants to do and route to the appropriate step. + +1. **Analyze user intent** from the text that follows `/{{ job_name }}` + +2. **Match intent to a step**: +{% for step in steps %} + - {{ step.id }}: {{ step.description }} +{% endfor %} + +3. **Invoke the matched step**: Tell the user to run the appropriate command: +{% for step in steps %} + - For {{ step.id }}: `/{{ step.command_name }}` +{% endfor %} + +4. **If intent is ambiguous**, ask the user which step they want: + - Present the available steps as numbered options + - Let them choose + +**Note**: Gemini CLI doesn't support programmatic command invocation, so guide the user to run the appropriate step command. 
+ +## Context Files + +- Job definition: `.deepwork/jobs/{{ job_name }}/job.yml` +""" diff --git a/tests/e2e/test_claude_code_integration.py b/tests/e2e/test_claude_code_integration.py index 0f75fac..c319663 100644 --- a/tests/e2e/test_claude_code_integration.py +++ b/tests/e2e/test_claude_code_integration.py @@ -94,12 +94,14 @@ def test_generate_fruits_commands_in_temp_project(self) -> None: command_paths = generator.generate_all_commands(job, adapter, commands_dir) - # Validate commands were generated - assert len(command_paths) == 2 + # Validate commands were generated (1 meta + 2 steps) + assert len(command_paths) == 3 - identify_cmd = commands_dir / "commands" / "fruits.identify.md" - classify_cmd = commands_dir / "commands" / "fruits.classify.md" + meta_cmd = commands_dir / "commands" / "fruits.md" + identify_cmd = commands_dir / "commands" / "_fruits.identify.md" + classify_cmd = commands_dir / "commands" / "_fruits.classify.md" + assert meta_cmd.exists() assert identify_cmd.exists() assert classify_cmd.exists() @@ -127,7 +129,8 @@ def test_command_structure_matches_claude_code_expectations(self) -> None: adapter = ClaudeAdapter() generator.generate_all_commands(job, adapter, commands_dir) - identify_cmd = commands_dir / "commands" / "fruits.identify.md" + # Step commands are now hidden (underscore prefix) + identify_cmd = commands_dir / "commands" / "_fruits.identify.md" content = identify_cmd.read_text() # Claude Code expects specific sections @@ -152,13 +155,14 @@ def test_dependency_chain_in_commands(self) -> None: adapter = ClaudeAdapter() generator.generate_all_commands(job, adapter, commands_dir) + # Step commands are now hidden (underscore prefix) # First step should have no prerequisites - identify_cmd = commands_dir / "commands" / "fruits.identify.md" + identify_cmd = commands_dir / "commands" / "_fruits.identify.md" identify_content = identify_cmd.read_text() assert "## Prerequisites" not in identify_content # Second step should reference first 
step - classify_cmd = commands_dir / "commands" / "fruits.classify.md" + classify_cmd = commands_dir / "commands" / "_fruits.classify.md" classify_content = classify_cmd.read_text() assert "## Prerequisites" in classify_content assert "identify" in classify_content.lower() diff --git a/tests/fixtures/jobs/exposed_step_job/job.yml b/tests/fixtures/jobs/exposed_step_job/job.yml new file mode 100644 index 0000000..d480dae --- /dev/null +++ b/tests/fixtures/jobs/exposed_step_job/job.yml @@ -0,0 +1,29 @@ +name: exposed_job +version: "0.1.0" +summary: "A job with exposed and hidden steps for testing" +description: | + A test job demonstrating the exposed step feature. + + This job has two steps: one hidden by default and one explicitly exposed. + +changelog: + - version: "0.1.0" + changes: "Initial version" + +steps: + - id: hidden_step + name: "Hidden Step" + description: "A step that is hidden by default" + instructions_file: steps/hidden_step.md + outputs: + - hidden_output.md + dependencies: [] + + - id: exposed_step + name: "Exposed Step" + description: "A step that is explicitly exposed" + instructions_file: steps/exposed_step.md + exposed: true + outputs: + - exposed_output.md + dependencies: [] diff --git a/tests/fixtures/jobs/exposed_step_job/steps/exposed_step.md b/tests/fixtures/jobs/exposed_step_job/steps/exposed_step.md new file mode 100644 index 0000000..309cba5 --- /dev/null +++ b/tests/fixtures/jobs/exposed_step_job/steps/exposed_step.md @@ -0,0 +1,7 @@ +# Exposed Step Instructions + +This step is explicitly exposed (visible command). + +## Task + +Perform a task that can be run directly by users. diff --git a/tests/fixtures/jobs/exposed_step_job/steps/hidden_step.md b/tests/fixtures/jobs/exposed_step_job/steps/hidden_step.md new file mode 100644 index 0000000..01b3434 --- /dev/null +++ b/tests/fixtures/jobs/exposed_step_job/steps/hidden_step.md @@ -0,0 +1,7 @@ +# Hidden Step Instructions + +This step is hidden by default (its command file gets an underscore prefix). 
+ +## Task + +Perform a task that produces output. diff --git a/tests/integration/test_fruits_workflow.py b/tests/integration/test_fruits_workflow.py index 168c94d..efd449d 100644 --- a/tests/integration/test_fruits_workflow.py +++ b/tests/integration/test_fruits_workflow.py @@ -80,11 +80,14 @@ def test_fruits_command_generation(self, fixtures_dir: Path, temp_dir: Path) -> command_paths = generator.generate_all_commands(job, adapter, commands_dir) - assert len(command_paths) == 2 + # Now includes meta-command + step commands + assert len(command_paths) == 3 # 1 meta + 2 steps # Verify command files exist - identify_cmd = commands_dir / "commands" / "fruits.identify.md" - classify_cmd = commands_dir / "commands" / "fruits.classify.md" + meta_cmd = commands_dir / "commands" / "fruits.md" + identify_cmd = commands_dir / "commands" / "_fruits.identify.md" + classify_cmd = commands_dir / "commands" / "_fruits.classify.md" + assert meta_cmd.exists() assert identify_cmd.exists() assert classify_cmd.exists() @@ -100,7 +103,8 @@ def test_fruits_identify_command_content(self, fixtures_dir: Path, temp_dir: Pat generator.generate_all_commands(job, adapter, commands_dir) - identify_cmd = commands_dir / "commands" / "fruits.identify.md" + # Step commands are now hidden (underscore prefix) + identify_cmd = commands_dir / "commands" / "_fruits.identify.md" content = identify_cmd.read_text() # Check header @@ -130,7 +134,8 @@ def test_fruits_classify_command_content(self, fixtures_dir: Path, temp_dir: Pat generator.generate_all_commands(job, adapter, commands_dir) - classify_cmd = commands_dir / "commands" / "fruits.classify.md" + # Step commands are now hidden (underscore prefix) + classify_cmd = commands_dir / "commands" / "_fruits.classify.md" content = classify_cmd.read_text() # Check header diff --git a/tests/integration/test_full_workflow.py b/tests/integration/test_full_workflow.py index 270c91c..7d05634 100644 --- a/tests/integration/test_full_workflow.py +++ 
b/tests/integration/test_full_workflow.py @@ -27,10 +27,17 @@ def test_parse_and_generate_workflow(self, fixtures_dir: Path, temp_dir: Path) - command_paths = generator.generate_all_commands(job, adapter, commands_dir) - assert len(command_paths) == 4 + # Now includes meta-command + step commands + assert len(command_paths) == 5 # 1 meta + 4 steps - # Verify all command files exist and have correct content - for i, command_path in enumerate(command_paths): + # First command is the meta-command + assert command_paths[0].exists() + meta_content = command_paths[0].read_text() + assert f"# {job.name}" in meta_content + assert "Available Steps" in meta_content + + # Verify all step command files exist and have correct content + for i, command_path in enumerate(command_paths[1:]): # Skip meta-command assert command_path.exists() content = command_path.read_text() @@ -56,10 +63,11 @@ def test_simple_job_workflow(self, fixtures_dir: Path, temp_dir: Path) -> None: command_paths = generator.generate_all_commands(job, adapter, commands_dir) - assert len(command_paths) == 1 + # Now includes meta-command + step commands + assert len(command_paths) == 2 # 1 meta + 1 step - # Verify command content - content = command_paths[0].read_text() + # Verify step command content (skip meta-command at index 0) + content = command_paths[1].read_text() assert "# simple_job.single_step" in content # Single step with no dependencies is treated as standalone assert "Standalone command" in content @@ -78,19 +86,21 @@ def test_command_generation_with_dependencies(self, fixtures_dir: Path, temp_dir command_paths = generator.generate_all_commands(job, adapter, commands_dir) + # command_paths[0] is meta-command, steps start at index 1 + # Check first step (no prerequisites) - step1_content = command_paths[0].read_text() + step1_content = command_paths[1].read_text() assert "## Prerequisites" not in step1_content assert "/competitive_research.primary_research" in step1_content # Next step # Check 
second step (has prerequisites and next step) - step2_content = command_paths[1].read_text() + step2_content = command_paths[2].read_text() assert "## Prerequisites" in step2_content assert "/competitive_research.identify_competitors" in step2_content assert "/competitive_research.secondary_research" in step2_content # Next step # Check last step (has prerequisites, no next step) - step4_content = command_paths[3].read_text() + step4_content = command_paths[4].read_text() assert "## Prerequisites" in step4_content assert "## Workflow Complete" in step4_content assert "## Next Step" not in step4_content @@ -107,15 +117,17 @@ def test_command_generation_with_file_inputs(self, fixtures_dir: Path, temp_dir: command_paths = generator.generate_all_commands(job, adapter, commands_dir) + # command_paths[0] is meta-command, steps start at index 1 + # Check step with file input - step2_content = command_paths[1].read_text() # primary_research + step2_content = command_paths[2].read_text() # primary_research (index 2) assert "## Inputs" in step2_content assert "### Required Files" in step2_content assert "competitors.md" in step2_content assert "from step `identify_competitors`" in step2_content # Check step with multiple file inputs - step4_content = command_paths[3].read_text() # comparative_report + step4_content = command_paths[4].read_text() # comparative_report (index 4) assert "primary_research.md" in step4_content assert "secondary_research.md" in step4_content @@ -131,8 +143,10 @@ def test_command_generation_with_user_inputs(self, fixtures_dir: Path, temp_dir: command_paths = generator.generate_all_commands(job, adapter, commands_dir) + # command_paths[0] is meta-command, steps start at index 1 + # Check step with user inputs - step1_content = command_paths[0].read_text() # identify_competitors + step1_content = command_paths[1].read_text() # identify_competitors (index 1) assert "## Inputs" in step1_content assert "### User Parameters" in step1_content assert 
"market_segment" in step1_content diff --git a/tests/integration/test_install_flow.py b/tests/integration/test_install_flow.py index 23037f6..b2a3555 100644 --- a/tests/integration/test_install_flow.py +++ b/tests/integration/test_install_flow.py @@ -41,11 +41,20 @@ def test_install_with_claude(self, mock_claude_project: Path) -> None: # Verify core commands were created claude_dir = mock_claude_project / ".claude" / "commands" - assert (claude_dir / "deepwork_jobs.define.md").exists() + # Meta-command + assert (claude_dir / "deepwork_jobs.md").exists() + # Hidden step command (underscore prefix) + assert (claude_dir / "_deepwork_jobs.define.md").exists() + # Exposed step command (no underscore - learn has exposed: true) assert (claude_dir / "deepwork_jobs.learn.md").exists() - # Verify command content - define_command = (claude_dir / "deepwork_jobs.define.md").read_text() + # Verify meta-command content + meta_command = (claude_dir / "deepwork_jobs.md").read_text() + assert "# deepwork_jobs" in meta_command + assert "Available Steps" in meta_command + + # Verify hidden step command content + define_command = (claude_dir / "_deepwork_jobs.define.md").read_text() assert "# deepwork_jobs.define" in define_command assert "Define Job Specification" in define_command @@ -106,11 +115,15 @@ def test_install_with_multiple_platforms_auto_detect( # Verify commands were created for both platforms claude_dir = mock_multi_platform_project / ".claude" / "commands" - assert (claude_dir / "deepwork_jobs.define.md").exists() + # Meta-command and hidden step commands + assert (claude_dir / "deepwork_jobs.md").exists() + assert (claude_dir / "_deepwork_jobs.define.md").exists() # Gemini uses job_name/step_id.toml structure gemini_dir = mock_multi_platform_project / ".gemini" / "commands" - assert (gemini_dir / "deepwork_jobs" / "define.toml").exists() + # Meta-command (index.toml) and hidden step commands + assert (gemini_dir / "deepwork_jobs" / "index.toml").exists() + assert 
(gemini_dir / "deepwork_jobs" / "_define.toml").exists() def test_install_with_specified_platform_when_missing(self, mock_git_repo: Path) -> None: """Test that install fails when specified platform is not present.""" @@ -149,7 +162,9 @@ def test_install_is_idempotent(self, mock_claude_project: Path) -> None: assert (deepwork_dir / "config.yml").exists() claude_dir = mock_claude_project / ".claude" / "commands" - assert (claude_dir / "deepwork_jobs.define.md").exists() + # Meta-command and step commands + assert (claude_dir / "deepwork_jobs.md").exists() + assert (claude_dir / "_deepwork_jobs.define.md").exists() assert (claude_dir / "deepwork_jobs.learn.md").exists() def test_install_creates_rules_directory(self, mock_claude_project: Path) -> None: diff --git a/tests/unit/test_adapters.py b/tests/unit/test_adapters.py index a1d06f3..b0675e9 100644 --- a/tests/unit/test_adapters.py +++ b/tests/unit/test_adapters.py @@ -119,11 +119,27 @@ def test_get_commands_dir_raises_without_root(self) -> None: with pytest.raises(AdapterError, match="No project root specified"): adapter.get_commands_dir() - def test_get_command_filename(self) -> None: - """Test get_command_filename.""" + def test_get_meta_command_filename(self) -> None: + """Test get_meta_command_filename.""" adapter = ClaudeAdapter() - result = adapter.get_command_filename("my_job", "step_one") + result = adapter.get_meta_command_filename("my_job") + + assert result == "my_job.md" + + def test_get_step_command_filename_hidden_by_default(self) -> None: + """Test get_step_command_filename returns hidden filename by default.""" + adapter = ClaudeAdapter() + + result = adapter.get_step_command_filename("my_job", "step_one") + + assert result == "_my_job.step_one.md" + + def test_get_step_command_filename_exposed(self) -> None: + """Test get_step_command_filename returns visible filename when exposed.""" + adapter = ClaudeAdapter() + + result = adapter.get_step_command_filename("my_job", "step_one", exposed=True) 
assert result == "my_job.step_one.md" @@ -246,22 +262,41 @@ def test_get_commands_dir_raises_without_root(self) -> None: with pytest.raises(AdapterError, match="No project root specified"): adapter.get_commands_dir() - def test_get_command_filename(self) -> None: - """Test get_command_filename returns TOML with subdirectory.""" + def test_get_meta_command_filename(self) -> None: + """Test get_meta_command_filename returns index.toml in subdirectory.""" adapter = GeminiAdapter() - result = adapter.get_command_filename("my_job", "step_one") + result = adapter.get_meta_command_filename("my_job") + + # Gemini uses subdirectories with index.toml for meta-commands + assert result == "my_job/index.toml" + + def test_get_step_command_filename_hidden_by_default(self) -> None: + """Test get_step_command_filename returns hidden TOML with subdirectory.""" + adapter = GeminiAdapter() + + result = adapter.get_step_command_filename("my_job", "step_one") # Gemini uses subdirectories for namespacing (colon becomes path) + # Hidden steps have underscore prefix + assert result == "my_job/_step_one.toml" + + def test_get_step_command_filename_exposed(self) -> None: + """Test get_step_command_filename returns visible TOML when exposed.""" + adapter = GeminiAdapter() + + result = adapter.get_step_command_filename("my_job", "step_one", exposed=True) + + # Exposed steps have no underscore prefix assert result == "my_job/step_one.toml" - def test_get_command_filename_with_underscores(self) -> None: - """Test get_command_filename with underscores in names.""" + def test_get_step_command_filename_with_underscores(self) -> None: + """Test get_step_command_filename with underscores in names.""" adapter = GeminiAdapter() - result = adapter.get_command_filename("competitive_research", "identify_competitors") + result = adapter.get_step_command_filename("competitive_research", "identify_competitors") - assert result == "competitive_research/identify_competitors.toml" + assert result == 
"competitive_research/_identify_competitors.toml" def test_hook_name_mapping_is_empty(self) -> None: """Test that Gemini has no command-level hooks.""" diff --git a/tests/unit/test_generator.py b/tests/unit/test_generator.py index f83d20a..f7db7d4 100644 --- a/tests/unit/test_generator.py +++ b/tests/unit/test_generator.py @@ -36,7 +36,7 @@ def test_init_raises_for_missing_templates_dir(self, temp_dir: Path) -> None: CommandGenerator(nonexistent) def test_generate_step_command_simple_job(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test generating command for simple job step.""" + """Test generating command for simple job step (hidden by default).""" job_dir = fixtures_dir / "jobs" / "simple_job" job = parse_job_definition(job_dir) @@ -46,7 +46,8 @@ def test_generate_step_command_simple_job(self, fixtures_dir: Path, temp_dir: Pa command_path = generator.generate_step_command(job, job.steps[0], adapter, temp_dir) assert command_path.exists() - assert command_path.name == "simple_job.single_step.md" + # Step commands are hidden by default (underscore prefix) + assert command_path.name == "_simple_job.single_step.md" content = command_path.read_text() assert "# simple_job.single_step" in content @@ -176,7 +177,7 @@ def test_generate_step_command_raises_for_missing_instructions( instructions_file.write_text(original_content) def test_generate_all_commands(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test generating commands for all steps in a job.""" + """Test generating commands for all steps in a job (meta + hidden steps).""" job_dir = fixtures_dir / "jobs" / "complex_job" job = parse_job_definition(job_dir) @@ -185,15 +186,78 @@ def test_generate_all_commands(self, fixtures_dir: Path, temp_dir: Path) -> None command_paths = generator.generate_all_commands(job, adapter, temp_dir) - assert len(command_paths) == 4 + # Now includes meta-command plus step commands + assert len(command_paths) == 5 # 1 meta + 4 steps assert all(p.exists() for p in 
command_paths) - # Check filenames + # Check filenames - meta-command first, then hidden step commands expected_names = [ - "competitive_research.identify_competitors.md", - "competitive_research.primary_research.md", - "competitive_research.secondary_research.md", - "competitive_research.comparative_report.md", + "competitive_research.md", # Meta-command + "_competitive_research.identify_competitors.md", # Hidden steps + "_competitive_research.primary_research.md", + "_competitive_research.secondary_research.md", + "_competitive_research.comparative_report.md", + ] + actual_names = [p.name for p in command_paths] + assert actual_names == expected_names + + def test_generate_meta_command(self, fixtures_dir: Path, temp_dir: Path) -> None: + """Test generating meta-command for a job.""" + job_dir = fixtures_dir / "jobs" / "complex_job" + job = parse_job_definition(job_dir) + + generator = CommandGenerator() + adapter = ClaudeAdapter() + + meta_command_path = generator.generate_meta_command(job, adapter, temp_dir) + + assert meta_command_path.exists() + assert meta_command_path.name == "competitive_research.md" + + content = meta_command_path.read_text() + # Check meta-command content + assert "# competitive_research" in content + assert "Available Steps" in content + assert "identify_competitors" in content + assert "primary_research" in content + assert "Skill tool" in content + + def test_generate_step_command_exposed_step(self, fixtures_dir: Path, temp_dir: Path) -> None: + """Test generating command for exposed step (no underscore prefix).""" + job_dir = fixtures_dir / "jobs" / "exposed_step_job" + job = parse_job_definition(job_dir) + + generator = CommandGenerator() + adapter = ClaudeAdapter() + + # Generate the exposed step (index 1) + command_path = generator.generate_step_command(job, job.steps[1], adapter, temp_dir) + + assert command_path.exists() + # Exposed step should NOT have underscore prefix + assert command_path.name == "exposed_job.exposed_step.md" 
+ + def test_generate_all_commands_with_exposed_steps( + self, fixtures_dir: Path, temp_dir: Path + ) -> None: + """Test generating all commands with mix of hidden and exposed steps.""" + job_dir = fixtures_dir / "jobs" / "exposed_step_job" + job = parse_job_definition(job_dir) + + generator = CommandGenerator() + adapter = ClaudeAdapter() + + command_paths = generator.generate_all_commands(job, adapter, temp_dir) + + # Meta-command + 2 steps (1 hidden, 1 exposed) + assert len(command_paths) == 3 + assert all(p.exists() for p in command_paths) + + # Check filenames - hidden step has underscore, exposed doesn't + expected_names = [ + "exposed_job.md", # Meta-command + "_exposed_job.hidden_step.md", # Hidden step + "exposed_job.exposed_step.md", # Exposed step (no underscore) ] actual_names = [p.name for p in command_paths] assert actual_names == expected_names diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py index 11e0666..cfdba8f 100644 --- a/tests/unit/test_parser.py +++ b/tests/unit/test_parser.py @@ -86,6 +86,33 @@ def test_from_dict_with_inputs(self) -> None: assert step.inputs[1].is_file_input() assert step.dependencies == ["step0"] + def test_from_dict_exposed_default_false(self) -> None: + """Test that exposed defaults to False.""" + data = { + "id": "step1", + "name": "Step 1", + "description": "First step", + "instructions_file": "steps/step1.md", + "outputs": ["output.md"], + } + step = Step.from_dict(data) + + assert step.exposed is False + + def test_from_dict_exposed_true(self) -> None: + """Test creating step with exposed=True.""" + data = { + "id": "step1", + "name": "Step 1", + "description": "First step", + "instructions_file": "steps/step1.md", + "outputs": ["output.md"], + "exposed": True, + } + step = Step.from_dict(data) + + assert step.exposed is True + class TestJobDefinition: """Tests for JobDefinition dataclass.""" @@ -284,6 +311,19 @@ def test_parses_file_inputs(self, fixtures_dir: Path) -> None: assert 
step.inputs[0].file == "competitors.md" assert step.inputs[0].from_step == "identify_competitors" + def test_parses_exposed_steps(self, fixtures_dir: Path) -> None: + """Test parsing job with exposed and hidden steps.""" + job_dir = fixtures_dir / "jobs" / "exposed_step_job" + job = parse_job_definition(job_dir) + + assert len(job.steps) == 2 + # First step is hidden by default + assert job.steps[0].id == "hidden_step" + assert job.steps[0].exposed is False + # Second step is explicitly exposed + assert job.steps[1].id == "exposed_step" + assert job.steps[1].exposed is True + def test_raises_for_missing_directory(self, temp_dir: Path) -> None: """Test parsing fails for missing directory.""" nonexistent = temp_dir / "nonexistent" From e4cf2b1cd8f292f2e32744a7e440ebbef60e3662 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Fri, 16 Jan 2026 11:58:33 -0700 Subject: [PATCH 2/6] Apply ruff formatting --- src/deepwork/core/generator.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/deepwork/core/generator.py b/src/deepwork/core/generator.py index 235897b..c18317e 100644 --- a/src/deepwork/core/generator.py +++ b/src/deepwork/core/generator.py @@ -229,14 +229,16 @@ def _build_meta_command_context( # Claude uses dot for namespacing: _job_name.step_id command_name = command_filename.replace(".md", "") - steps_info.append({ - "id": step.id, - "name": step.name, - "description": step.description, - "command_name": command_name, - "dependencies": step.dependencies, - "exposed": step.exposed, - }) + steps_info.append( + { + "id": step.id, + "name": step.name, + "description": step.description, + "command_name": command_name, + "dependencies": step.dependencies, + "exposed": step.exposed, + } + ) return { "job_name": job.name, From 8e9d05ddf1ca997867eba1e3560160858be58614 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Fri, 16 Jan 2026 12:27:46 -0700 Subject: [PATCH 3/6] Use uw. 
prefix for hidden step commands instead of tilde The tilde (~) prefix doesn't sort commands to the end as intended. Using "uw." (underwork) prefix ensures hidden step commands sort after user-facing meta-commands alphabetically. - Rename all hidden step command files from _ to uw. prefix - Update adapters.py to generate uw. prefix for hidden steps - Update all references in parent command files - Update architecture.md documentation - Update all test assertions Co-Authored-By: Claude Opus 4.5 --- .claude/commands/add_platform.md | 8 +++---- .claude/commands/deepwork_jobs.md | 4 ++-- .claude/commands/deepwork_policy.md | 2 +- .claude/commands/update.md | 2 +- ...md => uw.add_platform.add_capabilities.md} | 0 ...lement.md => uw.add_platform.implement.md} | 0 ...esearch.md => uw.add_platform.research.md} | 0 ...rm.verify.md => uw.add_platform.verify.md} | 0 ...s.define.md => uw.deepwork_jobs.define.md} | 0 ...ement.md => uw.deepwork_jobs.implement.md} | 0 .../{_update.job.md => uw.update.job.md} | 0 .gemini/commands/add_platform/index.toml | 16 +++++++------- ...bilities.toml => uw.add_capabilities.toml} | 0 .../{_implement.toml => uw.implement.toml} | 0 .../{_research.toml => uw.research.toml} | 0 .../{_verify.toml => uw.verify.toml} | 0 .gemini/commands/deepwork_jobs/index.toml | 8 +++---- .../{_define.toml => uw.define.toml} | 0 .../{_implement.toml => uw.implement.toml} | 0 .gemini/commands/deepwork_policy/index.toml | 4 ++-- .gemini/commands/update/index.toml | 4 ++-- .../update/{_job.toml => uw.job.toml} | 0 doc/architecture.md | 22 +++++++++---------- src/deepwork/core/adapters.py | 18 +++++++-------- src/deepwork/core/generator.py | 8 +++---- src/deepwork/core/parser.py | 2 +- src/deepwork/schemas/job_schema.py | 2 +- tests/e2e/test_claude_code_integration.py | 14 ++++++------ .../exposed_step_job/steps/hidden_step.md | 2 +- tests/integration/test_fruits_workflow.py | 12 +++++----- tests/integration/test_install_flow.py | 14 ++++++------ 
tests/unit/test_adapters.py | 10 ++++----- tests/unit/test_generator.py | 22 +++++++++---------- 33 files changed, 87 insertions(+), 87 deletions(-) rename .claude/commands/{_add_platform.add_capabilities.md => uw.add_platform.add_capabilities.md} (100%) rename .claude/commands/{_add_platform.implement.md => uw.add_platform.implement.md} (100%) rename .claude/commands/{_add_platform.research.md => uw.add_platform.research.md} (100%) rename .claude/commands/{_add_platform.verify.md => uw.add_platform.verify.md} (100%) rename .claude/commands/{_deepwork_jobs.define.md => uw.deepwork_jobs.define.md} (100%) rename .claude/commands/{_deepwork_jobs.implement.md => uw.deepwork_jobs.implement.md} (100%) rename .claude/commands/{_update.job.md => uw.update.job.md} (100%) rename .gemini/commands/add_platform/{_add_capabilities.toml => uw.add_capabilities.toml} (100%) rename .gemini/commands/add_platform/{_implement.toml => uw.implement.toml} (100%) rename .gemini/commands/add_platform/{_research.toml => uw.research.toml} (100%) rename .gemini/commands/add_platform/{_verify.toml => uw.verify.toml} (100%) rename .gemini/commands/deepwork_jobs/{_define.toml => uw.define.toml} (100%) rename .gemini/commands/deepwork_jobs/{_implement.toml => uw.implement.toml} (100%) rename .gemini/commands/update/{_job.toml => uw.job.toml} (100%) diff --git a/.claude/commands/add_platform.md b/.claude/commands/add_platform.md index 42cd87b..5364068 100644 --- a/.claude/commands/add_platform.md +++ b/.claude/commands/add_platform.md @@ -30,18 +30,18 @@ This job has 4 step(s): ### research **Research Platform Documentation**: Capture CLI configuration and hooks system documentation for the new platform -- Command: `_add_platform.research` +- Command: `uw.add_platform.research` ### add_capabilities **Add Hook Capabilities**: Update job schema and adapters with any new hook events the platform supports -- Command: `_add_platform.add_capabilities` +- Command: `uw.add_platform.add_capabilities` - 
Requires: research ### implement **Implement Platform Support**: Add platform adapter, templates, tests with 100% coverage, and README documentation -- Command: `_add_platform.implement` +- Command: `uw.add_platform.implement` - Requires: research, add_capabilities ### verify **Verify Installation**: Set up platform directories and verify deepwork install works correctly -- Command: `_add_platform.verify` +- Command: `uw.add_platform.verify` - Requires: implement ## Instructions diff --git a/.claude/commands/deepwork_jobs.md b/.claude/commands/deepwork_jobs.md index ba4e639..ff4feab 100644 --- a/.claude/commands/deepwork_jobs.md +++ b/.claude/commands/deepwork_jobs.md @@ -24,10 +24,10 @@ This job has 3 step(s): ### define **Define Job Specification**: Create the job.yml specification file by understanding workflow requirements -- Command: `_deepwork_jobs.define` +- Command: `uw.deepwork_jobs.define` ### implement **Implement Job Steps**: Generate instruction files for each step based on the job.yml specification -- Command: `_deepwork_jobs.implement` +- Command: `uw.deepwork_jobs.implement` - Requires: define ### learn **Learn from Job Execution**: Reflect on conversation to improve job instructions and capture learnings diff --git a/.claude/commands/deepwork_policy.md b/.claude/commands/deepwork_policy.md index 01dfba9..d705cc7 100644 --- a/.claude/commands/deepwork_policy.md +++ b/.claude/commands/deepwork_policy.md @@ -29,7 +29,7 @@ This job has 1 step(s): ### define **Define Policy**: Create or update policy entries in .deepwork.policy.yml -- Command: `_deepwork_policy.define` +- Command: `uw.deepwork_policy.define` ## Instructions diff --git a/.claude/commands/update.md b/.claude/commands/update.md index 30c86a6..25a61eb 100644 --- a/.claude/commands/update.md +++ b/.claude/commands/update.md @@ -27,7 +27,7 @@ This job has 1 step(s): ### job **Update Standard Job**: Edit standard job source files and sync to installed locations -- Command: `_update.job` +- 
Command: `uw.update.job` ## Instructions diff --git a/.claude/commands/_add_platform.add_capabilities.md b/.claude/commands/uw.add_platform.add_capabilities.md similarity index 100% rename from .claude/commands/_add_platform.add_capabilities.md rename to .claude/commands/uw.add_platform.add_capabilities.md diff --git a/.claude/commands/_add_platform.implement.md b/.claude/commands/uw.add_platform.implement.md similarity index 100% rename from .claude/commands/_add_platform.implement.md rename to .claude/commands/uw.add_platform.implement.md diff --git a/.claude/commands/_add_platform.research.md b/.claude/commands/uw.add_platform.research.md similarity index 100% rename from .claude/commands/_add_platform.research.md rename to .claude/commands/uw.add_platform.research.md diff --git a/.claude/commands/_add_platform.verify.md b/.claude/commands/uw.add_platform.verify.md similarity index 100% rename from .claude/commands/_add_platform.verify.md rename to .claude/commands/uw.add_platform.verify.md diff --git a/.claude/commands/_deepwork_jobs.define.md b/.claude/commands/uw.deepwork_jobs.define.md similarity index 100% rename from .claude/commands/_deepwork_jobs.define.md rename to .claude/commands/uw.deepwork_jobs.define.md diff --git a/.claude/commands/_deepwork_jobs.implement.md b/.claude/commands/uw.deepwork_jobs.implement.md similarity index 100% rename from .claude/commands/_deepwork_jobs.implement.md rename to .claude/commands/uw.deepwork_jobs.implement.md diff --git a/.claude/commands/_update.job.md b/.claude/commands/uw.update.job.md similarity index 100% rename from .claude/commands/_update.job.md rename to .claude/commands/uw.update.job.md diff --git a/.gemini/commands/add_platform/index.toml b/.gemini/commands/add_platform/index.toml index 787b56d..5ee8dbd 100644 --- a/.gemini/commands/add_platform/index.toml +++ b/.gemini/commands/add_platform/index.toml @@ -35,18 +35,18 @@ This job has 4 step(s): ### research **Research Platform Documentation**: Capture 
CLI configuration and hooks system documentation for the new platform -- Command: `/add_platform:_research` +- Command: `/add_platform:uw.research` ### add_capabilities **Add Hook Capabilities**: Update job schema and adapters with any new hook events the platform supports -- Command: `/add_platform:_add_capabilities` +- Command: `/add_platform:uw.add_capabilities` - Requires: research ### implement **Implement Platform Support**: Add platform adapter, templates, tests with 100% coverage, and README documentation -- Command: `/add_platform:_implement` +- Command: `/add_platform:uw.implement` - Requires: research, add_capabilities ### verify **Verify Installation**: Set up platform directories and verify deepwork install works correctly -- Command: `/add_platform:_verify` +- Command: `/add_platform:uw.verify` - Requires: implement ## Instructions @@ -62,10 +62,10 @@ Determine what the user wants to do and route to the appropriate step. - verify: Set up platform directories and verify deepwork install works correctly 3. **Invoke the matched step**: Tell the user to run the appropriate command: - - For research: `/add_platform:_research` - - For add_capabilities: `/add_platform:_add_capabilities` - - For implement: `/add_platform:_implement` - - For verify: `/add_platform:_verify` + - For research: `/add_platform:uw.research` + - For add_capabilities: `/add_platform:uw.add_capabilities` + - For implement: `/add_platform:uw.implement` + - For verify: `/add_platform:uw.verify` 4. 
**If intent is ambiguous**, ask the user which step they want: - Present the available steps as numbered options diff --git a/.gemini/commands/add_platform/_add_capabilities.toml b/.gemini/commands/add_platform/uw.add_capabilities.toml similarity index 100% rename from .gemini/commands/add_platform/_add_capabilities.toml rename to .gemini/commands/add_platform/uw.add_capabilities.toml diff --git a/.gemini/commands/add_platform/_implement.toml b/.gemini/commands/add_platform/uw.implement.toml similarity index 100% rename from .gemini/commands/add_platform/_implement.toml rename to .gemini/commands/add_platform/uw.implement.toml diff --git a/.gemini/commands/add_platform/_research.toml b/.gemini/commands/add_platform/uw.research.toml similarity index 100% rename from .gemini/commands/add_platform/_research.toml rename to .gemini/commands/add_platform/uw.research.toml diff --git a/.gemini/commands/add_platform/_verify.toml b/.gemini/commands/add_platform/uw.verify.toml similarity index 100% rename from .gemini/commands/add_platform/_verify.toml rename to .gemini/commands/add_platform/uw.verify.toml diff --git a/.gemini/commands/deepwork_jobs/index.toml b/.gemini/commands/deepwork_jobs/index.toml index 357ca56..b197d4c 100644 --- a/.gemini/commands/deepwork_jobs/index.toml +++ b/.gemini/commands/deepwork_jobs/index.toml @@ -29,10 +29,10 @@ This job has 3 step(s): ### define **Define Job Specification**: Create the job.yml specification file by understanding workflow requirements -- Command: `/deepwork_jobs:_define` +- Command: `/deepwork_jobs:uw.define` ### implement **Implement Job Steps**: Generate instruction files for each step based on the job.yml specification -- Command: `/deepwork_jobs:_implement` +- Command: `/deepwork_jobs:uw.implement` - Requires: define ### learn **Learn from Job Execution**: Reflect on conversation to improve job instructions and capture learnings @@ -50,8 +50,8 @@ Determine what the user wants to do and route to the appropriate step. 
- learn: Reflect on conversation to improve job instructions and capture learnings 3. **Invoke the matched step**: Tell the user to run the appropriate command: - - For define: `/deepwork_jobs:_define` - - For implement: `/deepwork_jobs:_implement` + - For define: `/deepwork_jobs:uw.define` + - For implement: `/deepwork_jobs:uw.implement` - For learn: `/deepwork_jobs:learn` 4. **If intent is ambiguous**, ask the user which step they want: diff --git a/.gemini/commands/deepwork_jobs/_define.toml b/.gemini/commands/deepwork_jobs/uw.define.toml similarity index 100% rename from .gemini/commands/deepwork_jobs/_define.toml rename to .gemini/commands/deepwork_jobs/uw.define.toml diff --git a/.gemini/commands/deepwork_jobs/_implement.toml b/.gemini/commands/deepwork_jobs/uw.implement.toml similarity index 100% rename from .gemini/commands/deepwork_jobs/_implement.toml rename to .gemini/commands/deepwork_jobs/uw.implement.toml diff --git a/.gemini/commands/deepwork_policy/index.toml b/.gemini/commands/deepwork_policy/index.toml index e415dca..1276544 100644 --- a/.gemini/commands/deepwork_policy/index.toml +++ b/.gemini/commands/deepwork_policy/index.toml @@ -34,7 +34,7 @@ This job has 1 step(s): ### define **Define Policy**: Create or update policy entries in .deepwork.policy.yml -- Command: `/deepwork_policy:_define` +- Command: `/deepwork_policy:uw.define` ## Instructions @@ -46,7 +46,7 @@ Determine what the user wants to do and route to the appropriate step. - define: Create or update policy entries in .deepwork.policy.yml 3. **Invoke the matched step**: Tell the user to run the appropriate command: - - For define: `/deepwork_policy:_define` + - For define: `/deepwork_policy:uw.define` 4. 
**If intent is ambiguous**, ask the user which step they want: - Present the available steps as numbered options diff --git a/.gemini/commands/update/index.toml b/.gemini/commands/update/index.toml index d291922..5c423cd 100644 --- a/.gemini/commands/update/index.toml +++ b/.gemini/commands/update/index.toml @@ -32,7 +32,7 @@ This job has 1 step(s): ### job **Update Standard Job**: Edit standard job source files and sync to installed locations -- Command: `/update:_job` +- Command: `/update:uw.job` ## Instructions @@ -44,7 +44,7 @@ Determine what the user wants to do and route to the appropriate step. - job: Edit standard job source files and sync to installed locations 3. **Invoke the matched step**: Tell the user to run the appropriate command: - - For job: `/update:_job` + - For job: `/update:uw.job` 4. **If intent is ambiguous**, ask the user which step they want: - Present the available steps as numbered options diff --git a/.gemini/commands/update/_job.toml b/.gemini/commands/update/uw.job.toml similarity index 100% rename from .gemini/commands/update/_job.toml rename to .gemini/commands/update/uw.job.toml diff --git a/doc/architecture.md b/doc/architecture.md index a6c892b..bfc05f3 100644 --- a/doc/architecture.md +++ b/doc/architecture.md @@ -180,8 +180,8 @@ class AgentAdapter(ABC): return f"{job_name}.md" def get_step_command_filename(self, job_name: str, step_id: str, exposed: bool = False) -> str: - """Get filename for step command. Hidden by default (underscore prefix).""" - prefix = "" if exposed else "_" + """Get filename for step command. Hidden by default (uw. prefix).""" + prefix = "" if exposed else "uw." 
return f"{prefix}{job_name}.{step_id}.md" def detect(self, project_root: Path) -> bool: @@ -244,14 +244,14 @@ This component is called by the `sync` command to regenerate all commands: **Meta-Command Architecture**: -Each job gets a single user-facing meta-command (e.g., `/deepwork_jobs`) that interprets user intent and routes to appropriate hidden step commands. Steps are hidden by default (underscore prefix) unless marked `exposed: true` in job.yml. +Each job gets a single user-facing meta-command (e.g., `/deepwork_jobs`) that interprets user intent and routes to appropriate hidden step commands. Steps are hidden by default (uw. prefix) unless marked `exposed: true` in job.yml. ```yaml # In job.yml steps: - id: define name: "Define Job" - # ... (hidden by default → _deepwork_jobs.define.md) + # ... (hidden by default → uw.deepwork_jobs.define.md) - id: learn name: "Learn from Execution" @@ -283,7 +283,7 @@ class CommandGenerator: # ... build context ... # Write to platform's commands directory - # Hidden by default (underscore prefix) unless step.exposed is True + # Hidden by default (uw. prefix) unless step.exposed is True command_filename = adapter.get_step_command_filename(job.name, step.id, step.exposed) command_path = output_dir / adapter.config_dir / adapter.commands_dir / command_filename write_file(command_path, rendered) @@ -305,13 +305,13 @@ my-project/ # User's project (target) │ ├── settings.json # Includes installed hooks │ └── commands/ # Command files │ ├── deepwork_jobs.md # Meta-command (user-facing entry point) -│ ├── _deepwork_jobs.define.md # Hidden step (underscore prefix) -│ ├── _deepwork_jobs.implement.md # Hidden step -│ ├── deepwork_jobs.learn.md # Exposed step (no underscore) +│ ├── uw.deepwork_jobs.define.md # Hidden step (uw. prefix) +│ ├── uw.deepwork_jobs.implement.md # Hidden step +│ ├── deepwork_jobs.learn.md # Exposed step (no uw. 
prefix) │ ├── deepwork_rules.md # Rules meta-command -│ ├── _deepwork_rules.define.md # Hidden step +│ ├── uw.deepwork_rules.define.md # Hidden step │ ├── competitive_research.md # User job meta-command -│ ├── _competitive_research.identify_competitors.md # Hidden steps +│ ├── uw.competitive_research.identify_competitors.md # Hidden steps │ └── ... ├── .deepwork/ # DeepWork configuration │ ├── config.yml # Platform config @@ -560,7 +560,7 @@ When the job is defined and `sync` is run, DeepWork generates command files. Exa `.deepwork/jobs/competitive_research` generates: - Meta-command: `.claude/commands/competitive_research.md` (user-facing entry point) -- Hidden step commands: `.claude/commands/_competitive_research.identify_competitors.md` (prefixed with underscore) +- Hidden step commands: `.claude/commands/uw.competitive_research.identify_competitors.md` (prefixed with uw.) - Exposed step commands: `.claude/commands/competitive_research.step_name.md` (if `exposed: true` in job.yml) The meta-command routes user intent to the appropriate step command via the Skill tool. diff --git a/src/deepwork/core/adapters.py b/src/deepwork/core/adapters.py index 69d20e6..0aa6c27 100644 --- a/src/deepwork/core/adapters.py +++ b/src/deepwork/core/adapters.py @@ -173,18 +173,18 @@ def get_step_command_filename(self, job_name: str, step_id: str, exposed: bool = """ Get the filename for a step command. - Step commands are hidden by default (underscore prefix) unless exposed=True. + Step commands are hidden by default (uw. prefix) unless exposed=True. Can be overridden for different file formats (e.g., TOML for Gemini). Args: job_name: Name of the job step_id: ID of the step - exposed: If True, command is visible (no underscore prefix). Default: False. + exposed: If True, command is visible (no uw. prefix). Default: False. 
Returns: - Command filename (e.g., "_job_name.step_id.md" or "job_name.step_id.md" if exposed) + Command filename (e.g., "uw.job_name.step_id.md" or "job_name.step_id.md" if exposed) """ - prefix = "" if exposed else "_" + prefix = "" if exposed else "uw." return f"{prefix}{job_name}.{step_id}.md" def detect(self, project_root: Path | None = None) -> bool: @@ -396,18 +396,18 @@ def get_step_command_filename(self, job_name: str, step_id: str, exposed: bool = Get the filename for a Gemini step command. Gemini uses TOML files and colon namespacing via subdirectories. - Step commands are hidden by default (underscore prefix) unless exposed=True. - For job "my_job" and step "step_one", creates: my_job/_step_one.toml + Step commands are hidden by default (uw. prefix) unless exposed=True. + For job "my_job" and step "step_one", creates: my_job/uw.step_one.toml Args: job_name: Name of the job step_id: ID of the step - exposed: If True, command is visible (no underscore prefix). Default: False. + exposed: If True, command is visible (no uw. prefix). Default: False. Returns: - Command filename path (e.g., "my_job/_step_one.toml" or "my_job/step_one.toml" if exposed) + Command filename path (e.g., "my_job/uw.step_one.toml" or "my_job/step_one.toml" if exposed) """ - prefix = "" if exposed else "_" + prefix = "" if exposed else "uw." 
return f"{job_name}/{prefix}{step_id}.toml" def sync_hooks(self, project_path: Path, hooks: dict[str, list[dict[str, Any]]]) -> int: diff --git a/src/deepwork/core/generator.py b/src/deepwork/core/generator.py index c18317e..d3331d8 100644 --- a/src/deepwork/core/generator.py +++ b/src/deepwork/core/generator.py @@ -219,14 +219,14 @@ def _build_meta_command_context( for step in job.steps: command_filename = adapter.get_step_command_filename(job.name, step.id, step.exposed) # Extract just the command name (without path and extension) - # For Claude: _job_name.step_id.md -> _job_name.step_id - # For Gemini: job_name/_step_id.toml -> job_name:_step_id + # For Claude: uw.job_name.step_id.md -> uw.job_name.step_id + # For Gemini: job_name/uw.step_id.toml -> job_name:uw.step_id if adapter.name == "gemini": - # Gemini uses colon for namespacing: job_name:step_id or job_name:_step_id + # Gemini uses colon for namespacing: job_name:step_id or job_name:uw.step_id parts = command_filename.replace(".toml", "").split("/") command_name = ":".join(parts) else: - # Claude uses dot for namespacing: _job_name.step_id + # Claude uses dot for namespacing: uw.job_name.step_id command_name = command_filename.replace(".md", "") steps_info.append( diff --git a/src/deepwork/core/parser.py b/src/deepwork/core/parser.py index e41c9c3..c7b6a60 100644 --- a/src/deepwork/core/parser.py +++ b/src/deepwork/core/parser.py @@ -108,7 +108,7 @@ class Step: # Event names: after_agent, before_tool, before_prompt hooks: dict[str, list[HookAction]] = field(default_factory=dict) - # If true, step command is visible (no underscore prefix). Default: false (hidden). + # If true, step command is visible (no uw. prefix). Default: false (hidden). 
exposed: bool = False @property diff --git a/src/deepwork/schemas/job_schema.py b/src/deepwork/schemas/job_schema.py index 0d229b6..b66fe24 100644 --- a/src/deepwork/schemas/job_schema.py +++ b/src/deepwork/schemas/job_schema.py @@ -205,7 +205,7 @@ }, "exposed": { "type": "boolean", - "description": "If true, step command is visible (no underscore prefix). Default: false (hidden).", + "description": "If true, step command is visible (no uw. prefix). Default: false (hidden).", "default": False, }, }, diff --git a/tests/e2e/test_claude_code_integration.py b/tests/e2e/test_claude_code_integration.py index c319663..f582078 100644 --- a/tests/e2e/test_claude_code_integration.py +++ b/tests/e2e/test_claude_code_integration.py @@ -98,8 +98,8 @@ def test_generate_fruits_commands_in_temp_project(self) -> None: assert len(command_paths) == 3 meta_cmd = commands_dir / "commands" / "fruits.md" - identify_cmd = commands_dir / "commands" / "_fruits.identify.md" - classify_cmd = commands_dir / "commands" / "_fruits.classify.md" + identify_cmd = commands_dir / "commands" / "uw.fruits.identify.md" + classify_cmd = commands_dir / "commands" / "uw.fruits.classify.md" assert meta_cmd.exists() assert identify_cmd.exists() @@ -129,8 +129,8 @@ def test_command_structure_matches_claude_code_expectations(self) -> None: adapter = ClaudeAdapter() generator.generate_all_commands(job, adapter, commands_dir) - # Step commands are now hidden (underscore prefix) - identify_cmd = commands_dir / "commands" / "_fruits.identify.md" + # Step commands are now hidden (uw. prefix) + identify_cmd = commands_dir / "commands" / "uw.fruits.identify.md" content = identify_cmd.read_text() # Claude Code expects specific sections @@ -155,14 +155,14 @@ def test_dependency_chain_in_commands(self) -> None: adapter = ClaudeAdapter() generator.generate_all_commands(job, adapter, commands_dir) - # Step commands are now hidden (underscore prefix) + # Step commands are now hidden (uw. 
prefix) # First step should have no prerequisites - identify_cmd = commands_dir / "commands" / "_fruits.identify.md" + identify_cmd = commands_dir / "commands" / "uw.fruits.identify.md" identify_content = identify_cmd.read_text() assert "## Prerequisites" not in identify_content # Second step should reference first step - classify_cmd = commands_dir / "commands" / "_fruits.classify.md" + classify_cmd = commands_dir / "commands" / "uw.fruits.classify.md" classify_content = classify_cmd.read_text() assert "## Prerequisites" in classify_content assert "identify" in classify_content.lower() diff --git a/tests/fixtures/jobs/exposed_step_job/steps/hidden_step.md b/tests/fixtures/jobs/exposed_step_job/steps/hidden_step.md index 01b3434..89dc067 100644 --- a/tests/fixtures/jobs/exposed_step_job/steps/hidden_step.md +++ b/tests/fixtures/jobs/exposed_step_job/steps/hidden_step.md @@ -1,6 +1,6 @@ # Hidden Step Instructions -This step is hidden by default (no underscore prefix). +This step is hidden by default (has uw. prefix). 
## Task diff --git a/tests/integration/test_fruits_workflow.py b/tests/integration/test_fruits_workflow.py index efd449d..ea8b05f 100644 --- a/tests/integration/test_fruits_workflow.py +++ b/tests/integration/test_fruits_workflow.py @@ -85,8 +85,8 @@ def test_fruits_command_generation(self, fixtures_dir: Path, temp_dir: Path) -> # Verify command files exist meta_cmd = commands_dir / "commands" / "fruits.md" - identify_cmd = commands_dir / "commands" / "_fruits.identify.md" - classify_cmd = commands_dir / "commands" / "_fruits.classify.md" + identify_cmd = commands_dir / "commands" / "uw.fruits.identify.md" + classify_cmd = commands_dir / "commands" / "uw.fruits.classify.md" assert meta_cmd.exists() assert identify_cmd.exists() assert classify_cmd.exists() @@ -103,8 +103,8 @@ def test_fruits_identify_command_content(self, fixtures_dir: Path, temp_dir: Pat generator.generate_all_commands(job, adapter, commands_dir) - # Step commands are now hidden (underscore prefix) - identify_cmd = commands_dir / "commands" / "_fruits.identify.md" + # Step commands are now hidden (uw. prefix) + identify_cmd = commands_dir / "commands" / "uw.fruits.identify.md" content = identify_cmd.read_text() # Check header @@ -134,8 +134,8 @@ def test_fruits_classify_command_content(self, fixtures_dir: Path, temp_dir: Pat generator.generate_all_commands(job, adapter, commands_dir) - # Step commands are now hidden (underscore prefix) - classify_cmd = commands_dir / "commands" / "_fruits.classify.md" + # Step commands are now hidden (uw. 
prefix) + classify_cmd = commands_dir / "commands" / "uw.fruits.classify.md" content = classify_cmd.read_text() # Check header diff --git a/tests/integration/test_install_flow.py b/tests/integration/test_install_flow.py index b2a3555..66bcdc2 100644 --- a/tests/integration/test_install_flow.py +++ b/tests/integration/test_install_flow.py @@ -43,9 +43,9 @@ def test_install_with_claude(self, mock_claude_project: Path) -> None: claude_dir = mock_claude_project / ".claude" / "commands" # Meta-command assert (claude_dir / "deepwork_jobs.md").exists() - # Hidden step command (underscore prefix) - assert (claude_dir / "_deepwork_jobs.define.md").exists() - # Exposed step command (no underscore - learn has exposed: true) + # Hidden step command (uw. prefix) + assert (claude_dir / "uw.deepwork_jobs.define.md").exists() + # Exposed step command (no uw. prefix - learn has exposed: true) assert (claude_dir / "deepwork_jobs.learn.md").exists() # Verify meta-command content @@ -54,7 +54,7 @@ def test_install_with_claude(self, mock_claude_project: Path) -> None: assert "Available Steps" in meta_command # Verify hidden step command content - define_command = (claude_dir / "_deepwork_jobs.define.md").read_text() + define_command = (claude_dir / "uw.deepwork_jobs.define.md").read_text() assert "# deepwork_jobs.define" in define_command assert "Define Job Specification" in define_command @@ -117,13 +117,13 @@ def test_install_with_multiple_platforms_auto_detect( claude_dir = mock_multi_platform_project / ".claude" / "commands" # Meta-command and hidden step commands assert (claude_dir / "deepwork_jobs.md").exists() - assert (claude_dir / "_deepwork_jobs.define.md").exists() + assert (claude_dir / "uw.deepwork_jobs.define.md").exists() # Gemini uses job_name/step_id.toml structure gemini_dir = mock_multi_platform_project / ".gemini" / "commands" # Meta-command (index.toml) and hidden step commands assert (gemini_dir / "deepwork_jobs" / "index.toml").exists() - assert (gemini_dir / 
"deepwork_jobs" / "_define.toml").exists() + assert (gemini_dir / "deepwork_jobs" / "uw.define.toml").exists() def test_install_with_specified_platform_when_missing(self, mock_git_repo: Path) -> None: """Test that install fails when specified platform is not present.""" @@ -164,7 +164,7 @@ def test_install_is_idempotent(self, mock_claude_project: Path) -> None: claude_dir = mock_claude_project / ".claude" / "commands" # Meta-command and step commands assert (claude_dir / "deepwork_jobs.md").exists() - assert (claude_dir / "_deepwork_jobs.define.md").exists() + assert (claude_dir / "uw.deepwork_jobs.define.md").exists() assert (claude_dir / "deepwork_jobs.learn.md").exists() def test_install_creates_rules_directory(self, mock_claude_project: Path) -> None: diff --git a/tests/unit/test_adapters.py b/tests/unit/test_adapters.py index b0675e9..d052f00 100644 --- a/tests/unit/test_adapters.py +++ b/tests/unit/test_adapters.py @@ -133,7 +133,7 @@ def test_get_step_command_filename_hidden_by_default(self) -> None: result = adapter.get_step_command_filename("my_job", "step_one") - assert result == "_my_job.step_one.md" + assert result == "uw.my_job.step_one.md" def test_get_step_command_filename_exposed(self) -> None: """Test get_step_command_filename returns visible filename when exposed.""" @@ -278,8 +278,8 @@ def test_get_step_command_filename_hidden_by_default(self) -> None: result = adapter.get_step_command_filename("my_job", "step_one") # Gemini uses subdirectories for namespacing (colon becomes path) - # Hidden steps have underscore prefix - assert result == "my_job/_step_one.toml" + # Hidden steps have uw. 
prefix + assert result == "my_job/uw.step_one.toml" def test_get_step_command_filename_exposed(self) -> None: """Test get_step_command_filename returns visible TOML when exposed.""" @@ -287,7 +287,7 @@ def test_get_step_command_filename_exposed(self) -> None: result = adapter.get_step_command_filename("my_job", "step_one", exposed=True) - # Exposed steps have no underscore prefix + # Exposed steps have no uw. prefix assert result == "my_job/step_one.toml" def test_get_step_command_filename_with_underscores(self) -> None: @@ -296,7 +296,7 @@ def test_get_step_command_filename_with_underscores(self) -> None: result = adapter.get_step_command_filename("competitive_research", "identify_competitors") - assert result == "competitive_research/_identify_competitors.toml" + assert result == "competitive_research/uw.identify_competitors.toml" def test_hook_name_mapping_is_empty(self) -> None: """Test that Gemini has no command-level hooks.""" diff --git a/tests/unit/test_generator.py b/tests/unit/test_generator.py index f7db7d4..839cd6f 100644 --- a/tests/unit/test_generator.py +++ b/tests/unit/test_generator.py @@ -46,8 +46,8 @@ def test_generate_step_command_simple_job(self, fixtures_dir: Path, temp_dir: Pa command_path = generator.generate_step_command(job, job.steps[0], adapter, temp_dir) assert command_path.exists() - # Step commands are hidden by default (underscore prefix) - assert command_path.name == "_simple_job.single_step.md" + # Step commands are hidden by default (uw. 
prefix) + assert command_path.name == "uw.simple_job.single_step.md" content = command_path.read_text() assert "# simple_job.single_step" in content @@ -193,10 +193,10 @@ def test_generate_all_commands(self, fixtures_dir: Path, temp_dir: Path) -> None # Check filenames - meta-command first, then hidden step commands expected_names = [ "competitive_research.md", # Meta-command - "_competitive_research.identify_competitors.md", # Hidden steps - "_competitive_research.primary_research.md", - "_competitive_research.secondary_research.md", - "_competitive_research.comparative_report.md", + "uw.competitive_research.identify_competitors.md", # Hidden steps + "uw.competitive_research.primary_research.md", + "uw.competitive_research.secondary_research.md", + "uw.competitive_research.comparative_report.md", ] actual_names = [p.name for p in command_paths] assert actual_names == expected_names @@ -223,7 +223,7 @@ def test_generate_meta_command(self, fixtures_dir: Path, temp_dir: Path) -> None assert "Skill tool" in content def test_generate_step_command_exposed_step(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test generating command for exposed step (no underscore prefix).""" + """Test generating command for exposed step (no uw. prefix).""" job_dir = fixtures_dir / "jobs" / "exposed_step_job" job = parse_job_definition(job_dir) @@ -234,7 +234,7 @@ def test_generate_step_command_exposed_step(self, fixtures_dir: Path, temp_dir: command_path = generator.generate_step_command(job, job.steps[1], adapter, temp_dir) assert command_path.exists() - # Exposed step should NOT have underscore prefix + # Exposed step should NOT have uw. 
prefix assert command_path.name == "exposed_job.exposed_step.md" def test_generate_all_commands_with_exposed_steps( @@ -253,11 +253,11 @@ def test_generate_all_commands_with_exposed_steps( assert len(command_paths) == 3 assert all(p.exists() for p in command_paths) - # Check filenames - hidden step has underscore, exposed doesn't + # Check filenames - hidden step has uw. prefix, exposed doesn't expected_names = [ "exposed_job.md", # Meta-command - "_exposed_job.hidden_step.md", # Hidden step - "exposed_job.exposed_step.md", # Exposed step (no underscore) + "uw.exposed_job.hidden_step.md", # Hidden step + "exposed_job.exposed_step.md", # Exposed step (no uw. prefix) ] actual_names = [p.name for p in command_paths] assert actual_names == expected_names From 5a1e56128f11afde6b07d1cc0779bce1d8f53e5b Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Fri, 16 Jan 2026 16:39:49 -0700 Subject: [PATCH 4/6] Add AGENT: TAKE ACTION prefix to hook prompts Prevents Claude from stopping when hooks are presented as errors. Updated prompts in policy.yml, add_platform, update, and deepwork_jobs. 
Co-Authored-By: Claude Opus 4.5 --- .claude/commands/deepwork_jobs.learn.md | 4 ++-- .claude/commands/uw.add_platform.add_capabilities.md | 4 ++-- .claude/commands/uw.add_platform.implement.md | 4 ++-- .claude/commands/uw.add_platform.research.md | 4 ++-- .claude/commands/uw.add_platform.verify.md | 4 ++-- .claude/commands/uw.deepwork_jobs.define.md | 4 ++-- .claude/commands/uw.deepwork_jobs.implement.md | 4 ++-- .claude/commands/uw.update.job.md | 4 ++-- .deepwork/jobs/add_platform/job.yml | 8 ++++---- .deepwork/jobs/deepwork_jobs/job.yml | 6 +++--- .deepwork/jobs/update/job.yml | 2 +- src/deepwork/standard_jobs/deepwork_jobs/job.yml | 6 +++--- 12 files changed, 27 insertions(+), 27 deletions(-) diff --git a/.claude/commands/deepwork_jobs.learn.md b/.claude/commands/deepwork_jobs.learn.md index b1eb339..c165d18 100644 --- a/.claude/commands/deepwork_jobs.learn.md +++ b/.claude/commands/deepwork_jobs.learn.md @@ -9,7 +9,7 @@ hooks: ## Quality Criteria - Verify the learning process meets ALL quality criteria before completing: + **AGENT: TAKE ACTION** - Verify the learning process meets ALL quality criteria before completing: 1. **Conversation Analyzed**: Did you review the conversation for DeepWork job executions? 2. **Confusion Identified**: Did you identify points of confusion, errors, or inefficiencies? @@ -386,7 +386,7 @@ Ensure all outputs are: This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. ### Quality Criteria -Verify the learning process meets ALL quality criteria before completing: +**AGENT: TAKE ACTION** - Verify the learning process meets ALL quality criteria before completing: 1. **Conversation Analyzed**: Did you review the conversation for DeepWork job executions? 2. **Confusion Identified**: Did you identify points of confusion, errors, or inefficiencies? 
diff --git a/.claude/commands/uw.add_platform.add_capabilities.md b/.claude/commands/uw.add_platform.add_capabilities.md index dac48f9..5b1b022 100644 --- a/.claude/commands/uw.add_platform.add_capabilities.md +++ b/.claude/commands/uw.add_platform.add_capabilities.md @@ -9,7 +9,7 @@ hooks: ## Quality Criteria - Verify the capability additions meet ALL criteria: + **AGENT: TAKE ACTION** - Verify the capability additions meet ALL criteria: 1. Any new hooks from the platform (for slash commands only) are added to src/deepwork/schemas/job_schema.py 2. All existing adapters in src/deepwork/adapters.py are updated with the new hook fields (set to None/null if the platform doesn't support that hook) @@ -242,7 +242,7 @@ Ensure all outputs are: This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. ### Quality Criteria -Verify the capability additions meet ALL criteria: +**AGENT: TAKE ACTION** - Verify the capability additions meet ALL criteria: 1. Any new hooks from the platform (for slash commands only) are added to src/deepwork/schemas/job_schema.py 2. All existing adapters in src/deepwork/adapters.py are updated with the new hook fields (set to None/null if the platform doesn't support that hook) diff --git a/.claude/commands/uw.add_platform.implement.md b/.claude/commands/uw.add_platform.implement.md index 098fb55..381ae89 100644 --- a/.claude/commands/uw.add_platform.implement.md +++ b/.claude/commands/uw.add_platform.implement.md @@ -11,7 +11,7 @@ hooks: ## Quality Criteria - Verify the implementation meets ALL criteria: + **AGENT: TAKE ACTION** - Verify the implementation meets ALL criteria: 1. Platform adapter class is added to src/deepwork/adapters.py 2. Templates exist in src/deepwork/templates// with appropriate command structure 3. 
Tests exist for all new functionality @@ -343,7 +343,7 @@ This step uses an iterative quality validation loop. After completing your work, The validation script will be executed automatically when you attempt to complete this step. ### Quality Criteria (2) -Verify the implementation meets ALL criteria: +**AGENT: TAKE ACTION** - Verify the implementation meets ALL criteria: 1. Platform adapter class is added to src/deepwork/adapters.py 2. Templates exist in src/deepwork/templates// with appropriate command structure 3. Tests exist for all new functionality diff --git a/.claude/commands/uw.add_platform.research.md b/.claude/commands/uw.add_platform.research.md index 3fd6ec0..3c82569 100644 --- a/.claude/commands/uw.add_platform.research.md +++ b/.claude/commands/uw.add_platform.research.md @@ -9,7 +9,7 @@ hooks: ## Quality Criteria - Verify the research output meets ALL criteria: + **AGENT: TAKE ACTION** - Verify the research output meets ALL criteria: 1. Both files exist in doc/platforms//: cli_configuration.md and hooks_system.md 2. Each file has a comment at the top with: - Last updated date @@ -288,7 +288,7 @@ Ensure all outputs are: This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. ### Quality Criteria -Verify the research output meets ALL criteria: +**AGENT: TAKE ACTION** - Verify the research output meets ALL criteria: 1. Both files exist in doc/platforms//: cli_configuration.md and hooks_system.md 2. 
Each file has a comment at the top with: - Last updated date diff --git a/.claude/commands/uw.add_platform.verify.md b/.claude/commands/uw.add_platform.verify.md index d937b53..44288d2 100644 --- a/.claude/commands/uw.add_platform.verify.md +++ b/.claude/commands/uw.add_platform.verify.md @@ -9,7 +9,7 @@ hooks: ## Quality Criteria - Verify the installation meets ALL criteria: + **AGENT: TAKE ACTION** - Verify the installation meets ALL criteria: 1. Platform-specific directories/files are added to the deepwork repo as needed 2. Running `deepwork install --platform ` completes without errors 3. Expected command files are created in the platform's command directory @@ -213,7 +213,7 @@ No specific files are output by this command. This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. ### Quality Criteria -Verify the installation meets ALL criteria: +**AGENT: TAKE ACTION** - Verify the installation meets ALL criteria: 1. Platform-specific directories/files are added to the deepwork repo as needed 2. Running `deepwork install --platform ` completes without errors 3. Expected command files are created in the platform's command directory diff --git a/.claude/commands/uw.deepwork_jobs.define.md b/.claude/commands/uw.deepwork_jobs.define.md index 4de4eaf..a4319d7 100644 --- a/.claude/commands/uw.deepwork_jobs.define.md +++ b/.claude/commands/uw.deepwork_jobs.define.md @@ -9,7 +9,7 @@ hooks: ## Quality Criteria - Verify the job.yml output meets ALL quality criteria before completing: + **AGENT: TAKE ACTION** - Verify the job.yml output meets ALL quality criteria before completing: 1. **User Understanding**: Did you fully understand the user's workflow by asking structured questions? 2. **Structured Questions Used**: Did you ask structured questions (using the AskUserQuestion tool) to gather user input? 
@@ -427,7 +427,7 @@ Ensure all outputs are: This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. ### Quality Criteria -Verify the job.yml output meets ALL quality criteria before completing: +**AGENT: TAKE ACTION** - Verify the job.yml output meets ALL quality criteria before completing: 1. **User Understanding**: Did you fully understand the user's workflow by asking structured questions? 2. **Structured Questions Used**: Did you ask structured questions (using the AskUserQuestion tool) to gather user input? diff --git a/.claude/commands/uw.deepwork_jobs.implement.md b/.claude/commands/uw.deepwork_jobs.implement.md index 7c22467..6689d99 100644 --- a/.claude/commands/uw.deepwork_jobs.implement.md +++ b/.claude/commands/uw.deepwork_jobs.implement.md @@ -9,7 +9,7 @@ hooks: ## Quality Criteria - Verify the implementation meets ALL quality criteria before completing: + **AGENT: TAKE ACTION** - Verify the implementation meets ALL quality criteria before completing: 1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly? 2. **Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)? @@ -345,7 +345,7 @@ Ensure all outputs are: This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. ### Quality Criteria -Verify the implementation meets ALL quality criteria before completing: +**AGENT: TAKE ACTION** - Verify the implementation meets ALL quality criteria before completing: 1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly? 2. **Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)? 
diff --git a/.claude/commands/uw.update.job.md b/.claude/commands/uw.update.job.md index 9698eec..2c3317e 100644 --- a/.claude/commands/uw.update.job.md +++ b/.claude/commands/uw.update.job.md @@ -9,7 +9,7 @@ hooks: ## Quality Criteria - Verify the update process completed successfully: + **AGENT: TAKE ACTION** - Verify the update process completed successfully: 1. Changes were made in src/deepwork/standard_jobs/[job_name]/ (NOT in .deepwork/jobs/) 2. `deepwork install --platform claude` was run 3. Files in .deepwork/jobs/ match the source files @@ -161,7 +161,7 @@ No specific files are output by this command. This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. ### Quality Criteria -Verify the update process completed successfully: +**AGENT: TAKE ACTION** - Verify the update process completed successfully: 1. Changes were made in src/deepwork/standard_jobs/[job_name]/ (NOT in .deepwork/jobs/) 2. `deepwork install --platform claude` was run 3. Files in .deepwork/jobs/ match the source files diff --git a/.deepwork/jobs/add_platform/job.yml b/.deepwork/jobs/add_platform/job.yml index cca6d63..df97239 100644 --- a/.deepwork/jobs/add_platform/job.yml +++ b/.deepwork/jobs/add_platform/job.yml @@ -39,7 +39,7 @@ steps: hooks: after_agent: - prompt: | - Verify the research output meets ALL criteria: + **AGENT: TAKE ACTION** - Verify the research output meets ALL criteria: 1. Both files exist in doc/platforms//: cli_configuration.md and hooks_system.md 2. Each file has a comment at the top with: - Last updated date @@ -66,7 +66,7 @@ steps: hooks: after_agent: - prompt: | - Verify the capability additions meet ALL criteria: + **AGENT: TAKE ACTION** - Verify the capability additions meet ALL criteria: 1. Any new hooks from the platform (for slash commands only) are added to src/deepwork/schemas/job_schema.py 2. 
All existing adapters in src/deepwork/adapters.py are updated with the new hook fields (set to None/null if the platform doesn't support that hook) @@ -99,7 +99,7 @@ steps: after_agent: - script: hooks/run_tests.sh - prompt: | - Verify the implementation meets ALL criteria: + **AGENT: TAKE ACTION** - Verify the implementation meets ALL criteria: 1. Platform adapter class is added to src/deepwork/adapters.py 2. Templates exist in src/deepwork/templates// with appropriate command structure 3. Tests exist for all new functionality @@ -125,7 +125,7 @@ steps: hooks: after_agent: - prompt: | - Verify the installation meets ALL criteria: + **AGENT: TAKE ACTION** - Verify the installation meets ALL criteria: 1. Platform-specific directories/files are added to the deepwork repo as needed 2. Running `deepwork install --platform ` completes without errors 3. Expected command files are created in the platform's command directory diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index c5c5a13..f54f7f4 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -39,7 +39,7 @@ steps: hooks: after_agent: - prompt: | - Verify the job.yml output meets ALL quality criteria before completing: + **AGENT: TAKE ACTION** - Verify the job.yml output meets ALL quality criteria before completing: 1. **User Understanding**: Did you fully understand the user's workflow by asking structured questions? 2. **Structured Questions Used**: Did you ask structured questions (using the AskUserQuestion tool) to gather user input? @@ -67,7 +67,7 @@ steps: hooks: after_agent: - prompt: | - Verify the implementation meets ALL quality criteria before completing: + **AGENT: TAKE ACTION** - Verify the implementation meets ALL quality criteria before completing: 1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly? 2. **Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)? 
@@ -98,7 +98,7 @@ steps: hooks: after_agent: - prompt: | - Verify the learning process meets ALL quality criteria before completing: + **AGENT: TAKE ACTION** - Verify the learning process meets ALL quality criteria before completing: 1. **Conversation Analyzed**: Did you review the conversation for DeepWork job executions? 2. **Confusion Identified**: Did you identify points of confusion, errors, or inefficiencies? diff --git a/.deepwork/jobs/update/job.yml b/.deepwork/jobs/update/job.yml index 4f8ab33..379257c 100644 --- a/.deepwork/jobs/update/job.yml +++ b/.deepwork/jobs/update/job.yml @@ -34,7 +34,7 @@ steps: dependencies: [] stop_hooks: - prompt: | - Verify the update process completed successfully: + **AGENT: TAKE ACTION** - Verify the update process completed successfully: 1. Changes were made in src/deepwork/standard_jobs/[job_name]/ (NOT in .deepwork/jobs/) 2. `deepwork install --platform claude` was run 3. Files in .deepwork/jobs/ match the source files diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index c5c5a13..f54f7f4 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -39,7 +39,7 @@ steps: hooks: after_agent: - prompt: | - Verify the job.yml output meets ALL quality criteria before completing: + **AGENT: TAKE ACTION** - Verify the job.yml output meets ALL quality criteria before completing: 1. **User Understanding**: Did you fully understand the user's workflow by asking structured questions? 2. **Structured Questions Used**: Did you ask structured questions (using the AskUserQuestion tool) to gather user input? @@ -67,7 +67,7 @@ steps: hooks: after_agent: - prompt: | - Verify the implementation meets ALL quality criteria before completing: + **AGENT: TAKE ACTION** - Verify the implementation meets ALL quality criteria before completing: 1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly? 2. 
**Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)? @@ -98,7 +98,7 @@ steps: hooks: after_agent: - prompt: | - Verify the learning process meets ALL quality criteria before completing: + **AGENT: TAKE ACTION** - Verify the learning process meets ALL quality criteria before completing: 1. **Conversation Analyzed**: Did you review the conversation for DeepWork job executions? 2. **Confusion Identified**: Did you identify points of confusion, errors, or inefficiencies? From 12fb39fe918d201b814cb7956f278f20b080282f Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Fri, 16 Jan 2026 18:11:41 -0700 Subject: [PATCH 5/6] Add commit job with script-only hooks for test and format validation Introduces a new bespoke commit job for this repo with three steps: test, format, and reconcile_and_push. The job validates code quality before pushing changes by running pytest and ruff checks. Uses script hooks (not prompt hooks) for the test and format steps, with guidance on interpreting hook output included in the step instructions. This simpler approach lets the agent handle results directly rather than requiring a separate prompt-based evaluator. 
Co-Authored-By: Claude Opus 4.5 --- .claude/commands/add_platform.md | 19 +- .claude/commands/commit.md | 73 ++++ .claude/commands/deepwork_jobs.learn.md | 22 +- .claude/commands/deepwork_jobs.md | 19 +- .claude/commands/deepwork_policy.md | 19 +- .claude/commands/update.md | 19 +- .../uw.add_platform.add_capabilities.md | 38 +- .claude/commands/uw.add_platform.implement.md | 48 +-- .claude/commands/uw.add_platform.research.md | 40 +- .claude/commands/uw.add_platform.verify.md | 40 +- .claude/commands/uw.commit.format.md | 198 ++++++++++ .../commands/uw.commit.reconcile_and_push.md | 244 ++++++++++++ .claude/commands/uw.commit.test.md | 170 ++++++++ .claude/commands/uw.deepwork_jobs.define.md | 22 +- .../commands/uw.deepwork_jobs.implement.md | 60 ++- .claude/commands/uw.update.job.md | 26 +- .deepwork/jobs/add_platform/job.yml | 61 +-- .deepwork/jobs/commit/AGENTS.md | 30 ++ .deepwork/jobs/commit/hooks/.gitkeep | 0 .deepwork/jobs/commit/hooks/run_ruff.sh | 3 + .deepwork/jobs/commit/hooks/run_tests.sh | 3 + .deepwork/jobs/commit/job.yml | 70 ++++ .deepwork/jobs/commit/steps/format.md | 89 +++++ .../jobs/commit/steps/reconcile_and_push.md | 108 ++++++ .deepwork/jobs/commit/steps/test.md | 67 ++++ .deepwork/jobs/commit/templates/.gitkeep | 0 .deepwork/jobs/deepwork_jobs/job.yml | 54 +-- .deepwork/jobs/update/job.yml | 13 +- .gemini/commands/add_platform/index.toml | 23 +- .../add_platform/uw.add_capabilities.toml | 15 + .../commands/add_platform/uw.implement.toml | 15 + .../commands/add_platform/uw.research.toml | 15 + .gemini/commands/add_platform/uw.verify.toml | 15 + .gemini/commands/commit/index.toml | 81 ++++ .gemini/commands/commit/uw.format.toml | 172 ++++++++ .../commit/uw.reconcile_and_push.toml | 216 +++++++++++ .gemini/commands/commit/uw.test.toml | 144 +++++++ .gemini/commands/deepwork_jobs/index.toml | 21 +- .gemini/commands/deepwork_jobs/learn.toml | 19 + .gemini/commands/deepwork_jobs/uw.define.toml | 17 + .../commands/deepwork_jobs/uw.implement.toml | 
18 + .gemini/commands/deepwork_policy/index.toml | 17 +- .gemini/commands/update/index.toml | 17 +- .gemini/commands/update/uw.job.toml | 13 + src/deepwork/core/generator.py | 1 + src/deepwork/core/parser.py | 4 + src/deepwork/schemas/job_schema.py | 8 + .../standard_jobs/deepwork_jobs/job.yml | 54 +-- .../claude/command-job-meta.md.jinja | 19 +- .../claude/command-job-step.md.jinja | 47 ++- .../gemini/command-job-meta.toml.jinja | 17 +- .../gemini/command-job-step.toml.jinja | 12 +- tests/unit/test_stop_hooks.py | 366 ++++++++++++++++++ 53 files changed, 2500 insertions(+), 401 deletions(-) create mode 100644 .claude/commands/commit.md create mode 100644 .claude/commands/uw.commit.format.md create mode 100644 .claude/commands/uw.commit.reconcile_and_push.md create mode 100644 .claude/commands/uw.commit.test.md create mode 100644 .deepwork/jobs/commit/AGENTS.md create mode 100644 .deepwork/jobs/commit/hooks/.gitkeep create mode 100755 .deepwork/jobs/commit/hooks/run_ruff.sh create mode 100755 .deepwork/jobs/commit/hooks/run_tests.sh create mode 100644 .deepwork/jobs/commit/job.yml create mode 100644 .deepwork/jobs/commit/steps/format.md create mode 100644 .deepwork/jobs/commit/steps/reconcile_and_push.md create mode 100644 .deepwork/jobs/commit/steps/test.md create mode 100644 .deepwork/jobs/commit/templates/.gitkeep create mode 100644 .gemini/commands/commit/index.toml create mode 100644 .gemini/commands/commit/uw.format.toml create mode 100644 .gemini/commands/commit/uw.reconcile_and_push.toml create mode 100644 .gemini/commands/commit/uw.test.toml diff --git a/.claude/commands/add_platform.md b/.claude/commands/add_platform.md index 5364068..e23ea1f 100644 --- a/.claude/commands/add_platform.md +++ b/.claude/commands/add_platform.md @@ -46,26 +46,29 @@ This job has 4 step(s): ## Instructions -Determine what the user wants to do and route to the appropriate step. +This is a **multi-step workflow**. 
Determine the starting point and run through the steps in sequence. 1. **Analyze user intent** from the text that follows `/add_platform` -2. **Match intent to a step**: +2. **Identify the starting step** based on intent: - research: Capture CLI configuration and hooks system documentation for the new platform - add_capabilities: Update job schema and adapters with any new hook events the platform supports - implement: Add platform adapter, templates, tests with 100% coverage, and README documentation - verify: Set up platform directories and verify deepwork install works correctly -3. **Invoke the matched step** using the Skill tool: - ``` - Skill: - ``` +3. **Run the workflow** starting from the identified step: + - Invoke the starting step using the Skill tool + - When that step completes, **automatically continue** to the next step in the workflow + - Continue until the workflow is complete or the user intervenes -4. **If intent is ambiguous**, ask the user which step they want: +4. **If intent is ambiguous**, ask the user which step to start from: - Present the available steps as numbered options - Use AskUserQuestion to let them choose -**Critical**: You MUST invoke the step using the Skill tool. Do not copy/paste the step's instructions. The Skill tool invocation ensures the step's quality validation hooks fire. +**Critical**: +- You MUST invoke each step using the Skill tool. Do not copy/paste step instructions. +- After each step completes, check if there's a next step and invoke it automatically. +- The workflow continues until all dependent steps are complete. ## Context Files diff --git a/.claude/commands/commit.md b/.claude/commands/commit.md new file mode 100644 index 0000000..4ebc2f2 --- /dev/null +++ b/.claude/commands/commit.md @@ -0,0 +1,73 @@ +--- +description: Validate, format, and push changes with tests passing +--- + +# commit + +You are executing the **commit** job. 
Validate, format, and push changes with tests passing + +A pre-commit workflow that ensures code quality before pushing changes. + +This job runs through three validation and preparation steps: +1. Runs the test suite and fixes any failures until all tests pass (max 5 attempts) +2. Runs ruff formatting and linting, fixing issues until clean (max 5 attempts) +3. Fetches from remote, rebases if needed, generates a simple commit message, + commits changes, and pushes to the remote branch + +Each step uses a quality validation loop to ensure it completes successfully +before moving to the next step. The format step runs as a subagent to +minimize token usage. + +Key behaviors: +- Rebase strategy when remote has changes (keeps linear history) +- Simple summary commit messages (no conventional commits format) +- Maximum 5 fix attempts before stopping + +Designed for developers who want a reliable pre-push workflow that catches +issues early and ensures consistent code quality. + + +## Available Steps + +This job has 3 step(s): + +### test +**Run Tests**: Run pytest and fix any failures until all tests pass (max 5 attempts) +- Command: `uw.commit.test` +### format +**Format Code**: Run ruff formatting and linting, fix issues until clean (max 5 attempts, runs as subagent) +- Command: `uw.commit.format` +- Requires: test +### reconcile_and_push +**Reconcile and Push**: Fetch remote, rebase if needed, commit with simple summary message, and push +- Command: `uw.commit.reconcile_and_push` +- Requires: format + +## Instructions + +This is a **multi-step workflow**. Determine the starting point and run through the steps in sequence. + +1. **Analyze user intent** from the text that follows `/commit` + +2. 
**Identify the starting step** based on intent: + - test: Run pytest and fix any failures until all tests pass (max 5 attempts) + - format: Run ruff formatting and linting, fix issues until clean (max 5 attempts, runs as subagent) + - reconcile_and_push: Fetch remote, rebase if needed, commit with simple summary message, and push + +3. **Run the workflow** starting from the identified step: + - Invoke the starting step using the Skill tool + - When that step completes, **automatically continue** to the next step in the workflow + - Continue until the workflow is complete or the user intervenes + +4. **If intent is ambiguous**, ask the user which step to start from: + - Present the available steps as numbered options + - Use AskUserQuestion to let them choose + +**Critical**: +- You MUST invoke each step using the Skill tool. Do not copy/paste step instructions. +- After each step completes, check if there's a next step and invoke it automatically. +- The workflow continues until all dependent steps are complete. + +## Context Files + +- Job definition: `.deepwork/jobs/commit/job.yml` \ No newline at end of file diff --git a/.claude/commands/deepwork_jobs.learn.md b/.claude/commands/deepwork_jobs.learn.md index c165d18..d891065 100644 --- a/.claude/commands/deepwork_jobs.learn.md +++ b/.claude/commands/deepwork_jobs.learn.md @@ -9,10 +9,8 @@ hooks: ## Quality Criteria - **AGENT: TAKE ACTION** - Verify the learning process meets ALL quality criteria before completing: - - 1. **Conversation Analyzed**: Did you review the conversation for DeepWork job executions? - 2. **Confusion Identified**: Did you identify points of confusion, errors, or inefficiencies? + 1. **Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions? + 2. **Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies? 3. **Instructions Improved**: Were job instructions updated to address identified issues? 4. 
**Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity? 5. **Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files? @@ -22,10 +20,6 @@ hooks: 9. **Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md? 10. **Sync Complete**: Has `deepwork sync` been run if instructions were modified? - If ANY criterion is not met, continue working to address it. - If ALL criteria are satisfied, include `✓ Quality Criteria Met` in your response. - - ## Instructions Review the conversation and determine if ALL quality criteria above have been satisfied. @@ -34,8 +28,8 @@ hooks: If the agent has included `✓ Quality Criteria Met` in their response AND all criteria appear to be met, respond with: {"ok": true} - If criteria are NOT met AND the promise tag is missing, respond with: - {"ok": false, "reason": "Continue working. [specific feedback on what's wrong]"} + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} --- # deepwork_jobs.learn @@ -386,10 +380,9 @@ Ensure all outputs are: This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. ### Quality Criteria -**AGENT: TAKE ACTION** - Verify the learning process meets ALL quality criteria before completing: -1. **Conversation Analyzed**: Did you review the conversation for DeepWork job executions? -2. **Confusion Identified**: Did you identify points of confusion, errors, or inefficiencies? +1. **Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions? +2. **Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies? 3. **Instructions Improved**: Were job instructions updated to address identified issues? 
4. **Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity? 5. **Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files? @@ -399,9 +392,6 @@ This step uses an iterative quality validation loop. After completing your work, 9. **Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md? 10. **Sync Complete**: Has `deepwork sync` been run if instructions were modified? -If ANY criterion is not met, continue working to address it. -If ALL criteria are satisfied, include `✓ Quality Criteria Met` in your response. - ### Completion Promise diff --git a/.claude/commands/deepwork_jobs.md b/.claude/commands/deepwork_jobs.md index ff4feab..2d9b911 100644 --- a/.claude/commands/deepwork_jobs.md +++ b/.claude/commands/deepwork_jobs.md @@ -35,25 +35,28 @@ This job has 3 step(s): ## Instructions -Determine what the user wants to do and route to the appropriate step. +This is a **multi-step workflow**. Determine the starting point and run through the steps in sequence. 1. **Analyze user intent** from the text that follows `/deepwork_jobs` -2. **Match intent to a step**: +2. **Identify the starting step** based on intent: - define: Create the job.yml specification file by understanding workflow requirements - implement: Generate instruction files for each step based on the job.yml specification - learn: Reflect on conversation to improve job instructions and capture learnings -3. **Invoke the matched step** using the Skill tool: - ``` - Skill: - ``` +3. **Run the workflow** starting from the identified step: + - Invoke the starting step using the Skill tool + - When that step completes, **automatically continue** to the next step in the workflow + - Continue until the workflow is complete or the user intervenes -4. **If intent is ambiguous**, ask the user which step they want: +4. 
**If intent is ambiguous**, ask the user which step to start from: - Present the available steps as numbered options - Use AskUserQuestion to let them choose -**Critical**: You MUST invoke the step using the Skill tool. Do not copy/paste the step's instructions. The Skill tool invocation ensures the step's quality validation hooks fire. +**Critical**: +- You MUST invoke each step using the Skill tool. Do not copy/paste step instructions. +- After each step completes, check if there's a next step and invoke it automatically. +- The workflow continues until all dependent steps are complete. ## Context Files diff --git a/.claude/commands/deepwork_policy.md b/.claude/commands/deepwork_policy.md index d705cc7..9734231 100644 --- a/.claude/commands/deepwork_policy.md +++ b/.claude/commands/deepwork_policy.md @@ -33,23 +33,26 @@ This job has 1 step(s): ## Instructions -Determine what the user wants to do and route to the appropriate step. +This is a **multi-step workflow**. Determine the starting point and run through the steps in sequence. 1. **Analyze user intent** from the text that follows `/deepwork_policy` -2. **Match intent to a step**: +2. **Identify the starting step** based on intent: - define: Create or update policy entries in .deepwork.policy.yml -3. **Invoke the matched step** using the Skill tool: - ``` - Skill: - ``` +3. **Run the workflow** starting from the identified step: + - Invoke the starting step using the Skill tool + - When that step completes, **automatically continue** to the next step in the workflow + - Continue until the workflow is complete or the user intervenes -4. **If intent is ambiguous**, ask the user which step they want: +4. **If intent is ambiguous**, ask the user which step to start from: - Present the available steps as numbered options - Use AskUserQuestion to let them choose -**Critical**: You MUST invoke the step using the Skill tool. Do not copy/paste the step's instructions. 
The Skill tool invocation ensures the step's quality validation hooks fire. +**Critical**: +- You MUST invoke each step using the Skill tool. Do not copy/paste step instructions. +- After each step completes, check if there's a next step and invoke it automatically. +- The workflow continues until all dependent steps are complete. ## Context Files diff --git a/.claude/commands/update.md b/.claude/commands/update.md index 25a61eb..47e84ce 100644 --- a/.claude/commands/update.md +++ b/.claude/commands/update.md @@ -31,23 +31,26 @@ This job has 1 step(s): ## Instructions -Determine what the user wants to do and route to the appropriate step. +This is a **multi-step workflow**. Determine the starting point and run through the steps in sequence. 1. **Analyze user intent** from the text that follows `/update` -2. **Match intent to a step**: +2. **Identify the starting step** based on intent: - job: Edit standard job source files and sync to installed locations -3. **Invoke the matched step** using the Skill tool: - ``` - Skill: - ``` +3. **Run the workflow** starting from the identified step: + - Invoke the starting step using the Skill tool + - When that step completes, **automatically continue** to the next step in the workflow + - Continue until the workflow is complete or the user intervenes -4. **If intent is ambiguous**, ask the user which step they want: +4. **If intent is ambiguous**, ask the user which step to start from: - Present the available steps as numbered options - Use AskUserQuestion to let them choose -**Critical**: You MUST invoke the step using the Skill tool. Do not copy/paste the step's instructions. The Skill tool invocation ensures the step's quality validation hooks fire. +**Critical**: +- You MUST invoke each step using the Skill tool. Do not copy/paste step instructions. +- After each step completes, check if there's a next step and invoke it automatically. +- The workflow continues until all dependent steps are complete. 
## Context Files diff --git a/.claude/commands/uw.add_platform.add_capabilities.md b/.claude/commands/uw.add_platform.add_capabilities.md index 5b1b022..76b4f71 100644 --- a/.claude/commands/uw.add_platform.add_capabilities.md +++ b/.claude/commands/uw.add_platform.add_capabilities.md @@ -9,17 +9,12 @@ hooks: ## Quality Criteria - **AGENT: TAKE ACTION** - Verify the capability additions meet ALL criteria: - 1. Any new hooks from the platform (for slash commands only) are added to src/deepwork/schemas/job_schema.py - 2. All existing adapters in src/deepwork/adapters.py are updated with the new hook fields - (set to None/null if the platform doesn't support that hook) - 3. Only hooks available on slash command definitions are added (not general CLI hooks) - 4. job_schema.py remains valid Python with no syntax errors - 5. adapters.py remains consistent - all adapters have the same hook fields - 6. If no new hooks are needed, document why in a comment - - If ALL criteria are met, include `✓ Quality Criteria Met`. - + 1. Are any new hooks from the platform (for slash commands only) added to src/deepwork/schemas/job_schema.py? + 2. Are all existing adapters in src/deepwork/adapters.py updated with the new hook fields? (set to None/null if the platform doesn't support that hook) + 3. Are only hooks available on slash command definitions added (not general CLI hooks)? + 4. Does job_schema.py remain valid Python with no syntax errors? + 5. Is adapters.py consistent - all adapters have the same hook fields? + 6. If no new hooks are needed, is there documentation explaining why? ## Instructions @@ -29,8 +24,8 @@ hooks: If the agent has included `✓ Quality Criteria Met` in their response AND all criteria appear to be met, respond with: {"ok": true} - If criteria are NOT met AND the promise tag is missing, respond with: - {"ok": false, "reason": "Continue working. 
[specific feedback on what's wrong]"} + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} --- # add_platform.add_capabilities @@ -242,16 +237,13 @@ Ensure all outputs are: This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. ### Quality Criteria -**AGENT: TAKE ACTION** - Verify the capability additions meet ALL criteria: -1. Any new hooks from the platform (for slash commands only) are added to src/deepwork/schemas/job_schema.py -2. All existing adapters in src/deepwork/adapters.py are updated with the new hook fields - (set to None/null if the platform doesn't support that hook) -3. Only hooks available on slash command definitions are added (not general CLI hooks) -4. job_schema.py remains valid Python with no syntax errors -5. adapters.py remains consistent - all adapters have the same hook fields -6. If no new hooks are needed, document why in a comment - -If ALL criteria are met, include `✓ Quality Criteria Met`. + +1. Are any new hooks from the platform (for slash commands only) added to src/deepwork/schemas/job_schema.py? +2. Are all existing adapters in src/deepwork/adapters.py updated with the new hook fields? (set to None/null if the platform doesn't support that hook) +3. Are only hooks available on slash command definitions added (not general CLI hooks)? +4. Does job_schema.py remain valid Python with no syntax errors? +5. Is adapters.py consistent - all adapters have the same hook fields? +6. If no new hooks are needed, is there documentation explaining why? 
### Completion Promise diff --git a/.claude/commands/uw.add_platform.implement.md b/.claude/commands/uw.add_platform.implement.md index 381ae89..9696ef6 100644 --- a/.claude/commands/uw.add_platform.implement.md +++ b/.claude/commands/uw.add_platform.implement.md @@ -3,27 +3,18 @@ description: Add platform adapter, templates, tests with 100% coverage, and READ hooks: Stop: - hooks: - - type: command - command: ".deepwork/jobs/add_platform/hooks/run_tests.sh" - type: prompt prompt: | You must evaluate whether Claude has met all the below quality criteria for the request. ## Quality Criteria - **AGENT: TAKE ACTION** - Verify the implementation meets ALL criteria: - 1. Platform adapter class is added to src/deepwork/adapters.py - 2. Templates exist in src/deepwork/templates// with appropriate command structure - 3. Tests exist for all new functionality - 4. Test coverage is 100% for new code (run: uv run pytest --cov) - 5. All tests pass - 6. README.md is updated with: - - New platform listed in supported platforms - - Installation instructions for the platform - - Any platform-specific notes - - If ALL criteria are met, include `✓ Quality Criteria Met`. - + 1. Is the platform adapter class added to src/deepwork/adapters.py? + 2. Do templates exist in src/deepwork/templates// with appropriate command structure? + 3. Do tests exist for all new functionality? + 4. Is test coverage 100% for new code (run: uv run pytest --cov)? + 5. Do all tests pass? + 6. Is README.md updated with: New platform listed in supported platforms, Installation instructions for the platform, Any platform-specific notes ## Instructions @@ -33,8 +24,8 @@ hooks: If the agent has included `✓ Quality Criteria Met` in their response AND all criteria appear to be met, respond with: {"ok": true} - If criteria are NOT met AND the promise tag is missing, respond with: - {"ok": false, "reason": "Continue working. 
[specific feedback on what's wrong]"} + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} --- # add_platform.implement @@ -339,23 +330,18 @@ Ensure all outputs are: This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. +### Quality Criteria + +1. Is the platform adapter class added to src/deepwork/adapters.py? +2. Do templates exist in src/deepwork/templates// with appropriate command structure? +3. Do tests exist for all new functionality? +4. Is test coverage 100% for new code (run: uv run pytest --cov)? +5. Do all tests pass? +6. Is README.md updated with: New platform listed in supported platforms, Installation instructions for the platform, Any platform-specific notes + **Validation Script**: `.deepwork/jobs/add_platform/hooks/run_tests.sh` The validation script will be executed automatically when you attempt to complete this step. -### Quality Criteria (2) -**AGENT: TAKE ACTION** - Verify the implementation meets ALL criteria: -1. Platform adapter class is added to src/deepwork/adapters.py -2. Templates exist in src/deepwork/templates// with appropriate command structure -3. Tests exist for all new functionality -4. Test coverage is 100% for new code (run: uv run pytest --cov) -5. All tests pass -6. README.md is updated with: - - New platform listed in supported platforms - - Installation instructions for the platform - - Any platform-specific notes - -If ALL criteria are met, include `✓ Quality Criteria Met`. 
- ### Completion Promise diff --git a/.claude/commands/uw.add_platform.research.md b/.claude/commands/uw.add_platform.research.md index 3c82569..b9ef31e 100644 --- a/.claude/commands/uw.add_platform.research.md +++ b/.claude/commands/uw.add_platform.research.md @@ -9,18 +9,12 @@ hooks: ## Quality Criteria - **AGENT: TAKE ACTION** - Verify the research output meets ALL criteria: - 1. Both files exist in doc/platforms//: cli_configuration.md and hooks_system.md - 2. Each file has a comment at the top with: - - Last updated date - - Source URL where the documentation was obtained - 3. cli_configuration.md covers how the platform's CLI is configured - 4. hooks_system.md covers hooks available for slash command definitions ONLY - 5. No extraneous documentation (only these two specific topics) - 6. Documentation is comprehensive enough to implement the platform - - If ALL criteria are met, include `✓ Quality Criteria Met`. - + 1. Do both files exist in doc/platforms//: cli_configuration.md and hooks_system.md? + 2. Does each file have a comment at the top with last updated date and source URL? + 3. Does cli_configuration.md cover how the platform's CLI is configured? + 4. Does hooks_system.md cover hooks available for slash command definitions ONLY? + 5. Is there no extraneous documentation (only these two specific topics)? + 6. Is the documentation comprehensive enough to implement the platform? ## Instructions @@ -30,8 +24,8 @@ hooks: If the agent has included `✓ Quality Criteria Met` in their response AND all criteria appear to be met, respond with: {"ok": true} - If criteria are NOT met AND the promise tag is missing, respond with: - {"ok": false, "reason": "Continue working. 
[specific feedback on what's wrong]"} + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} --- # add_platform.research @@ -288,17 +282,13 @@ Ensure all outputs are: This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. ### Quality Criteria -**AGENT: TAKE ACTION** - Verify the research output meets ALL criteria: -1. Both files exist in doc/platforms//: cli_configuration.md and hooks_system.md -2. Each file has a comment at the top with: - - Last updated date - - Source URL where the documentation was obtained -3. cli_configuration.md covers how the platform's CLI is configured -4. hooks_system.md covers hooks available for slash command definitions ONLY -5. No extraneous documentation (only these two specific topics) -6. Documentation is comprehensive enough to implement the platform - -If ALL criteria are met, include `✓ Quality Criteria Met`. + +1. Do both files exist in doc/platforms//: cli_configuration.md and hooks_system.md? +2. Does each file have a comment at the top with last updated date and source URL? +3. Does cli_configuration.md cover how the platform's CLI is configured? +4. Does hooks_system.md cover hooks available for slash command definitions ONLY? +5. Is there no extraneous documentation (only these two specific topics)? +6. Is the documentation comprehensive enough to implement the platform? ### Completion Promise diff --git a/.claude/commands/uw.add_platform.verify.md b/.claude/commands/uw.add_platform.verify.md index 44288d2..ac8ab1b 100644 --- a/.claude/commands/uw.add_platform.verify.md +++ b/.claude/commands/uw.add_platform.verify.md @@ -9,16 +9,12 @@ hooks: ## Quality Criteria - **AGENT: TAKE ACTION** - Verify the installation meets ALL criteria: - 1. 
Platform-specific directories/files are added to the deepwork repo as needed - 2. Running `deepwork install --platform ` completes without errors - 3. Expected command files are created in the platform's command directory - 4. Command file content matches the templates and job definitions - 5. Established DeepWork jobs (deepwork_jobs, deepwork_rules) are installed correctly - 6. The platform can be used alongside existing platforms without conflicts - - If ALL criteria are met, include `✓ Quality Criteria Met`. - + 1. Are platform-specific directories/files added to the deepwork repo as needed? + 2. Does running `deepwork install --platform ` complete without errors? + 3. Are expected command files created in the platform's command directory? + 4. Does command file content match the templates and job definitions? + 5. Are established DeepWork jobs (deepwork_jobs, deepwork_policy) installed correctly? + 6. Can the platform be used alongside existing platforms without conflicts? ## Instructions @@ -28,8 +24,8 @@ hooks: If the agent has included `✓ Quality Criteria Met` in their response AND all criteria appear to be met, respond with: {"ok": true} - If criteria are NOT met AND the promise tag is missing, respond with: - {"ok": false, "reason": "Continue working. [specific feedback on what's wrong]"} + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} --- # add_platform.verify @@ -121,7 +117,7 @@ Ensure the implementation step is complete: - `deepwork_jobs.define.md` exists (or equivalent for the platform) - `deepwork_jobs.implement.md` exists - `deepwork_jobs.refine.md` exists - - `deepwork_rules.define.md` exists + - `deepwork_policy.define.md` exists - All expected step commands exist 4. 
**Validate command file content** @@ -151,7 +147,7 @@ Ensure the implementation step is complete: - `deepwork install --platform ` completes without errors - All expected command files are created: - deepwork_jobs.define, implement, refine - - deepwork_rules.define + - deepwork_policy.define - Any other standard job commands - Command file content is correct: - Matches platform's expected format @@ -213,15 +209,13 @@ No specific files are output by this command. This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. ### Quality Criteria -**AGENT: TAKE ACTION** - Verify the installation meets ALL criteria: -1. Platform-specific directories/files are added to the deepwork repo as needed -2. Running `deepwork install --platform ` completes without errors -3. Expected command files are created in the platform's command directory -4. Command file content matches the templates and job definitions -5. Established DeepWork jobs (deepwork_jobs, deepwork_rules) are installed correctly -6. The platform can be used alongside existing platforms without conflicts - -If ALL criteria are met, include `✓ Quality Criteria Met`. + +1. Are platform-specific directories/files added to the deepwork repo as needed? +2. Does running `deepwork install --platform ` complete without errors? +3. Are expected command files created in the platform's command directory? +4. Does command file content match the templates and job definitions? +5. Are established DeepWork jobs (deepwork_jobs, deepwork_policy) installed correctly? +6. Can the platform be used alongside existing platforms without conflicts? 
### Completion Promise diff --git a/.claude/commands/uw.commit.format.md b/.claude/commands/uw.commit.format.md new file mode 100644 index 0000000..3b2032c --- /dev/null +++ b/.claude/commands/uw.commit.format.md @@ -0,0 +1,198 @@ +--- +description: Run ruff formatting and linting, fix issues until clean (max 5 attempts, runs as subagent) +hooks: + Stop: + - hooks: + - type: command + command: ".deepwork/jobs/commit/hooks/run_ruff.sh" + - type: prompt + prompt: | + Evaluate the ruff format and lint check output above. + + **If ruff reported issues (exit code non-zero)**: Start your response with "**AGENT: TAKE ACTION** -" followed by what needs to be fixed. + + **If ruff reported no issues (exit code 0)**: Confirm the agent included `✓ Quality Criteria Met`. Allow completion. + +--- + +# commit.format + +**Step 2 of 3** in the **commit** workflow + +**Summary**: Validate, format, and push changes with tests passing + +## Job Overview + +A pre-commit workflow that ensures code quality before pushing changes. + +This job runs through three validation and preparation steps: +1. Runs the test suite and fixes any failures until all tests pass (max 5 attempts) +2. Runs ruff formatting and linting, fixing issues until clean (max 5 attempts) +3. Fetches from remote, rebases if needed, generates a simple commit message, + commits changes, and pushes to the remote branch + +Each step uses a quality validation loop to ensure it completes successfully +before moving to the next step. The format step runs as a subagent to +minimize token usage. + +Key behaviors: +- Rebase strategy when remote has changes (keeps linear history) +- Simple summary commit messages (no conventional commits format) +- Maximum 5 fix attempts before stopping + +Designed for developers who want a reliable pre-push workflow that catches +issues early and ensures consistent code quality. 
+ + +## Prerequisites + +This step requires completion of the following step(s): +- `/commit.test` + +Please ensure these steps have been completed before proceeding. + +## Instructions + +# Format Code + +## Objective + +Run ruff formatting and linting checks, fixing any issues until the code is clean (maximum 5 attempts). + +## Task + +Execute ruff to check code formatting and linting. If any issues are found, fix them. Continue this cycle until ruff reports no issues or you've made 5 fix attempts. + +**Note**: This step is designed to run as a subagent to minimize token usage. Focus on efficient, targeted fixes. + +### Process + +1. **Check format and lint status** + The hook automatically runs: + ```bash + uv run ruff format --check src/ tests/ + uv run ruff check src/ tests/ + ``` + +2. **Analyze ruff output** + - If both commands pass (exit code 0), you're done with this step + - If issues are reported, examine them carefully + +3. **Fix issues** (if needed) + + **For formatting issues**: + ```bash + uv run ruff format src/ tests/ + ``` + This auto-fixes formatting issues. + + **For linting issues**: + - Some can be auto-fixed: `uv run ruff check --fix src/ tests/` + - Others require manual fixes based on the error messages + - Common issues: unused imports, undefined names, line length + +4. 
**Repeat if necessary** + - Re-run checks after fixes + - Continue until all issues are resolved + - Track your attempts - stop after 5 fix attempts if issues remain + - If you cannot fix after 5 attempts, report remaining issues to the user + +### Common Ruff Issues and Fixes + +| Issue | Fix | +|-------|-----| +| F401 unused import | Remove the import | +| F841 unused variable | Remove or use the variable | +| E501 line too long | Break into multiple lines | +| I001 import sorting | Run `ruff check --fix` or reorder manually | +| E711 comparison to None | Use `is None` instead of `== None` | + +### Important Notes + +- **Run ruff format first** - It auto-fixes most formatting issues +- **Use --fix for lint issues** - Many lint issues can be auto-fixed +- **Minimal manual fixes** - Only manually fix what auto-fix can't handle +- **Track attempts** - Keep count of fix attempts to respect the 5-attempt limit + +## Output Format + +No file output is required. Success is determined by ruff passing all checks. + +**On success**: Report that ruff checks pass and proceed to the next step. + +**On failure after 5 attempts**: Report which issues remain and why you couldn't fix them. + +## Quality Criteria + +- `uv run ruff format --check src/ tests/` passes (exit code 0) +- `uv run ruff check src/ tests/` passes (exit code 0) +- Any fixes made don't break functionality (tests should still pass) +- If issues couldn't be fixed in 5 attempts, clear explanation provided +- When all checks pass, include `✓ Quality Criteria Met` in your response + +## Context + +This is the second step in the commit workflow, after tests pass. Code must be properly formatted and lint-free before committing. The format step uses a script hook that automatically runs ruff checks, so focus on analyzing results and making fixes efficiently. + + + +## Work Branch Management + +All work for this job should be done on a dedicated work branch: + +1. 
**Check current branch**: + - If already on a work branch for this job (format: `deepwork/commit-[instance]-[date]`), continue using it + - If on main/master, create a new work branch + +2. **Create work branch** (if needed): + ```bash + git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d) + ``` + Replace `[instance]` with a descriptive identifier (e.g., `acme`, `q1-launch`, etc.) + +## Output Requirements + +No specific files are output by this command. + +## Quality Validation Loop + +This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. + + +**Validation Script**: `.deepwork/jobs/commit/hooks/run_ruff.sh` + +The validation script will be executed automatically when you attempt to complete this step. + +### Completion Promise + +To signal that all quality criteria have been met, include this tag in your final response: + +``` +✓ Quality Criteria Met +``` + +**Important**: Only include this promise tag when you have verified that ALL quality criteria above are satisfied. The validation loop will continue until this promise is detected. + +## Completion + +After completing this step: + +1. **Verify outputs**: Confirm all required files have been created + +2. 
**Inform the user**: + - Step 2 of 3 is complete + - Ready to proceed to next step: `/commit.reconcile_and_push` + +## Next Step + +To continue the workflow, run: +``` +/commit.reconcile_and_push +``` + +--- + +## Context Files + +- Job definition: `.deepwork/jobs/commit/job.yml` +- Step instructions: `.deepwork/jobs/commit/steps/format.md` \ No newline at end of file diff --git a/.claude/commands/uw.commit.reconcile_and_push.md b/.claude/commands/uw.commit.reconcile_and_push.md new file mode 100644 index 0000000..ac5c662 --- /dev/null +++ b/.claude/commands/uw.commit.reconcile_and_push.md @@ -0,0 +1,244 @@ +--- +description: Fetch remote, rebase if needed, commit with simple summary message, and push +hooks: + Stop: + - hooks: + - type: prompt + prompt: | + You must evaluate whether Claude has met all the below quality criteria for the request. + + ## Quality Criteria + + 1. Did the agent fetch from the remote to check for updates? + 2. If there were remote changes, did the agent rebase local changes on top? + 3. Did the agent generate a simple summary commit message based on the changes? + 4. Did the agent commit the changes? + 5. Did the agent push to the remote branch? + + ## Instructions + + Review the conversation and determine if ALL quality criteria above have been satisfied. + Look for evidence that each criterion has been addressed. + + If the agent has included `✓ Quality Criteria Met` in their response AND + all criteria appear to be met, respond with: {"ok": true} + + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} +--- + +# commit.reconcile_and_push + +**Step 3 of 3** in the **commit** workflow + +**Summary**: Validate, format, and push changes with tests passing + +## Job Overview + +A pre-commit workflow that ensures code quality before pushing changes. + +This job runs through three validation and preparation steps: +1. 
Runs the test suite and fixes any failures until all tests pass (max 5 attempts) +2. Runs ruff formatting and linting, fixing issues until clean (max 5 attempts) +3. Fetches from remote, rebases if needed, generates a simple commit message, + commits changes, and pushes to the remote branch + +Each step uses a quality validation loop to ensure it completes successfully +before moving to the next step. The format step runs as a subagent to +minimize token usage. + +Key behaviors: +- Rebase strategy when remote has changes (keeps linear history) +- Simple summary commit messages (no conventional commits format) +- Maximum 5 fix attempts before stopping + +Designed for developers who want a reliable pre-push workflow that catches +issues early and ensures consistent code quality. + + +## Prerequisites + +This step requires completion of the following step(s): +- `/commit.format` + +Please ensure these steps have been completed before proceeding. + +## Instructions + +# Reconcile and Push + +## Objective + +Fetch the latest changes from the remote, rebase if necessary, generate a commit message, commit the changes, and push to the remote branch. + +## Task + +Ensure the local branch is up-to-date with the remote, commit all staged changes with a clear summary message, and push to the remote repository. + +### Process + +1. **Fetch from remote** + ```bash + git fetch origin + ``` + +2. **Check for remote changes** + ```bash + git status + ``` + Look for "Your branch is behind" or "Your branch and 'origin/...' have diverged" + +3. **Rebase if needed** + If the remote has changes that aren't in your local branch: + ```bash + git rebase origin/<branch> + ``` + + **If rebase conflicts occur**: + - Resolve conflicts in the affected files + - Stage resolved files: `git add <file>` + - Continue rebase: `git rebase --continue` + - If conflicts are too complex, abort and report to user: `git rebase --abort` + +4.
**Review changes to commit** + ```bash + git status + git diff --staged + git diff + ``` + + Stage any unstaged changes that should be committed: + ```bash + git add -A + ``` + +5. **Generate commit message** + Analyze the changes and create a **simple summary** commit message: + - Look at the diff to understand what changed + - Write a clear, concise description (1-2 sentences) + - Focus on the "what" and "why", not the "how" + - No conventional commits format needed - just a clear summary + + **Good commit messages**: + - "Add user authentication with session management" + - "Fix race condition in data processing pipeline" + - "Update dependencies and fix compatibility issues" + - "Refactor database queries for better performance" + +6. **Commit the changes** + ```bash + git commit -m "<commit message>" + ``` + +7. **Push to remote** + ```bash + git push origin <branch> + ``` + + If push is rejected (remote has new changes), fetch and rebase again, then retry push. + +### Handling Edge Cases + +**No changes to commit**: +- If `git status` shows nothing to commit, inform the user and skip the commit/push + +**Protected branch**: +- If push fails due to branch protection, inform the user they may need to create a PR + +**Rebase conflicts**: +- Attempt to resolve simple conflicts +- For complex conflicts, abort the rebase and ask the user for guidance + +**Diverged branches**: +- Always use rebase (not merge) to maintain linear history +- If rebase fails repeatedly, report the issue to the user + +## Output Format + +No file output is required. Success is determined by successfully pushing to the remote. + +**On success**: Report the commit hash and confirm the push succeeded. + +**On failure**: Report what went wrong (conflicts, push rejection, etc.) and suggest next steps.
+ +## Quality Criteria + +- Fetched latest changes from remote +- Rebased on top of remote changes if any existed +- Generated a clear, simple summary commit message +- Successfully committed all changes +- Successfully pushed to the remote branch +- When all steps complete successfully, include `✓ Quality Criteria Met` in your response + +## Context + +This is the final step in the commit workflow. By this point, tests pass and code is formatted. This step ensures your changes are properly committed with a good message and pushed to share with the team. The rebase strategy keeps the git history linear and clean. + + + +## Work Branch Management + +All work for this job should be done on a dedicated work branch: + +1. **Check current branch**: + - If already on a work branch for this job (format: `deepwork/commit-[instance]-[date]`), continue using it + - If on main/master, create a new work branch + +2. **Create work branch** (if needed): + ```bash + git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d) + ``` + Replace `[instance]` with a descriptive identifier (e.g., `acme`, `q1-launch`, etc.) + +## Output Requirements + +No specific files are output by this command. + +## Quality Validation Loop + +This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. + +### Quality Criteria + +1. Did the agent fetch from the remote to check for updates? +2. If there were remote changes, did the agent rebase local changes on top? +3. Did the agent generate a simple summary commit message based on the changes? +4. Did the agent commit the changes? +5. Did the agent push to the remote branch? 
+ + +### Completion Promise + +To signal that all quality criteria have been met, include this tag in your final response: + +``` +✓ Quality Criteria Met +``` + +**Important**: Only include this promise tag when you have verified that ALL quality criteria above are satisfied. The validation loop will continue until this promise is detected. + +## Completion + +After completing this step: + +1. **Verify outputs**: Confirm all required files have been created + +2. **Inform the user**: + - Step 3 of 3 is complete + - This is the final step - the job is complete! + +## Workflow Complete + +This is the final step in the commit workflow. All outputs should now be complete and ready for review. + +Consider: +- Reviewing all work products +- Creating a pull request to merge the work branch +- Documenting any insights or learnings + +--- + +## Context Files + +- Job definition: `.deepwork/jobs/commit/job.yml` +- Step instructions: `.deepwork/jobs/commit/steps/reconcile_and_push.md` \ No newline at end of file diff --git a/.claude/commands/uw.commit.test.md b/.claude/commands/uw.commit.test.md new file mode 100644 index 0000000..31fd454 --- /dev/null +++ b/.claude/commands/uw.commit.test.md @@ -0,0 +1,170 @@ +--- +description: Run pytest and fix any failures until all tests pass (max 5 attempts) +hooks: + Stop: + - hooks: + - type: command + command: ".deepwork/jobs/commit/hooks/run_tests.sh" + - type: prompt + prompt: | + Evaluate the pytest output above. + + **If any tests failed**: Start your response with "**AGENT: TAKE ACTION** -" followed by which tests failed and why. + + **If ALL tests passed**: Confirm the agent included `✓ Quality Criteria Met`. Allow completion. + +--- + +# commit.test + +**Step 1 of 3** in the **commit** workflow + +**Summary**: Validate, format, and push changes with tests passing + +## Job Overview + +A pre-commit workflow that ensures code quality before pushing changes. + +This job runs through three validation and preparation steps: +1. 
Runs the test suite and fixes any failures until all tests pass (max 5 attempts) +2. Runs ruff formatting and linting, fixing issues until clean (max 5 attempts) +3. Fetches from remote, rebases if needed, generates a simple commit message, + commits changes, and pushes to the remote branch + +Each step uses a quality validation loop to ensure it completes successfully +before moving to the next step. The format step runs as a subagent to +minimize token usage. + +Key behaviors: +- Rebase strategy when remote has changes (keeps linear history) +- Simple summary commit messages (no conventional commits format) +- Maximum 5 fix attempts before stopping + +Designed for developers who want a reliable pre-push workflow that catches +issues early and ensures consistent code quality. + + + +## Instructions + +# Run Tests + +## Objective + +Run the project test suite and fix any failing tests until all tests pass (maximum 5 attempts). + +## Task + +Execute pytest to run all tests. If any tests fail, analyze the failures and fix them. Continue this cycle until all tests pass or you've made 5 fix attempts. + +### Process + +1. **Run the test suite** + ```bash + uv run pytest tests/ -v + ``` + +2. **Analyze test results** + - If all tests pass, you're done with this step + - If tests fail, examine the failure output carefully + +3. **Fix failing tests** (if needed) + - Read the failing test to understand what it's testing + - Read the relevant source code + - Determine if the issue is in the test or the implementation + - Make the minimal fix needed to pass the test + - Re-run tests to verify the fix + +4. 
**Repeat if necessary** + - Continue the fix cycle until all tests pass + - Track your attempts - stop after 5 fix attempts if tests still fail + - If you cannot fix after 5 attempts, report the remaining failures to the user + +### Important Notes + +- **Don't skip tests** - All tests must pass before proceeding +- **Minimal fixes** - Make the smallest change needed to fix each failure +- **Understand before fixing** - Read and understand failing tests before attempting fixes +- **Track attempts** - Keep count of fix attempts to respect the 5-attempt limit + +## Output Format + +No file output is required. Success is determined by all tests passing. + +**On success**: Report that all tests pass and proceed to the next step. + +**On failure after 5 attempts**: Report which tests are still failing and why you couldn't fix them. + +## Quality Criteria + +- All tests pass (`uv run pytest tests/ -v` exits with code 0) +- Any fixes made are minimal and don't break other functionality +- If tests couldn't be fixed in 5 attempts, clear explanation provided +- When all tests pass, include `✓ Quality Criteria Met` in your response + +## Context + +This is the first step in the commit workflow. Tests must pass before code formatting is checked, ensuring that any changes being committed are functionally correct. The test step uses a script hook that automatically runs pytest, so focus on analyzing results and making fixes. + + + +## Work Branch Management + +All work for this job should be done on a dedicated work branch: + +1. **Check current branch**: + - If already on a work branch for this job (format: `deepwork/commit-[instance]-[date]`), continue using it + - If on main/master, create a new work branch + +2. **Create work branch** (if needed): + ```bash + git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d) + ``` + Replace `[instance]` with a descriptive identifier (e.g., `acme`, `q1-launch`, etc.) 
+ +## Output Requirements + +No specific files are output by this command. + +## Quality Validation Loop + +This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. + + +**Validation Script**: `.deepwork/jobs/commit/hooks/run_tests.sh` + +The validation script will be executed automatically when you attempt to complete this step. + +### Completion Promise + +To signal that all quality criteria have been met, include this tag in your final response: + +``` +✓ Quality Criteria Met +``` + +**Important**: Only include this promise tag when you have verified that ALL quality criteria above are satisfied. The validation loop will continue until this promise is detected. + +## Completion + +After completing this step: + +1. **Verify outputs**: Confirm all required files have been created + +2. **Inform the user**: + - Step 1 of 3 is complete + - Ready to proceed to next step: `/commit.format` + +## Next Step + +To continue the workflow, run: +``` +/commit.format +``` + +--- + +## Context Files + +- Job definition: `.deepwork/jobs/commit/job.yml` +- Step instructions: `.deepwork/jobs/commit/steps/test.md` \ No newline at end of file diff --git a/.claude/commands/uw.deepwork_jobs.define.md b/.claude/commands/uw.deepwork_jobs.define.md index a4319d7..6902fe1 100644 --- a/.claude/commands/uw.deepwork_jobs.define.md +++ b/.claude/commands/uw.deepwork_jobs.define.md @@ -9,10 +9,8 @@ hooks: ## Quality Criteria - **AGENT: TAKE ACTION** - Verify the job.yml output meets ALL quality criteria before completing: - - 1. **User Understanding**: Did you fully understand the user's workflow by asking structured questions? - 2. **Structured Questions Used**: Did you ask structured questions (using the AskUserQuestion tool) to gather user input? + 1. 
**User Understanding**: Did the agent fully understand the user's workflow by asking structured questions? + 2. **Structured Questions Used**: Did the agent ask structured questions (using the AskUserQuestion tool) to gather user input? 3. **Clear Inputs/Outputs**: Does every step have clearly defined inputs and outputs? 4. **Logical Dependencies**: Do step dependencies make sense and avoid circular references? 5. **Concise Summary**: Is the summary under 200 characters and descriptive? @@ -20,10 +18,6 @@ hooks: 7. **Valid Schema**: Does the job.yml follow the required schema (name, version, summary, steps)? 8. **File Created**: Has the job.yml file been created in `.deepwork/jobs/[job_name]/job.yml`? - If ANY criterion is not met, continue working to address it. - If ALL criteria are satisfied, include `✓ Quality Criteria Met` in your response. - - ## Instructions Review the conversation and determine if ALL quality criteria above have been satisfied. @@ -32,8 +26,8 @@ hooks: If the agent has included `✓ Quality Criteria Met` in their response AND all criteria appear to be met, respond with: {"ok": true} - If criteria are NOT met AND the promise tag is missing, respond with: - {"ok": false, "reason": "Continue working. [specific feedback on what's wrong]"} + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} --- # deepwork_jobs.define @@ -427,10 +421,9 @@ Ensure all outputs are: This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. ### Quality Criteria -**AGENT: TAKE ACTION** - Verify the job.yml output meets ALL quality criteria before completing: -1. **User Understanding**: Did you fully understand the user's workflow by asking structured questions? -2. 
**Structured Questions Used**: Did you ask structured questions (using the AskUserQuestion tool) to gather user input? +1. **User Understanding**: Did the agent fully understand the user's workflow by asking structured questions? +2. **Structured Questions Used**: Did the agent ask structured questions (using the AskUserQuestion tool) to gather user input? 3. **Clear Inputs/Outputs**: Does every step have clearly defined inputs and outputs? 4. **Logical Dependencies**: Do step dependencies make sense and avoid circular references? 5. **Concise Summary**: Is the summary under 200 characters and descriptive? @@ -438,9 +431,6 @@ This step uses an iterative quality validation loop. After completing your work, 7. **Valid Schema**: Does the job.yml follow the required schema (name, version, summary, steps)? 8. **File Created**: Has the job.yml file been created in `.deepwork/jobs/[job_name]/job.yml`? -If ANY criterion is not met, continue working to address it. -If ALL criteria are satisfied, include `✓ Quality Criteria Met` in your response. - ### Completion Promise diff --git a/.claude/commands/uw.deepwork_jobs.implement.md b/.claude/commands/uw.deepwork_jobs.implement.md index 6689d99..e766e4c 100644 --- a/.claude/commands/uw.deepwork_jobs.implement.md +++ b/.claude/commands/uw.deepwork_jobs.implement.md @@ -9,8 +9,6 @@ hooks: ## Quality Criteria - **AGENT: TAKE ACTION** - Verify the implementation meets ALL quality criteria before completing: - 1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly? 2. **Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)? 3. **Specific & Actionable**: Are instructions tailored to each step's purpose, not generic? @@ -19,13 +17,7 @@ hooks: 6. **Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"? 7. **Sync Complete**: Has `deepwork sync` been run successfully? 8. 
**Commands Available**: Are the slash-commands generated in `.claude/commands/`? - 9. **Rules Considered**: Have you thought about whether rules would benefit this job? - - If relevant rules were identified, did you explain them and offer to run `/deepwork_rules.define`? - - Not every job needs rules - only suggest when genuinely helpful. - - If ANY criterion is not met, continue working to address it. - If ALL criteria are satisfied, include `✓ Quality Criteria Met` in your response. - + 9. **Policies Considered**: Has the agent thought about whether policies would benefit this job? If relevant policies were identified, did they explain them and offer to run `/deepwork_policy.define`? Not every job needs policies - only suggest when genuinely helpful. ## Instructions @@ -35,8 +27,8 @@ hooks: If the agent has included `✓ Quality Criteria Met` in their response AND all criteria appear to be met, respond with: {"ok": true} - If criteria are NOT met AND the promise tag is missing, respond with: - {"ok": false, "reason": "Continue working. [specific feedback on what's wrong]"} + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} --- # deepwork_jobs.implement @@ -200,19 +192,19 @@ This will: After running `deepwork sync`, look at the "To use the new commands" section in the output. **Relay these exact reload instructions to the user** so they know how to pick up the new commands. Don't just reference the sync output - tell them directly what they need to do (e.g., "Type 'exit' then run 'claude --resume'" for Claude Code, or "Run '/memory refresh'" for Gemini CLI). -### Step 7: Consider Rules for the New Job +### Step 7: Consider Policies for the New Job -After implementing the job, consider whether there are **rules** that would help enforce quality or consistency when working with this job's domain. 
+After implementing the job, consider whether there are **policies** that would help enforce quality or consistency when working with this job's domain. -**What are rules?** +**What are policies?** -Rules are automated guardrails stored as markdown files in `.deepwork/rules/` that trigger when certain files change during an AI session. They help ensure: +Policies are automated guardrails defined in `.deepwork.policy.yml` that trigger when certain files change during an AI session. They help ensure: - Documentation stays in sync with code - Team guidelines are followed - Architectural decisions are respected - Quality standards are maintained -**When to suggest rules:** +**When to suggest policies:** Think about the job you just implemented and ask: - Does this job produce outputs that other files depend on? @@ -220,28 +212,28 @@ Think about the job you just implemented and ask: - Are there quality checks or reviews that should happen when certain files in this domain change? - Could changes to the job's output files impact other parts of the project? -**Examples of rules that might make sense:** +**Examples of policies that might make sense:** -| Job Type | Potential Rule | -|----------|----------------| +| Job Type | Potential Policy | +|----------|------------------| | API Design | "Update API docs when endpoint definitions change" | | Database Schema | "Review migrations when schema files change" | | Competitive Research | "Update strategy docs when competitor analysis changes" | | Feature Development | "Update changelog when feature files change" | | Configuration Management | "Update install guide when config files change" | -**How to offer rule creation:** +**How to offer policy creation:** -If you identify one or more rules that would benefit the user, explain: -1. **What the rule would do** - What triggers it and what action it prompts +If you identify one or more policies that would benefit the user, explain: +1. 
**What the policy would do** - What triggers it and what action it prompts 2. **Why it would help** - How it prevents common mistakes or keeps things in sync 3. **What files it would watch** - The trigger patterns Then ask the user: -> "Would you like me to create this rule for you? I can run `/deepwork_rules.define` to set it up." +> "Would you like me to create this policy for you? I can run `/deepwork_policy.define` to set it up." -If the user agrees, invoke the `/deepwork_rules.define` command to guide them through creating the rule. +If the user agrees, invoke the `/deepwork_policy.define` command to guide them through creating the policy. **Example dialogue:** @@ -250,15 +242,15 @@ Based on the competitive_research job you just created, I noticed that when competitor analysis files change, it would be helpful to remind you to update your strategy documentation. -I'd suggest a rule like: +I'd suggest a policy like: - **Name**: "Update strategy when competitor analysis changes" - **Trigger**: `**/positioning_report.md` - **Action**: Prompt to review and update `docs/strategy.md` -Would you like me to create this rule? I can run `/deepwork_rules.define` to set it up. +Would you like me to create this policy? I can run `/deepwork_policy.define` to set it up. ``` -**Note:** Not every job needs rules. Only suggest them when they would genuinely help maintain consistency or quality. Don't force rules where they don't make sense. +**Note:** Not every job needs policies. Only suggest them when they would genuinely help maintain consistency or quality. Don't force policies where they don't make sense. 
## Example Implementation @@ -292,8 +284,8 @@ Before marking this step complete, ensure: - [ ] `deepwork sync` executed successfully - [ ] Commands generated in platform directory - [ ] User informed to follow reload instructions from `deepwork sync` -- [ ] Considered whether rules would benefit this job (Step 7) -- [ ] If rules suggested, offered to run `/deepwork_rules.define` +- [ ] Considered whether policies would benefit this job (Step 7) +- [ ] If policies suggested, offered to run `/deepwork_policy.define` ## Quality Criteria @@ -305,7 +297,7 @@ Before marking this step complete, ensure: - Steps with user inputs explicitly use "ask structured questions" phrasing - Sync completed successfully - Commands available for use -- Thoughtfully considered relevant rules for the job domain +- Thoughtfully considered relevant policies for the job domain ## Inputs @@ -345,7 +337,6 @@ Ensure all outputs are: This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. ### Quality Criteria -**AGENT: TAKE ACTION** - Verify the implementation meets ALL quality criteria before completing: 1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly? 2. **Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)? @@ -355,12 +346,7 @@ This step uses an iterative quality validation loop. After completing your work, 6. **Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"? 7. **Sync Complete**: Has `deepwork sync` been run successfully? 8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`? -9. **Rules Considered**: Have you thought about whether rules would benefit this job? 
- - If relevant rules were identified, did you explain them and offer to run `/deepwork_rules.define`? - - Not every job needs rules - only suggest when genuinely helpful. - -If ANY criterion is not met, continue working to address it. -If ALL criteria are satisfied, include `✓ Quality Criteria Met` in your response. +9. **Policies Considered**: Has the agent thought about whether policies would benefit this job? If relevant policies were identified, did they explain them and offer to run `/deepwork_policy.define`? Not every job needs policies - only suggest when genuinely helpful. ### Completion Promise diff --git a/.claude/commands/uw.update.job.md b/.claude/commands/uw.update.job.md index 2c3317e..023b05a 100644 --- a/.claude/commands/uw.update.job.md +++ b/.claude/commands/uw.update.job.md @@ -9,13 +9,10 @@ hooks: ## Quality Criteria - **AGENT: TAKE ACTION** - Verify the update process completed successfully: - 1. Changes were made in src/deepwork/standard_jobs/[job_name]/ (NOT in .deepwork/jobs/) - 2. `deepwork install --platform claude` was run - 3. Files in .deepwork/jobs/ match the source files - 4. Command files in .claude/commands/ were regenerated - If ALL criteria are met, include `✓ Quality Criteria Met`. - + 1. Were changes made in src/deepwork/standard_jobs/[job_name]/ (NOT in .deepwork/jobs/)? + 2. Was `deepwork install --platform claude` run? + 3. Do files in .deepwork/jobs/ match the source files? + 4. Were command files in .claude/commands/ regenerated? ## Instructions @@ -25,8 +22,8 @@ hooks: If the agent has included `✓ Quality Criteria Met` in their response AND all criteria appear to be met, respond with: {"ok": true} - If criteria are NOT met AND the promise tag is missing, respond with: - {"ok": false, "reason": "Continue working. 
[specific feedback on what's wrong]"} + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} --- # update.job @@ -161,12 +158,11 @@ No specific files are output by this command. This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. ### Quality Criteria -**AGENT: TAKE ACTION** - Verify the update process completed successfully: -1. Changes were made in src/deepwork/standard_jobs/[job_name]/ (NOT in .deepwork/jobs/) -2. `deepwork install --platform claude` was run -3. Files in .deepwork/jobs/ match the source files -4. Command files in .claude/commands/ were regenerated -If ALL criteria are met, include `✓ Quality Criteria Met`. + +1. Were changes made in src/deepwork/standard_jobs/[job_name]/ (NOT in .deepwork/jobs/)? +2. Was `deepwork install --platform claude` run? +3. Do files in .deepwork/jobs/ match the source files? +4. Were command files in .claude/commands/ regenerated? ### Completion Promise diff --git a/.deepwork/jobs/add_platform/job.yml b/.deepwork/jobs/add_platform/job.yml index df97239..ac51019 100644 --- a/.deepwork/jobs/add_platform/job.yml +++ b/.deepwork/jobs/add_platform/job.yml @@ -36,20 +36,13 @@ steps: - cli_configuration.md - hooks_system.md dependencies: [] - hooks: - after_agent: - - prompt: | - **AGENT: TAKE ACTION** - Verify the research output meets ALL criteria: - 1. Both files exist in doc/platforms//: cli_configuration.md and hooks_system.md - 2. Each file has a comment at the top with: - - Last updated date - - Source URL where the documentation was obtained - 3. cli_configuration.md covers how the platform's CLI is configured - 4. hooks_system.md covers hooks available for slash command definitions ONLY - 5. 
No extraneous documentation (only these two specific topics) - 6. Documentation is comprehensive enough to implement the platform - - If ALL criteria are met, include `✓ Quality Criteria Met`. + quality_criteria: + - "Do both files exist in doc/platforms//: cli_configuration.md and hooks_system.md?" + - "Does each file have a comment at the top with last updated date and source URL?" + - "Does cli_configuration.md cover how the platform's CLI is configured?" + - "Does hooks_system.md cover hooks available for slash command definitions ONLY?" + - "Is there no extraneous documentation (only these two specific topics)?" + - "Is the documentation comprehensive enough to implement the platform?" - id: add_capabilities name: "Add Hook Capabilities" @@ -63,19 +56,13 @@ steps: - adapters.py dependencies: - research - hooks: - after_agent: - - prompt: | - **AGENT: TAKE ACTION** - Verify the capability additions meet ALL criteria: - 1. Any new hooks from the platform (for slash commands only) are added to src/deepwork/schemas/job_schema.py - 2. All existing adapters in src/deepwork/adapters.py are updated with the new hook fields - (set to None/null if the platform doesn't support that hook) - 3. Only hooks available on slash command definitions are added (not general CLI hooks) - 4. job_schema.py remains valid Python with no syntax errors - 5. adapters.py remains consistent - all adapters have the same hook fields - 6. If no new hooks are needed, document why in a comment - - If ALL criteria are met, include `✓ Quality Criteria Met`. + quality_criteria: + - "Are any new hooks from the platform (for slash commands only) added to src/deepwork/schemas/job_schema.py?" + - "Are all existing adapters in src/deepwork/adapters.py updated with the new hook fields? (set to None/null if the platform doesn't support that hook)" + - "Are only hooks available on slash command definitions added (not general CLI hooks)?" + - "Does job_schema.py remain valid Python with no syntax errors?" 
+ - "Is adapters.py consistent - all adapters have the same hook fields?" + - "If no new hooks are needed, is there documentation explaining why?" - id: implement name: "Implement Platform Support" @@ -98,19 +85,13 @@ steps: hooks: after_agent: - script: hooks/run_tests.sh - - prompt: | - **AGENT: TAKE ACTION** - Verify the implementation meets ALL criteria: - 1. Platform adapter class is added to src/deepwork/adapters.py - 2. Templates exist in src/deepwork/templates// with appropriate command structure - 3. Tests exist for all new functionality - 4. Test coverage is 100% for new code (run: uv run pytest --cov) - 5. All tests pass - 6. README.md is updated with: - - New platform listed in supported platforms - - Installation instructions for the platform - - Any platform-specific notes - - If ALL criteria are met, include `✓ Quality Criteria Met`. + quality_criteria: + - "Is the platform adapter class added to src/deepwork/adapters.py?" + - "Do templates exist in src/deepwork/templates// with appropriate command structure?" + - "Do tests exist for all new functionality?" + - "Is test coverage 100% for new code (run: uv run pytest --cov)?" + - "Do all tests pass?" + - "Is README.md updated with: New platform listed in supported platforms, Installation instructions for the platform, Any platform-specific notes" - id: verify name: "Verify Installation" diff --git a/.deepwork/jobs/commit/AGENTS.md b/.deepwork/jobs/commit/AGENTS.md new file mode 100644 index 0000000..15f0271 --- /dev/null +++ b/.deepwork/jobs/commit/AGENTS.md @@ -0,0 +1,30 @@ +# Job Management + +This folder and its subfolders are managed using the `deepwork_jobs` slash commands. + +## Recommended Commands + +- `/deepwork_jobs.define` - Create or modify the job.yml specification +- `/deepwork_jobs.implement` - Generate step instruction files from the specification +- `/deepwork_jobs.learn` - Improve instructions based on execution learnings + +## Directory Structure + +``` +. 
+├── AGENTS.md # This file - project context and guidance +├── job.yml # Job specification (created by /deepwork_jobs.define) +├── steps/ # Step instruction files (created by /deepwork_jobs.implement) +│ └── *.md # One file per step +├── hooks/ # Custom validation scripts and prompts +│ └── *.md|*.sh # Hook files referenced in job.yml +└── templates/ # Example file formats and templates + └── *.md|*.yml # Templates referenced in step instructions +``` + +## Editing Guidelines + +1. **Use slash commands** for structural changes (adding steps, modifying job.yml) +2. **Direct edits** are fine for minor instruction tweaks +3. **Run `/deepwork_jobs.learn`** after executing job steps to capture improvements +4. **Run `deepwork sync`** after any changes to regenerate commands diff --git a/.deepwork/jobs/commit/hooks/.gitkeep b/.deepwork/jobs/commit/hooks/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/.deepwork/jobs/commit/hooks/run_ruff.sh b/.deepwork/jobs/commit/hooks/run_ruff.sh new file mode 100755 index 0000000..377ce60 --- /dev/null +++ b/.deepwork/jobs/commit/hooks/run_ruff.sh @@ -0,0 +1,3 @@ +#!/bin/bash +cd /Users/noah/Documents/GitHub/deep-work +uv run ruff format --check src/ tests/ && uv run ruff check src/ tests/ diff --git a/.deepwork/jobs/commit/hooks/run_tests.sh b/.deepwork/jobs/commit/hooks/run_tests.sh new file mode 100755 index 0000000..943d177 --- /dev/null +++ b/.deepwork/jobs/commit/hooks/run_tests.sh @@ -0,0 +1,3 @@ +#!/bin/bash +cd /Users/noah/Documents/GitHub/deep-work +uv run pytest tests/ -v diff --git a/.deepwork/jobs/commit/job.yml b/.deepwork/jobs/commit/job.yml new file mode 100644 index 0000000..d7beca5 --- /dev/null +++ b/.deepwork/jobs/commit/job.yml @@ -0,0 +1,70 @@ +# DeepWork Job: commit +# +# Validates code quality, runs formatters, and pushes changes to remote. 
+ +name: commit +version: "1.0.0" +summary: "Validate, format, and push changes with tests passing" +description: | + A pre-commit workflow that ensures code quality before pushing changes. + + This job runs through three validation and preparation steps: + 1. Runs the test suite and fixes any failures until all tests pass (max 5 attempts) + 2. Runs ruff formatting and linting, fixing issues until clean (max 5 attempts) + 3. Fetches from remote, rebases if needed, generates a simple commit message, + commits changes, and pushes to the remote branch + + Each step uses a quality validation loop to ensure it completes successfully + before moving to the next step. The format step runs as a subagent to + minimize token usage. + + Key behaviors: + - Rebase strategy when remote has changes (keeps linear history) + - Simple summary commit messages (no conventional commits format) + - Maximum 5 fix attempts before stopping + + Designed for developers who want a reliable pre-push workflow that catches + issues early and ensures consistent code quality. 
+ +changelog: + - version: "1.0.0" + changes: "Initial job creation" + +steps: + - id: test + name: "Run Tests" + description: "Run pytest and fix any failures until all tests pass (max 5 attempts)" + instructions_file: steps/test.md + inputs: [] + outputs: [] + dependencies: [] + hooks: + after_agent: + - script: hooks/run_tests.sh + + - id: format + name: "Format Code" + description: "Run ruff formatting and linting, fix issues until clean (max 5 attempts, runs as subagent)" + instructions_file: steps/format.md + inputs: [] + outputs: [] + dependencies: + - test + hooks: + after_agent: + - script: hooks/run_ruff.sh + + - id: reconcile_and_push + name: "Reconcile and Push" + description: "Fetch remote, rebase if needed, commit with simple summary message, and push" + instructions_file: steps/reconcile_and_push.md + inputs: [] + outputs: [] + dependencies: + - format + quality_criteria: + - "Did the agent fetch from the remote to check for updates?" + - "If there were remote changes, did the agent rebase local changes on top?" + - "Did the agent generate a simple summary commit message based on the changes?" + - "Did the agent commit the changes?" + - "Did the agent push to the remote branch?" diff --git a/.deepwork/jobs/commit/steps/format.md b/.deepwork/jobs/commit/steps/format.md new file mode 100644 index 0000000..e7c4587 --- /dev/null +++ b/.deepwork/jobs/commit/steps/format.md @@ -0,0 +1,89 @@ +# Format Code + +## Objective + +Run ruff formatting and linting checks, fixing any issues until the code is clean (maximum 5 attempts). + +## Task + +Execute ruff to check code formatting and linting. If any issues are found, fix them. Continue this cycle until ruff reports no issues or you've made 5 fix attempts. + +**Note**: This step is designed to run as a subagent to minimize token usage. Focus on efficient, targeted fixes. + +### Process + +1. 
**Check format and lint status** + The hook automatically runs: + ```bash + uv run ruff format --check src/ tests/ + uv run ruff check src/ tests/ + ``` + +2. **Analyze ruff output** + - If both commands pass (exit code 0), you're done with this step + - If issues are reported, examine them carefully + +3. **Fix issues** (if needed) + + **For formatting issues**: + ```bash + uv run ruff format src/ tests/ + ``` + This auto-fixes formatting issues. + + **For linting issues**: + - Some can be auto-fixed: `uv run ruff check --fix src/ tests/` + - Others require manual fixes based on the error messages + - Common issues: unused imports, undefined names, line length + +4. **Repeat if necessary** + - Re-run checks after fixes + - Continue until all issues are resolved + - Track your attempts - stop after 5 fix attempts if issues remain + - If you cannot fix after 5 attempts, report remaining issues to the user + +### Common Ruff Issues and Fixes + +| Issue | Fix | +|-------|-----| +| F401 unused import | Remove the import | +| F841 unused variable | Remove or use the variable | +| E501 line too long | Break into multiple lines | +| I001 import sorting | Run `ruff check --fix` or reorder manually | +| E711 comparison to None | Use `is None` instead of `== None` | + +### Important Notes + +- **Run ruff format first** - It auto-fixes most formatting issues +- **Use --fix for lint issues** - Many lint issues can be auto-fixed +- **Minimal manual fixes** - Only manually fix what auto-fix can't handle +- **Track attempts** - Keep count of fix attempts to respect the 5-attempt limit + +## Output Format + +No file output is required. Success is determined by ruff passing all checks. + +**On success**: Report that ruff checks pass and proceed to the next step. + +**On failure after 5 attempts**: Report which issues remain and why you couldn't fix them. 
+ +## Quality Criteria + +- `uv run ruff format --check src/ tests/` passes (exit code 0) +- `uv run ruff check src/ tests/` passes (exit code 0) +- Any fixes made don't break functionality (tests should still pass) +- If issues couldn't be fixed in 5 attempts, clear explanation provided + +## Hook Behavior + +After you complete this step, a hook will automatically run ruff format and lint checks and show you the results. + +**Interpreting the hook output:** +- **Both checks passed (exit code 0)**: The step is complete. Proceed to the next step. +- **Checks failed (exit code non-zero)**: You must fix the issues. Use `uv run ruff format src/ tests/` for formatting and `uv run ruff check --fix src/ tests/` for auto-fixable lint issues. For remaining issues, fix manually. The hook will re-run after each attempt. + +**Important**: The hook runs automatically - you don't need to run the checks yourself after fixing. Just focus on making fixes, and the hook will verify them. + +## Context + +This is the second step in the commit workflow, after tests pass. Code must be properly formatted and lint-free before committing. diff --git a/.deepwork/jobs/commit/steps/reconcile_and_push.md b/.deepwork/jobs/commit/steps/reconcile_and_push.md new file mode 100644 index 0000000..2dd8017 --- /dev/null +++ b/.deepwork/jobs/commit/steps/reconcile_and_push.md @@ -0,0 +1,108 @@ +# Reconcile and Push + +## Objective + +Fetch the latest changes from the remote, rebase if necessary, generate a commit message, commit the changes, and push to the remote branch. + +## Task + +Ensure the local branch is up-to-date with the remote, commit all staged changes with a clear summary message, and push to the remote repository. + +### Process + +1. **Fetch from remote** + ```bash + git fetch origin + ``` + +2. **Check for remote changes** + ```bash + git status + ``` + Look for "Your branch is behind" or "Your branch and 'origin/...' have diverged" + +3. 
**Rebase if needed** + If the remote has changes that aren't in your local branch: + ```bash + git rebase origin/ + ``` + + **If rebase conflicts occur**: + - Resolve conflicts in the affected files + - Stage resolved files: `git add ` + - Continue rebase: `git rebase --continue` + - If conflicts are too complex, abort and report to user: `git rebase --abort` + +4. **Review changes to commit** + ```bash + git status + git diff --staged + git diff + ``` + + Stage any unstaged changes that should be committed: + ```bash + git add -A + ``` + +5. **Generate commit message** + Analyze the changes and create a **simple summary** commit message: + - Look at the diff to understand what changed + - Write a clear, concise description (1-2 sentences) + - Focus on the "what" and "why", not the "how" + - No conventional commits format needed - just a clear summary + + **Good commit messages**: + - "Add user authentication with session management" + - "Fix race condition in data processing pipeline" + - "Update dependencies and fix compatibility issues" + - "Refactor database queries for better performance" + +6. **Commit the changes** + ```bash + git commit -m "" + ``` + +7. **Push to remote** + ```bash + git push origin + ``` + + If push is rejected (remote has new changes), fetch and rebase again, then retry push. + +### Handling Edge Cases + +**No changes to commit**: +- If `git status` shows nothing to commit, inform the user and skip the commit/push + +**Protected branch**: +- If push fails due to branch protection, inform the user they may need to create a PR + +**Rebase conflicts**: +- Attempt to resolve simple conflicts +- For complex conflicts, abort the rebase and ask the user for guidance + +**Diverged branches**: +- Always use rebase (not merge) to maintain linear history +- If rebase fails repeatedly, report the issue to the user + +## Output Format + +No file output is required. Success is determined by successfully pushing to the remote. 
+ +**On success**: Report the commit hash and confirm the push succeeded. + +**On failure**: Report what went wrong (conflicts, push rejection, etc.) and suggest next steps. + +## Quality Criteria + +- Fetched latest changes from remote +- Rebased on top of remote changes if any existed +- Generated a clear, simple summary commit message +- Successfully committed all changes +- Successfully pushed to the remote branch +- When all steps complete successfully, include `✓ Quality Criteria Met` in your response + +## Context + +This is the final step in the commit workflow. By this point, tests pass and code is formatted. This step ensures your changes are properly committed with a good message and pushed to share with the team. The rebase strategy keeps the git history linear and clean. diff --git a/.deepwork/jobs/commit/steps/test.md b/.deepwork/jobs/commit/steps/test.md new file mode 100644 index 0000000..7a8eb67 --- /dev/null +++ b/.deepwork/jobs/commit/steps/test.md @@ -0,0 +1,67 @@ +# Run Tests + +## Objective + +Run the project test suite and fix any failing tests until all tests pass (maximum 5 attempts). + +## Task + +Execute pytest to run all tests. If any tests fail, analyze the failures and fix them. Continue this cycle until all tests pass or you've made 5 fix attempts. + +### Process + +1. **Run the test suite** + ```bash + uv run pytest tests/ -v + ``` + +2. **Analyze test results** + - If all tests pass, you're done with this step + - If tests fail, examine the failure output carefully + +3. **Fix failing tests** (if needed) + - Read the failing test to understand what it's testing + - Read the relevant source code + - Determine if the issue is in the test or the implementation + - Make the minimal fix needed to pass the test + - Re-run tests to verify the fix + +4. 
**Repeat if necessary** + - Continue the fix cycle until all tests pass + - Track your attempts - stop after 5 fix attempts if tests still fail + - If you cannot fix after 5 attempts, report the remaining failures to the user + +### Important Notes + +- **Don't skip tests** - All tests must pass before proceeding +- **Minimal fixes** - Make the smallest change needed to fix each failure +- **Understand before fixing** - Read and understand failing tests before attempting fixes +- **Track attempts** - Keep count of fix attempts to respect the 5-attempt limit + +## Output Format + +No file output is required. Success is determined by all tests passing. + +**On success**: Report that all tests pass and proceed to the next step. + +**On failure after 5 attempts**: Report which tests are still failing and why you couldn't fix them. + +## Quality Criteria + +- All tests pass (`uv run pytest tests/ -v` exits with code 0) +- Any fixes made are minimal and don't break other functionality +- If tests couldn't be fixed in 5 attempts, clear explanation provided + +## Hook Behavior + +After you complete this step, a hook will automatically run `uv run pytest tests/ -v` and show you the results. + +**Interpreting the hook output:** +- **All tests passed (exit code 0)**: The step is complete. Proceed to the next step. +- **Tests failed (exit code non-zero)**: You must fix the failing tests. Analyze the output, make fixes, and try again. The hook will re-run after each attempt. + +**Important**: The hook runs automatically - you don't need to run pytest yourself after the initial run. Just focus on making fixes when tests fail, and the hook will verify your fixes. + +## Context + +This is the first step in the commit workflow. Tests must pass before code formatting is checked, ensuring that any changes being committed are functionally correct. 
diff --git a/.deepwork/jobs/commit/templates/.gitkeep b/.deepwork/jobs/commit/templates/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index f54f7f4..e87c03c 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -36,22 +36,15 @@ steps: outputs: - job.yml dependencies: [] - hooks: - after_agent: - - prompt: | - **AGENT: TAKE ACTION** - Verify the job.yml output meets ALL quality criteria before completing: - - 1. **User Understanding**: Did you fully understand the user's workflow by asking structured questions? - 2. **Structured Questions Used**: Did you ask structured questions (using the AskUserQuestion tool) to gather user input? - 3. **Clear Inputs/Outputs**: Does every step have clearly defined inputs and outputs? - 4. **Logical Dependencies**: Do step dependencies make sense and avoid circular references? - 5. **Concise Summary**: Is the summary under 200 characters and descriptive? - 6. **Rich Description**: Does the description provide enough context for future refinement? - 7. **Valid Schema**: Does the job.yml follow the required schema (name, version, summary, steps)? - 8. **File Created**: Has the job.yml file been created in `.deepwork/jobs/[job_name]/job.yml`? - - If ANY criterion is not met, continue working to address it. - If ALL criteria are satisfied, include `✓ Quality Criteria Met` in your response. + quality_criteria: + - "**User Understanding**: Did the agent fully understand the user's workflow by asking structured questions?" + - "**Structured Questions Used**: Did the agent ask structured questions (using the AskUserQuestion tool) to gather user input?" + - "**Clear Inputs/Outputs**: Does every step have clearly defined inputs and outputs?" + - "**Logical Dependencies**: Do step dependencies make sense and avoid circular references?" 
+ - "**Concise Summary**: Is the summary under 200 characters and descriptive?" + - "**Rich Description**: Does the description provide enough context for future refinement?" + - "**Valid Schema**: Does the job.yml follow the required schema (name, version, summary, steps)?" + - "**File Created**: Has the job.yml file been created in `.deepwork/jobs/[job_name]/job.yml`?" - id: implement name: "Implement Job Steps" @@ -95,21 +88,14 @@ steps: outputs: - AGENTS.md dependencies: [] - hooks: - after_agent: - - prompt: | - **AGENT: TAKE ACTION** - Verify the learning process meets ALL quality criteria before completing: - - 1. **Conversation Analyzed**: Did you review the conversation for DeepWork job executions? - 2. **Confusion Identified**: Did you identify points of confusion, errors, or inefficiencies? - 3. **Instructions Improved**: Were job instructions updated to address identified issues? - 4. **Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity? - 5. **Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files? - 6. **Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md? - 7. **File References Used**: Do AGENTS.md entries reference other files where appropriate? - 8. **Working Folder Correct**: Is AGENTS.md in the correct working folder for the job? - 9. **Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md? - 10. **Sync Complete**: Has `deepwork sync` been run if instructions were modified? - - If ANY criterion is not met, continue working to address it. - If ALL criteria are satisfied, include `✓ Quality Criteria Met` in your response. + quality_criteria: + - "**Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions?" + - "**Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies?" 
+ - "**Instructions Improved**: Were job instructions updated to address identified issues?" + - "**Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity?" + - "**Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files?" + - "**Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md?" + - "**File References Used**: Do AGENTS.md entries reference other files where appropriate?" + - "**Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?" + - "**Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?" + - "**Sync Complete**: Has `deepwork sync` been run if instructions were modified?" diff --git a/.deepwork/jobs/update/job.yml b/.deepwork/jobs/update/job.yml index 379257c..61bff37 100644 --- a/.deepwork/jobs/update/job.yml +++ b/.deepwork/jobs/update/job.yml @@ -32,11 +32,8 @@ steps: description: "Determine from conversation context which standard job(s) to update and what changes are needed" outputs: [] dependencies: [] - stop_hooks: - - prompt: | - **AGENT: TAKE ACTION** - Verify the update process completed successfully: - 1. Changes were made in src/deepwork/standard_jobs/[job_name]/ (NOT in .deepwork/jobs/) - 2. `deepwork install --platform claude` was run - 3. Files in .deepwork/jobs/ match the source files - 4. Command files in .claude/commands/ were regenerated - If ALL criteria are met, include `✓ Quality Criteria Met`. + quality_criteria: + - "Were changes made in src/deepwork/standard_jobs/[job_name]/ (NOT in .deepwork/jobs/)?" + - "Was `deepwork install --platform claude` run?" + - "Do files in .deepwork/jobs/ match the source files?" + - "Were command files in .claude/commands/ regenerated?" 
diff --git a/.gemini/commands/add_platform/index.toml b/.gemini/commands/add_platform/index.toml index 5ee8dbd..b46ed1f 100644 --- a/.gemini/commands/add_platform/index.toml +++ b/.gemini/commands/add_platform/index.toml @@ -51,27 +51,32 @@ This job has 4 step(s): ## Instructions -Determine what the user wants to do and route to the appropriate step. +This is a **multi-step workflow**. Determine the starting point and guide the user through the steps in sequence. 1. **Analyze user intent** from the text that follows `/add_platform` -2. **Match intent to a step**: +2. **Identify the starting step** based on intent: - research: Capture CLI configuration and hooks system documentation for the new platform - add_capabilities: Update job schema and adapters with any new hook events the platform supports - implement: Add platform adapter, templates, tests with 100% coverage, and README documentation - verify: Set up platform directories and verify deepwork install works correctly -3. **Invoke the matched step**: Tell the user to run the appropriate command: - - For research: `/add_platform:uw.research` - - For add_capabilities: `/add_platform:uw.add_capabilities` - - For implement: `/add_platform:uw.implement` - - For verify: `/add_platform:uw.verify` +3. **Guide the user through the workflow**: + - Tell the user to run the starting step command + - After each step completes, guide them to the next step in the workflow + - Continue until the workflow is complete -4. **If intent is ambiguous**, ask the user which step they want: + Step commands: + - research: `/add_platform:uw.research` + - add_capabilities: `/add_platform:uw.add_capabilities` + - implement: `/add_platform:uw.implement` + - verify: `/add_platform:uw.verify` + +4. 
**If intent is ambiguous**, ask the user which step to start from: - Present the available steps as numbered options - Let them choose -**Note**: Gemini CLI doesn't support programmatic command invocation, so guide the user to run the appropriate step command. +**Note**: Gemini CLI requires manual command invocation. After each step completes, remind the user to run the next step command. ## Context Files diff --git a/.gemini/commands/add_platform/uw.add_capabilities.toml b/.gemini/commands/add_platform/uw.add_capabilities.toml index da2fec8..fc36c70 100644 --- a/.gemini/commands/add_platform/uw.add_capabilities.toml +++ b/.gemini/commands/add_platform/uw.add_capabilities.toml @@ -213,6 +213,21 @@ Ensure all outputs are: - Well-formatted and complete - Ready for review or use by subsequent steps +## Quality Validation + +This step has quality criteria that should be verified before completion. + +### Quality Criteria + +1. Are any new hooks from the platform (for slash commands only) added to src/deepwork/schemas/job_schema.py? +2. Are all existing adapters in src/deepwork/adapters.py updated with the new hook fields? (set to None/null if the platform doesn't support that hook) +3. Are only hooks available on slash command definitions added (not general CLI hooks)? +4. Does job_schema.py remain valid Python with no syntax errors? +5. Is adapters.py consistent - all adapters have the same hook fields? +6. If no new hooks are needed, is there documentation explaining why? + +**Note**: Gemini CLI does not support automated validation hooks. Please manually verify the criteria above before proceeding. 
+ ## Completion After completing this step: diff --git a/.gemini/commands/add_platform/uw.implement.toml b/.gemini/commands/add_platform/uw.implement.toml index 4c348a3..9144a61 100644 --- a/.gemini/commands/add_platform/uw.implement.toml +++ b/.gemini/commands/add_platform/uw.implement.toml @@ -308,6 +308,21 @@ Ensure all outputs are: - Well-formatted and complete - Ready for review or use by subsequent steps +## Quality Validation + +This step has quality criteria that should be verified before completion. + +### Quality Criteria + +1. Is the platform adapter class added to src/deepwork/adapters.py? +2. Do templates exist in src/deepwork/templates// with appropriate command structure? +3. Do tests exist for all new functionality? +4. Is test coverage 100% for new code (run: uv run pytest --cov)? +5. Do all tests pass? +6. Is README.md updated with: New platform listed in supported platforms, Installation instructions for the platform, Any platform-specific notes + +**Note**: Gemini CLI does not support automated validation hooks. Please manually verify the criteria above before proceeding. + ## Completion After completing this step: diff --git a/.gemini/commands/add_platform/uw.research.toml b/.gemini/commands/add_platform/uw.research.toml index 48f4afd..dd8e372 100644 --- a/.gemini/commands/add_platform/uw.research.toml +++ b/.gemini/commands/add_platform/uw.research.toml @@ -258,6 +258,21 @@ Ensure all outputs are: - Well-formatted and complete - Ready for review or use by subsequent steps +## Quality Validation + +This step has quality criteria that should be verified before completion. + +### Quality Criteria + +1. Do both files exist in doc/platforms//: cli_configuration.md and hooks_system.md? +2. Does each file have a comment at the top with last updated date and source URL? +3. Does cli_configuration.md cover how the platform's CLI is configured? +4. Does hooks_system.md cover hooks available for slash command definitions ONLY? +5. 
Is there no extraneous documentation (only these two specific topics)? +6. Is the documentation comprehensive enough to implement the platform? + +**Note**: Gemini CLI does not support automated validation hooks. Please manually verify the criteria above before proceeding. + ## Completion After completing this step: diff --git a/.gemini/commands/add_platform/uw.verify.toml b/.gemini/commands/add_platform/uw.verify.toml index acfd967..54c1dcd 100644 --- a/.gemini/commands/add_platform/uw.verify.toml +++ b/.gemini/commands/add_platform/uw.verify.toml @@ -183,6 +183,21 @@ All work for this job should be done on a dedicated work branch: No specific files are output by this command. +## Quality Validation + +This step has quality criteria that should be verified before completion. + +### Quality Criteria + +1. Are platform-specific directories/files added to the deepwork repo as needed? +2. Does running `deepwork install --platform ` complete without errors? +3. Are expected command files created in the platform's command directory? +4. Does command file content match the templates and job definitions? +5. Are established DeepWork jobs (deepwork_jobs, deepwork_policy) installed correctly? +6. Can the platform be used alongside existing platforms without conflicts? + +**Note**: Gemini CLI does not support automated validation hooks. Please manually verify the criteria above before proceeding. + ## Completion After completing this step: diff --git a/.gemini/commands/commit/index.toml b/.gemini/commands/commit/index.toml new file mode 100644 index 0000000..aa43c7b --- /dev/null +++ b/.gemini/commands/commit/index.toml @@ -0,0 +1,81 @@ +# commit +# +# Validate, format, and push changes with tests passing +# +# Generated by DeepWork - do not edit manually + +description = "Validate, format, and push changes with tests passing" + +prompt = """ +# commit + +You are executing the **commit** job. 
Validate, format, and push changes with tests passing + +A pre-commit workflow that ensures code quality before pushing changes. + +This job runs through three validation and preparation steps: +1. Runs the test suite and fixes any failures until all tests pass (max 5 attempts) +2. Runs ruff formatting and linting, fixing issues until clean (max 5 attempts) +3. Fetches from remote, rebases if needed, generates a simple commit message, + commits changes, and pushes to the remote branch + +Each step uses a quality validation loop to ensure it completes successfully +before moving to the next step. The format step runs as a subagent to +minimize token usage. + +Key behaviors: +- Rebase strategy when remote has changes (keeps linear history) +- Simple summary commit messages (no conventional commits format) +- Maximum 5 fix attempts before stopping + +Designed for developers who want a reliable pre-push workflow that catches +issues early and ensures consistent code quality. + + +## Available Steps + +This job has 3 step(s): + +### test +**Run Tests**: Run pytest and fix any failures until all tests pass (max 5 attempts) +- Command: `/commit:uw.test` +### format +**Format Code**: Run ruff formatting and linting, fix issues until clean (max 5 attempts, runs as subagent) +- Command: `/commit:uw.format` +- Requires: test +### reconcile_and_push +**Reconcile and Push**: Fetch remote, rebase if needed, commit with simple summary message, and push +- Command: `/commit:uw.reconcile_and_push` +- Requires: format + +## Instructions + +This is a **multi-step workflow**. Determine the starting point and guide the user through the steps in sequence. + +1. **Analyze user intent** from the text that follows `/commit` + +2. 
**Identify the starting step** based on intent: + - test: Run pytest and fix any failures until all tests pass (max 5 attempts) + - format: Run ruff formatting and linting, fix issues until clean (max 5 attempts, runs as subagent) + - reconcile_and_push: Fetch remote, rebase if needed, commit with simple summary message, and push + +3. **Guide the user through the workflow**: + - Tell the user to run the starting step command + - After each step completes, guide them to the next step in the workflow + - Continue until the workflow is complete + + Step commands: + - test: `/commit:uw.test` + - format: `/commit:uw.format` + - reconcile_and_push: `/commit:uw.reconcile_and_push` + +4. **If intent is ambiguous**, ask the user which step to start from: + - Present the available steps as numbered options + - Let them choose + +**Note**: Gemini CLI requires manual command invocation. After each step completes, remind the user to run the next step command. + +## Context Files + +- Job definition: `.deepwork/jobs/commit/job.yml` +""" \ No newline at end of file diff --git a/.gemini/commands/commit/uw.format.toml b/.gemini/commands/commit/uw.format.toml new file mode 100644 index 0000000..6972b7e --- /dev/null +++ b/.gemini/commands/commit/uw.format.toml @@ -0,0 +1,172 @@ +# commit:format +# +# Run ruff formatting and linting, fix issues until clean (max 5 attempts, runs as subagent) +# +# Generated by DeepWork - do not edit manually + +description = "Run ruff formatting and linting, fix issues until clean (max 5 attempts, runs as subagent)" + +prompt = """ +# commit:format + +**Step 2 of 3** in the **commit** workflow + +**Summary**: Validate, format, and push changes with tests passing + +## Job Overview + +A pre-commit workflow that ensures code quality before pushing changes. + +This job runs through three validation and preparation steps: +1. Runs the test suite and fixes any failures until all tests pass (max 5 attempts) +2. 
Runs ruff formatting and linting, fixing issues until clean (max 5 attempts)
+3. Fetches from remote, rebases if needed, generates a simple commit message,
+ commits changes, and pushes to the remote branch
+
+Each step uses a quality validation loop to ensure it completes successfully
+before moving to the next step. The format step runs as a subagent to
+minimize token usage.
+
+Key behaviors:
+- Rebase strategy when remote has changes (keeps linear history)
+- Simple summary commit messages (no conventional commits format)
+- Maximum 5 fix attempts before stopping
+
+Designed for developers who want a reliable pre-push workflow that catches
+issues early and ensures consistent code quality.
+
+
+## Prerequisites
+
+This step requires completion of the following step(s):
+- `/commit:uw.test`
+
+Please ensure these steps have been completed before proceeding.
+
+## Instructions
+
+# Format Code
+
+## Objective
+
+Run ruff formatting and linting checks, fixing any issues until the code is clean (maximum 5 attempts).
+
+## Task
+
+Execute ruff to check code formatting and linting. If any issues are found, fix them. Continue this cycle until ruff reports no issues or you've made 5 fix attempts.
+
+**Note**: This step is designed to run as a subagent to minimize token usage. Focus on efficient, targeted fixes.
+
+### Process
+
+1. **Check format and lint status**
+ The hook automatically runs:
+ ```bash
+ uv run ruff format --check src/ tests/
+ uv run ruff check src/ tests/
+ ```
+
+2. **Analyze ruff output**
+ - If both commands pass (exit code 0), you're done with this step
+ - If issues are reported, examine them carefully
+
+3. **Fix issues** (if needed)
+
+ **For formatting issues**:
+ ```bash
+ uv run ruff format src/ tests/
+ ```
+ This auto-fixes formatting issues.
+ + **For linting issues**: + - Some can be auto-fixed: `uv run ruff check --fix src/ tests/` + - Others require manual fixes based on the error messages + - Common issues: unused imports, undefined names, line length + +4. **Repeat if necessary** + - Re-run checks after fixes + - Continue until all issues are resolved + - Track your attempts - stop after 5 fix attempts if issues remain + - If you cannot fix after 5 attempts, report remaining issues to the user + +### Common Ruff Issues and Fixes + +| Issue | Fix | +|-------|-----| +| F401 unused import | Remove the import | +| F841 unused variable | Remove or use the variable | +| E501 line too long | Break into multiple lines | +| I001 import sorting | Run `ruff check --fix` or reorder manually | +| E711 comparison to None | Use `is None` instead of `== None` | + +### Important Notes + +- **Run ruff format first** - It auto-fixes most formatting issues +- **Use --fix for lint issues** - Many lint issues can be auto-fixed +- **Minimal manual fixes** - Only manually fix what auto-fix can't handle +- **Track attempts** - Keep count of fix attempts to respect the 5-attempt limit + +## Output Format + +No file output is required. Success is determined by ruff passing all checks. + +**On success**: Report that ruff checks pass and proceed to the next step. + +**On failure after 5 attempts**: Report which issues remain and why you couldn't fix them. + +## Quality Criteria + +- `uv run ruff format --check src/ tests/` passes (exit code 0) +- `uv run ruff check src/ tests/` passes (exit code 0) +- Any fixes made don't break functionality (tests should still pass) +- If issues couldn't be fixed in 5 attempts, clear explanation provided +- When all checks pass, include `✓ Quality Criteria Met` in your response + +## Context + +This is the second step in the commit workflow, after tests pass. Code must be properly formatted and lint-free before committing. 
The format step uses a script hook that automatically runs ruff checks, so focus on analyzing results and making fixes efficiently. + + + +## Work Branch Management + +All work for this job should be done on a dedicated work branch: + +1. **Check current branch**: + - If already on a work branch for this job (format: `deepwork/commit-[instance]-[date]`), continue using it + - If on main/master, create a new work branch + +2. **Create work branch** (if needed): + ```bash + git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d) + ``` + Replace `[instance]` with a descriptive identifier (e.g., `acme`, `q1-launch`, etc.) + +## Output Requirements + +No specific files are output by this command. + +## Completion + +After completing this step: + +1. **Verify outputs**: Confirm all required files have been created + +2. **Inform the user**: + - Step 2 of 3 is complete + - Ready to proceed to next step: `/commit:reconcile_and_push` + +## Next Step + +To continue the workflow, run: +``` +/commit:reconcile_and_push +``` + +--- + +## Context Files + +- Job definition: `.deepwork/jobs/commit/job.yml` +- Step instructions: `.deepwork/jobs/commit/steps/format.md` +""" \ No newline at end of file diff --git a/.gemini/commands/commit/uw.reconcile_and_push.toml b/.gemini/commands/commit/uw.reconcile_and_push.toml new file mode 100644 index 0000000..9e81b9b --- /dev/null +++ b/.gemini/commands/commit/uw.reconcile_and_push.toml @@ -0,0 +1,216 @@ +# commit:reconcile_and_push +# +# Fetch remote, rebase if needed, commit with simple summary message, and push +# +# Generated by DeepWork - do not edit manually + +description = "Fetch remote, rebase if needed, commit with simple summary message, and push" + +prompt = """ +# commit:reconcile_and_push + +**Step 3 of 3** in the **commit** workflow + +**Summary**: Validate, format, and push changes with tests passing + +## Job Overview + +A pre-commit workflow that ensures code quality before pushing changes. 
+ +This job runs through three validation and preparation steps: +1. Runs the test suite and fixes any failures until all tests pass (max 5 attempts) +2. Runs ruff formatting and linting, fixing issues until clean (max 5 attempts) +3. Fetches from remote, rebases if needed, generates a simple commit message, + commits changes, and pushes to the remote branch + +Each step uses a quality validation loop to ensure it completes successfully +before moving to the next step. The format step runs as a subagent to +minimize token usage. + +Key behaviors: +- Rebase strategy when remote has changes (keeps linear history) +- Simple summary commit messages (no conventional commits format) +- Maximum 5 fix attempts before stopping + +Designed for developers who want a reliable pre-push workflow that catches +issues early and ensures consistent code quality. + + +## Prerequisites + +This step requires completion of the following step(s): +- `/commit:format` + +Please ensure these steps have been completed before proceeding. + +## Instructions + +# Reconcile and Push + +## Objective + +Fetch the latest changes from the remote, rebase if necessary, generate a commit message, commit the changes, and push to the remote branch. + +## Task + +Ensure the local branch is up-to-date with the remote, commit all staged changes with a clear summary message, and push to the remote repository. + +### Process + +1. **Fetch from remote** + ```bash + git fetch origin + ``` + +2. **Check for remote changes** + ```bash + git status + ``` + Look for "Your branch is behind" or "Your branch and 'origin/...' have diverged" + +3. **Rebase if needed** + If the remote has changes that aren't in your local branch: + ```bash + git rebase origin/<branch> + ``` + + **If rebase conflicts occur**: + - Resolve conflicts in the affected files + - Stage resolved files: `git add <file>` + - Continue rebase: `git rebase --continue` + - If conflicts are too complex, abort and report to user: `git rebase --abort` + +4.
**Review changes to commit** + ```bash + git status + git diff --staged + git diff + ``` + + Stage any unstaged changes that should be committed: + ```bash + git add -A + ``` + +5. **Generate commit message** + Analyze the changes and create a **simple summary** commit message: + - Look at the diff to understand what changed + - Write a clear, concise description (1-2 sentences) + - Focus on the "what" and "why", not the "how" + - No conventional commits format needed - just a clear summary + + **Good commit messages**: + - "Add user authentication with session management" + - "Fix race condition in data processing pipeline" + - "Update dependencies and fix compatibility issues" + - "Refactor database queries for better performance" + +6. **Commit the changes** + ```bash + git commit -m "<message>" + ``` + +7. **Push to remote** + ```bash + git push origin <branch> + ``` + + If push is rejected (remote has new changes), fetch and rebase again, then retry push. + +### Handling Edge Cases + +**No changes to commit**: +- If `git status` shows nothing to commit, inform the user and skip the commit/push + +**Protected branch**: +- If push fails due to branch protection, inform the user they may need to create a PR + +**Rebase conflicts**: +- Attempt to resolve simple conflicts +- For complex conflicts, abort the rebase and ask the user for guidance + +**Diverged branches**: +- Always use rebase (not merge) to maintain linear history +- If rebase fails repeatedly, report the issue to the user + +## Output Format + +No file output is required. Success is determined by successfully pushing to the remote. + +**On success**: Report the commit hash and confirm the push succeeded. + +**On failure**: Report what went wrong (conflicts, push rejection, etc.) and suggest next steps.
+ +## Quality Criteria + +- Fetched latest changes from remote +- Rebased on top of remote changes if any existed +- Generated a clear, simple summary commit message +- Successfully committed all changes +- Successfully pushed to the remote branch +- When all steps complete successfully, include `✓ Quality Criteria Met` in your response + +## Context + +This is the final step in the commit workflow. By this point, tests pass and code is formatted. This step ensures your changes are properly committed with a good message and pushed to share with the team. The rebase strategy keeps the git history linear and clean. + + + +## Work Branch Management + +All work for this job should be done on a dedicated work branch: + +1. **Check current branch**: + - If already on a work branch for this job (format: `deepwork/commit-[instance]-[date]`), continue using it + - If on main/master, create a new work branch + +2. **Create work branch** (if needed): + ```bash + git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d) + ``` + Replace `[instance]` with a descriptive identifier (e.g., `acme`, `q1-launch`, etc.) + +## Output Requirements + +No specific files are output by this command. + +## Quality Validation + +This step has quality criteria that should be verified before completion. + +### Quality Criteria + +1. Did the agent fetch from the remote to check for updates? +2. If there were remote changes, did the agent rebase local changes on top? +3. Did the agent generate a simple summary commit message based on the changes? +4. Did the agent commit the changes? +5. Did the agent push to the remote branch? + +**Note**: Gemini CLI does not support automated validation hooks. Please manually verify the criteria above before proceeding. + +## Completion + +After completing this step: + +1. **Verify outputs**: Confirm all required files have been created + +2. **Inform the user**: + - Step 3 of 3 is complete + - This is the final step - the job is complete! 
+ +## Workflow Complete + +This is the final step in the commit workflow. All outputs should now be complete and ready for review. + +Consider: +- Reviewing all work products +- Creating a pull request to merge the work branch +- Documenting any insights or learnings + +--- + +## Context Files + +- Job definition: `.deepwork/jobs/commit/job.yml` +- Step instructions: `.deepwork/jobs/commit/steps/reconcile_and_push.md` +""" \ No newline at end of file diff --git a/.gemini/commands/commit/uw.test.toml b/.gemini/commands/commit/uw.test.toml new file mode 100644 index 0000000..7766636 --- /dev/null +++ b/.gemini/commands/commit/uw.test.toml @@ -0,0 +1,144 @@ +# commit:test +# +# Run pytest and fix any failures until all tests pass (max 5 attempts) +# +# Generated by DeepWork - do not edit manually + +description = "Run pytest and fix any failures until all tests pass (max 5 attempts)" + +prompt = """ +# commit:test + +**Step 1 of 3** in the **commit** workflow + +**Summary**: Validate, format, and push changes with tests passing + +## Job Overview + +A pre-commit workflow that ensures code quality before pushing changes. + +This job runs through three validation and preparation steps: +1. Runs the test suite and fixes any failures until all tests pass (max 5 attempts) +2. Runs ruff formatting and linting, fixing issues until clean (max 5 attempts) +3. Fetches from remote, rebases if needed, generates a simple commit message, + commits changes, and pushes to the remote branch + +Each step uses a quality validation loop to ensure it completes successfully +before moving to the next step. The format step runs as a subagent to +minimize token usage. + +Key behaviors: +- Rebase strategy when remote has changes (keeps linear history) +- Simple summary commit messages (no conventional commits format) +- Maximum 5 fix attempts before stopping + +Designed for developers who want a reliable pre-push workflow that catches +issues early and ensures consistent code quality. 
+ + + +## Instructions + +# Run Tests + +## Objective + +Run the project test suite and fix any failing tests until all tests pass (maximum 5 attempts). + +## Task + +Execute pytest to run all tests. If any tests fail, analyze the failures and fix them. Continue this cycle until all tests pass or you've made 5 fix attempts. + +### Process + +1. **Run the test suite** + ```bash + uv run pytest tests/ -v + ``` + +2. **Analyze test results** + - If all tests pass, you're done with this step + - If tests fail, examine the failure output carefully + +3. **Fix failing tests** (if needed) + - Read the failing test to understand what it's testing + - Read the relevant source code + - Determine if the issue is in the test or the implementation + - Make the minimal fix needed to pass the test + - Re-run tests to verify the fix + +4. **Repeat if necessary** + - Continue the fix cycle until all tests pass + - Track your attempts - stop after 5 fix attempts if tests still fail + - If you cannot fix after 5 attempts, report the remaining failures to the user + +### Important Notes + +- **Don't skip tests** - All tests must pass before proceeding +- **Minimal fixes** - Make the smallest change needed to fix each failure +- **Understand before fixing** - Read and understand failing tests before attempting fixes +- **Track attempts** - Keep count of fix attempts to respect the 5-attempt limit + +## Output Format + +No file output is required. Success is determined by all tests passing. + +**On success**: Report that all tests pass and proceed to the next step. + +**On failure after 5 attempts**: Report which tests are still failing and why you couldn't fix them. 
+ +## Quality Criteria + +- All tests pass (`uv run pytest tests/ -v` exits with code 0) +- Any fixes made are minimal and don't break other functionality +- If tests couldn't be fixed in 5 attempts, clear explanation provided +- When all tests pass, include `✓ Quality Criteria Met` in your response + +## Context + +This is the first step in the commit workflow. Tests must pass before code formatting is checked, ensuring that any changes being committed are functionally correct. The test step uses a script hook that automatically runs pytest, so focus on analyzing results and making fixes. + + + +## Work Branch Management + +All work for this job should be done on a dedicated work branch: + +1. **Check current branch**: + - If already on a work branch for this job (format: `deepwork/commit-[instance]-[date]`), continue using it + - If on main/master, create a new work branch + +2. **Create work branch** (if needed): + ```bash + git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d) + ``` + Replace `[instance]` with a descriptive identifier (e.g., `acme`, `q1-launch`, etc.) + +## Output Requirements + +No specific files are output by this command. + +## Completion + +After completing this step: + +1. **Verify outputs**: Confirm all required files have been created + +2. 
**Inform the user**: + - Step 1 of 3 is complete + - Ready to proceed to next step: `/commit:format` + +## Next Step + +To continue the workflow, run: +``` +/commit:format +``` + +--- + +## Context Files + +- Job definition: `.deepwork/jobs/commit/job.yml` +- Step instructions: `.deepwork/jobs/commit/steps/test.md` +""" \ No newline at end of file diff --git a/.gemini/commands/deepwork_jobs/index.toml b/.gemini/commands/deepwork_jobs/index.toml index b197d4c..7ab8bcf 100644 --- a/.gemini/commands/deepwork_jobs/index.toml +++ b/.gemini/commands/deepwork_jobs/index.toml @@ -40,25 +40,30 @@ This job has 3 step(s): ## Instructions -Determine what the user wants to do and route to the appropriate step. +This is a **multi-step workflow**. Determine the starting point and guide the user through the steps in sequence. 1. **Analyze user intent** from the text that follows `/deepwork_jobs` -2. **Match intent to a step**: +2. **Identify the starting step** based on intent: - define: Create the job.yml specification file by understanding workflow requirements - implement: Generate instruction files for each step based on the job.yml specification - learn: Reflect on conversation to improve job instructions and capture learnings -3. **Invoke the matched step**: Tell the user to run the appropriate command: - - For define: `/deepwork_jobs:uw.define` - - For implement: `/deepwork_jobs:uw.implement` - - For learn: `/deepwork_jobs:learn` +3. **Guide the user through the workflow**: + - Tell the user to run the starting step command + - After each step completes, guide them to the next step in the workflow + - Continue until the workflow is complete -4. **If intent is ambiguous**, ask the user which step they want: + Step commands: + - define: `/deepwork_jobs:uw.define` + - implement: `/deepwork_jobs:uw.implement` + - learn: `/deepwork_jobs:learn` + +4. 
**If intent is ambiguous**, ask the user which step to start from: - Present the available steps as numbered options - Let them choose -**Note**: Gemini CLI doesn't support programmatic command invocation, so guide the user to run the appropriate step command. +**Note**: Gemini CLI requires manual command invocation. After each step completes, remind the user to run the next step command. ## Context Files diff --git a/.gemini/commands/deepwork_jobs/learn.toml b/.gemini/commands/deepwork_jobs/learn.toml index eb33d6d..3cd2aaa 100644 --- a/.gemini/commands/deepwork_jobs/learn.toml +++ b/.gemini/commands/deepwork_jobs/learn.toml @@ -351,6 +351,25 @@ Ensure all outputs are: - Well-formatted and complete - Ready for review or use by subsequent steps +## Quality Validation + +This step has quality criteria that should be verified before completion. + +### Quality Criteria + +1. **Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions? +2. **Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies? +3. **Instructions Improved**: Were job instructions updated to address identified issues? +4. **Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity? +5. **Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files? +6. **Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md? +7. **File References Used**: Do AGENTS.md entries reference other files where appropriate? +8. **Working Folder Correct**: Is AGENTS.md in the correct working folder for the job? +9. **Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md? +10. **Sync Complete**: Has `deepwork sync` been run if instructions were modified? + +**Note**: Gemini CLI does not support automated validation hooks. Please manually verify the criteria above before proceeding. 
+ ## Completion After completing this step: diff --git a/.gemini/commands/deepwork_jobs/uw.define.toml b/.gemini/commands/deepwork_jobs/uw.define.toml index d59b3e1..13784b6 100644 --- a/.gemini/commands/deepwork_jobs/uw.define.toml +++ b/.gemini/commands/deepwork_jobs/uw.define.toml @@ -394,6 +394,23 @@ Ensure all outputs are: - Well-formatted and complete - Ready for review or use by subsequent steps +## Quality Validation + +This step has quality criteria that should be verified before completion. + +### Quality Criteria + +1. **User Understanding**: Did the agent fully understand the user's workflow by asking structured questions? +2. **Structured Questions Used**: Did the agent ask structured questions (using the AskUserQuestion tool) to gather user input? +3. **Clear Inputs/Outputs**: Does every step have clearly defined inputs and outputs? +4. **Logical Dependencies**: Do step dependencies make sense and avoid circular references? +5. **Concise Summary**: Is the summary under 200 characters and descriptive? +6. **Rich Description**: Does the description provide enough context for future refinement? +7. **Valid Schema**: Does the job.yml follow the required schema (name, version, summary, steps)? +8. **File Created**: Has the job.yml file been created in `.deepwork/jobs/[job_name]/job.yml`? + +**Note**: Gemini CLI does not support automated validation hooks. Please manually verify the criteria above before proceeding. + ## Completion After completing this step: diff --git a/.gemini/commands/deepwork_jobs/uw.implement.toml b/.gemini/commands/deepwork_jobs/uw.implement.toml index 4cc5a98..322cbfd 100644 --- a/.gemini/commands/deepwork_jobs/uw.implement.toml +++ b/.gemini/commands/deepwork_jobs/uw.implement.toml @@ -309,6 +309,24 @@ Ensure all outputs are: - Well-formatted and complete - Ready for review or use by subsequent steps +## Quality Validation + +This step has quality criteria that should be verified before completion. + +### Quality Criteria + +1. 
**Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly? +2. **Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)? +3. **Specific & Actionable**: Are instructions tailored to each step's purpose, not generic? +4. **Output Examples**: Does each instruction file show what good output looks like? +5. **Quality Criteria**: Does each instruction file define quality criteria for its outputs? +6. **Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"? +7. **Sync Complete**: Has `deepwork sync` been run successfully? +8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`? +9. **Policies Considered**: Has the agent thought about whether policies would benefit this job? If relevant policies were identified, did they explain them and offer to run `/deepwork_policy.define`? Not every job needs policies - only suggest when genuinely helpful. + +**Note**: Gemini CLI does not support automated validation hooks. Please manually verify the criteria above before proceeding. + ## Completion After completing this step: diff --git a/.gemini/commands/deepwork_policy/index.toml b/.gemini/commands/deepwork_policy/index.toml index 1276544..b636376 100644 --- a/.gemini/commands/deepwork_policy/index.toml +++ b/.gemini/commands/deepwork_policy/index.toml @@ -38,21 +38,26 @@ This job has 1 step(s): ## Instructions -Determine what the user wants to do and route to the appropriate step. +This is a **multi-step workflow**. Determine the starting point and guide the user through the steps in sequence. 1. **Analyze user intent** from the text that follows `/deepwork_policy` -2. **Match intent to a step**: +2. **Identify the starting step** based on intent: - define: Create or update policy entries in .deepwork.policy.yml -3. 
**Invoke the matched step**: Tell the user to run the appropriate command: - - For define: `/deepwork_policy:uw.define` +3. **Guide the user through the workflow**: + - Tell the user to run the starting step command + - After each step completes, guide them to the next step in the workflow + - Continue until the workflow is complete -4. **If intent is ambiguous**, ask the user which step they want: + Step commands: + - define: `/deepwork_policy:uw.define` + +4. **If intent is ambiguous**, ask the user which step to start from: - Present the available steps as numbered options - Let them choose -**Note**: Gemini CLI doesn't support programmatic command invocation, so guide the user to run the appropriate step command. +**Note**: Gemini CLI requires manual command invocation. After each step completes, remind the user to run the next step command. ## Context Files diff --git a/.gemini/commands/update/index.toml b/.gemini/commands/update/index.toml index 5c423cd..5491f37 100644 --- a/.gemini/commands/update/index.toml +++ b/.gemini/commands/update/index.toml @@ -36,21 +36,26 @@ This job has 1 step(s): ## Instructions -Determine what the user wants to do and route to the appropriate step. +This is a **multi-step workflow**. Determine the starting point and guide the user through the steps in sequence. 1. **Analyze user intent** from the text that follows `/update` -2. **Match intent to a step**: +2. **Identify the starting step** based on intent: - job: Edit standard job source files and sync to installed locations -3. **Invoke the matched step**: Tell the user to run the appropriate command: - - For job: `/update:uw.job` +3. **Guide the user through the workflow**: + - Tell the user to run the starting step command + - After each step completes, guide them to the next step in the workflow + - Continue until the workflow is complete -4. **If intent is ambiguous**, ask the user which step they want: + Step commands: + - job: `/update:uw.job` + +4. 
**If intent is ambiguous**, ask the user which step to start from: - Present the available steps as numbered options - Let them choose -**Note**: Gemini CLI doesn't support programmatic command invocation, so guide the user to run the appropriate step command. +**Note**: Gemini CLI requires manual command invocation. After each step completes, remind the user to run the next step command. ## Context Files diff --git a/.gemini/commands/update/uw.job.toml b/.gemini/commands/update/uw.job.toml index c38490e..85ffc7a 100644 --- a/.gemini/commands/update/uw.job.toml +++ b/.gemini/commands/update/uw.job.toml @@ -134,6 +134,19 @@ All work for this job should be done on a dedicated work branch: No specific files are output by this command. +## Quality Validation + +This step has quality criteria that should be verified before completion. + +### Quality Criteria + +1. Were changes made in src/deepwork/standard_jobs/[job_name]/ (NOT in .deepwork/jobs/)? +2. Was `deepwork install --platform claude` run? +3. Do files in .deepwork/jobs/ match the source files? +4. Were command files in .claude/commands/ regenerated? + +**Note**: Gemini CLI does not support automated validation hooks. Please manually verify the criteria above before proceeding. 
+ ## Completion After completing this step: diff --git a/src/deepwork/core/generator.py b/src/deepwork/core/generator.py index d3331d8..1ef90c0 100644 --- a/src/deepwork/core/generator.py +++ b/src/deepwork/core/generator.py @@ -199,6 +199,7 @@ def _build_step_context( "is_standalone": is_standalone, "hooks": hooks, # New: all hooks by platform event name "stop_hooks": stop_hooks, # Backward compat: after_agent hooks only + "quality_criteria": step.quality_criteria, # Declarative criteria with framing } def _build_meta_command_context( diff --git a/src/deepwork/core/parser.py b/src/deepwork/core/parser.py index c7b6a60..28eb7a0 100644 --- a/src/deepwork/core/parser.py +++ b/src/deepwork/core/parser.py @@ -111,6 +111,9 @@ class Step: # If true, step command is visible (no tilde prefix). Default: false (hidden). exposed: bool = False + # Declarative quality criteria rendered with standard evaluation framing + quality_criteria: list[str] = field(default_factory=list) + @property def stop_hooks(self) -> list[HookAction]: """ @@ -148,6 +151,7 @@ def from_dict(cls, data: dict[str, Any]) -> "Step": dependencies=data.get("dependencies", []), hooks=hooks, exposed=data.get("exposed", False), + quality_criteria=data.get("quality_criteria", []), ) diff --git a/src/deepwork/schemas/job_schema.py b/src/deepwork/schemas/job_schema.py index b66fe24..25e0e88 100644 --- a/src/deepwork/schemas/job_schema.py +++ b/src/deepwork/schemas/job_schema.py @@ -208,6 +208,14 @@ "description": "If true, step command is visible (no tilde prefix). Default: false (hidden).", "default": False, }, + "quality_criteria": { + "type": "array", + "description": "Declarative quality criteria. 
Rendered with standard evaluation framing.", + "items": { + "type": "string", + "minLength": 1, + }, + }, }, "additionalProperties": False, }, diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index f54f7f4..e87c03c 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -36,22 +36,15 @@ steps: outputs: - job.yml dependencies: [] - hooks: - after_agent: - - prompt: | - **AGENT: TAKE ACTION** - Verify the job.yml output meets ALL quality criteria before completing: - - 1. **User Understanding**: Did you fully understand the user's workflow by asking structured questions? - 2. **Structured Questions Used**: Did you ask structured questions (using the AskUserQuestion tool) to gather user input? - 3. **Clear Inputs/Outputs**: Does every step have clearly defined inputs and outputs? - 4. **Logical Dependencies**: Do step dependencies make sense and avoid circular references? - 5. **Concise Summary**: Is the summary under 200 characters and descriptive? - 6. **Rich Description**: Does the description provide enough context for future refinement? - 7. **Valid Schema**: Does the job.yml follow the required schema (name, version, summary, steps)? - 8. **File Created**: Has the job.yml file been created in `.deepwork/jobs/[job_name]/job.yml`? - - If ANY criterion is not met, continue working to address it. - If ALL criteria are satisfied, include `✓ Quality Criteria Met` in your response. + quality_criteria: + - "**User Understanding**: Did the agent fully understand the user's workflow by asking structured questions?" + - "**Structured Questions Used**: Did the agent ask structured questions (using the AskUserQuestion tool) to gather user input?" + - "**Clear Inputs/Outputs**: Does every step have clearly defined inputs and outputs?" + - "**Logical Dependencies**: Do step dependencies make sense and avoid circular references?" 
+ - "**Concise Summary**: Is the summary under 200 characters and descriptive?" + - "**Rich Description**: Does the description provide enough context for future refinement?" + - "**Valid Schema**: Does the job.yml follow the required schema (name, version, summary, steps)?" + - "**File Created**: Has the job.yml file been created in `.deepwork/jobs/[job_name]/job.yml`?" - id: implement name: "Implement Job Steps" @@ -95,21 +88,14 @@ steps: outputs: - AGENTS.md dependencies: [] - hooks: - after_agent: - - prompt: | - **AGENT: TAKE ACTION** - Verify the learning process meets ALL quality criteria before completing: - - 1. **Conversation Analyzed**: Did you review the conversation for DeepWork job executions? - 2. **Confusion Identified**: Did you identify points of confusion, errors, or inefficiencies? - 3. **Instructions Improved**: Were job instructions updated to address identified issues? - 4. **Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity? - 5. **Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files? - 6. **Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md? - 7. **File References Used**: Do AGENTS.md entries reference other files where appropriate? - 8. **Working Folder Correct**: Is AGENTS.md in the correct working folder for the job? - 9. **Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md? - 10. **Sync Complete**: Has `deepwork sync` been run if instructions were modified? - - If ANY criterion is not met, continue working to address it. - If ALL criteria are satisfied, include `✓ Quality Criteria Met` in your response. + quality_criteria: + - "**Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions?" + - "**Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies?" 
+ - "**Instructions Improved**: Were job instructions updated to address identified issues?" + - "**Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity?" + - "**Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files?" + - "**Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md?" + - "**File References Used**: Do AGENTS.md entries reference other files where appropriate?" + - "**Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?" + - "**Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?" + - "**Sync Complete**: Has `deepwork sync` been run if instructions were modified?" diff --git a/src/deepwork/templates/claude/command-job-meta.md.jinja b/src/deepwork/templates/claude/command-job-meta.md.jinja index 8faf7ed..66de204 100644 --- a/src/deepwork/templates/claude/command-job-meta.md.jinja +++ b/src/deepwork/templates/claude/command-job-meta.md.jinja @@ -25,25 +25,28 @@ This job has {{ total_steps }} step(s): ## Instructions -Determine what the user wants to do and route to the appropriate step. +This is a **multi-step workflow**. Determine the starting point and run through the steps in sequence. 1. **Analyze user intent** from the text that follows `/{{ job_name }}` -2. **Match intent to a step**: +2. **Identify the starting step** based on intent: {% for step in steps %} - {{ step.id }}: {{ step.description }} {% endfor %} -3. **Invoke the matched step** using the Skill tool: - ``` - Skill: - ``` +3. **Run the workflow** starting from the identified step: + - Invoke the starting step using the Skill tool + - When that step completes, **automatically continue** to the next step in the workflow + - Continue until the workflow is complete or the user intervenes -4. **If intent is ambiguous**, ask the user which step they want: +4. 
**If intent is ambiguous**, ask the user which step to start from: - Present the available steps as numbered options - Use AskUserQuestion to let them choose -**Critical**: You MUST invoke the step using the Skill tool. Do not copy/paste the step's instructions. The Skill tool invocation ensures the step's quality validation hooks fire. +**Critical**: +- You MUST invoke each step using the Skill tool. Do not copy/paste step instructions. +- After each step completes, check if there's a next step and invoke it automatically. +- The workflow continues until all dependent steps are complete. ## Context Files diff --git a/src/deepwork/templates/claude/command-job-step.md.jinja b/src/deepwork/templates/claude/command-job-step.md.jinja index ff7c283..5793dc3 100644 --- a/src/deepwork/templates/claude/command-job-step.md.jinja +++ b/src/deepwork/templates/claude/command-job-step.md.jinja @@ -1,23 +1,19 @@ --- description: {{ step_description }} -{% if hooks %} +{% if quality_criteria or hooks %} hooks: -{% for event_name, event_hooks in hooks.items() %} - {{ event_name }}: +{% if quality_criteria %} + Stop: - hooks: -{% for hook in event_hooks %} -{% if hook.type == "script" %} - - type: command - command: ".deepwork/jobs/{{ job_name }}/{{ hook.path }}" -{% else %} - type: prompt prompt: | -{% if event_name == "Stop" %} You must evaluate whether Claude has met all the below quality criteria for the request. ## Quality Criteria - {{ hook.content | indent(12) }} +{% for criterion in quality_criteria %} + {{ loop.index }}. {{ criterion }} +{% endfor %} ## Instructions @@ -27,13 +23,24 @@ hooks: If the agent has included `✓ Quality Criteria Met` in their response AND all criteria appear to be met, respond with: {"ok": true} - If criteria are NOT met AND the promise tag is missing, respond with: - {"ok": false, "reason": "Continue working. 
[specific feedback on what's wrong]"} + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} +{% endif %} +{% for event_name, event_hooks in hooks.items() %} +{% if not (event_name == "Stop" and quality_criteria) %} + {{ event_name }}: + - hooks: +{% for hook in event_hooks %} +{% if hook.type == "script" %} + - type: command + command: ".deepwork/jobs/{{ job_name }}/{{ hook.path }}" {% else %} + - type: prompt + prompt: | {{ hook.content | indent(12) }} {% endif %} -{% endif %} {% endfor %} +{% endif %} {% endfor %} {% endif %} --- @@ -122,20 +129,24 @@ Ensure all outputs are: No specific files are output by this command. {% endif %} -{% if stop_hooks %} +{% if quality_criteria or stop_hooks %} ## Quality Validation Loop This step uses an iterative quality validation loop. After completing your work, stop hook(s) will evaluate whether the outputs meet quality criteria. If criteria are not met, you will be prompted to continue refining. +{% if quality_criteria %} +### Quality Criteria + +{% for criterion in quality_criteria %} +{{ loop.index }}. {{ criterion }} +{% endfor %} +{% endif %} + {% for hook in stop_hooks %} {% if hook.type == "script" %} **Validation Script**: `.deepwork/jobs/{{ job_name }}/{{ hook.path }}` The validation script will be executed automatically when you attempt to complete this step. 
-{% else %} -### Quality Criteria{% if stop_hooks | length > 1 %} ({{ loop.index }}){% endif %} - -{{ hook.content }} {% endif %} {% endfor %} diff --git a/src/deepwork/templates/gemini/command-job-meta.toml.jinja b/src/deepwork/templates/gemini/command-job-meta.toml.jinja index 24c09ef..d0ffefa 100644 --- a/src/deepwork/templates/gemini/command-job-meta.toml.jinja +++ b/src/deepwork/templates/gemini/command-job-meta.toml.jinja @@ -30,25 +30,30 @@ This job has {{ total_steps }} step(s): ## Instructions -Determine what the user wants to do and route to the appropriate step. +This is a **multi-step workflow**. Determine the starting point and guide the user through the steps in sequence. 1. **Analyze user intent** from the text that follows `/{{ job_name }}` -2. **Match intent to a step**: +2. **Identify the starting step** based on intent: {% for step in steps %} - {{ step.id }}: {{ step.description }} {% endfor %} -3. **Invoke the matched step**: Tell the user to run the appropriate command: +3. **Guide the user through the workflow**: + - Tell the user to run the starting step command + - After each step completes, guide them to the next step in the workflow + - Continue until the workflow is complete + + Step commands: {% for step in steps %} - - For {{ step.id }}: `/{{ step.command_name }}` + - {{ step.id }}: `/{{ step.command_name }}` {% endfor %} -4. **If intent is ambiguous**, ask the user which step they want: +4. **If intent is ambiguous**, ask the user which step to start from: - Present the available steps as numbered options - Let them choose -**Note**: Gemini CLI doesn't support programmatic command invocation, so guide the user to run the appropriate step command. +**Note**: Gemini CLI requires manual command invocation. After each step completes, remind the user to run the next step command. 
## Context Files diff --git a/src/deepwork/templates/gemini/command-job-step.toml.jinja b/src/deepwork/templates/gemini/command-job-step.toml.jinja index bb5d69b..47081b8 100644 --- a/src/deepwork/templates/gemini/command-job-step.toml.jinja +++ b/src/deepwork/templates/gemini/command-job-step.toml.jinja @@ -92,18 +92,18 @@ Ensure all outputs are: No specific files are output by this command. {% endif %} -{% if stop_hooks %} +{% if quality_criteria or stop_hooks %} ## Quality Validation This step has quality criteria that should be verified before completion. -{% for hook in stop_hooks %} -{% if hook.type != "script" %} -### Quality Criteria{% if stop_hooks | length > 1 %} ({{ loop.index }}){% endif %} +{% if quality_criteria %} +### Quality Criteria -{{ hook.content }} -{% endif %} +{% for criterion in quality_criteria %} +{{ loop.index }}. {{ criterion }} {% endfor %} +{% endif %} **Note**: Gemini CLI does not support automated validation hooks. Please manually verify the criteria above before proceeding. 
diff --git a/tests/unit/test_stop_hooks.py b/tests/unit/test_stop_hooks.py index 0e30117..7a6ecaf 100644 --- a/tests/unit/test_stop_hooks.py +++ b/tests/unit/test_stop_hooks.py @@ -620,3 +620,369 @@ def test_build_context_multiple_hooks( assert context["stop_hooks"][0]["type"] == "prompt" assert context["stop_hooks"][1]["type"] == "script" assert context["stop_hooks"][2]["type"] == "prompt" + + +class TestQualityCriteriaSchema: + """Tests for quality_criteria schema validation.""" + + def test_valid_quality_criteria(self) -> None: + """Test schema accepts valid quality_criteria array.""" + job_data = { + "name": "test_job", + "version": "1.0.0", + "summary": "Test job", + "steps": [ + { + "id": "step1", + "name": "Step 1", + "description": "A step", + "instructions_file": "steps/step1.md", + "outputs": ["output.md"], + "quality_criteria": [ + "First criterion", + "Second criterion", + "Third criterion", + ], + } + ], + } + validate_against_schema(job_data, JOB_SCHEMA) + + def test_empty_quality_criteria(self) -> None: + """Test schema accepts empty quality_criteria array.""" + job_data = { + "name": "test_job", + "version": "1.0.0", + "summary": "Test job", + "steps": [ + { + "id": "step1", + "name": "Step 1", + "description": "A step", + "instructions_file": "steps/step1.md", + "outputs": ["output.md"], + "quality_criteria": [], + } + ], + } + validate_against_schema(job_data, JOB_SCHEMA) + + def test_quality_criteria_with_hooks(self) -> None: + """Test schema accepts both quality_criteria and hooks.""" + job_data = { + "name": "test_job", + "version": "1.0.0", + "summary": "Test job", + "steps": [ + { + "id": "step1", + "name": "Step 1", + "description": "A step", + "instructions_file": "steps/step1.md", + "outputs": ["output.md"], + "quality_criteria": ["First criterion"], + "hooks": { + "after_agent": [{"script": "hooks/run_tests.sh"}], + }, + } + ], + } + validate_against_schema(job_data, JOB_SCHEMA) + + def test_invalid_quality_criteria_not_array(self) -> 
None: + """Test schema rejects non-array quality_criteria.""" + job_data = { + "name": "test_job", + "version": "1.0.0", + "summary": "Test job", + "steps": [ + { + "id": "step1", + "name": "Step 1", + "description": "A step", + "instructions_file": "steps/step1.md", + "outputs": ["output.md"], + "quality_criteria": "not an array", + } + ], + } + with pytest.raises(ValidationError): + validate_against_schema(job_data, JOB_SCHEMA) + + def test_invalid_quality_criteria_empty_string(self) -> None: + """Test schema rejects empty string in quality_criteria.""" + job_data = { + "name": "test_job", + "version": "1.0.0", + "summary": "Test job", + "steps": [ + { + "id": "step1", + "name": "Step 1", + "description": "A step", + "instructions_file": "steps/step1.md", + "outputs": ["output.md"], + "quality_criteria": ["Valid criterion", ""], + } + ], + } + with pytest.raises(ValidationError): + validate_against_schema(job_data, JOB_SCHEMA) + + +class TestStepQualityCriteria: + """Tests for Step with quality_criteria.""" + + def test_step_with_no_quality_criteria(self) -> None: + """Test step without quality_criteria.""" + step = Step( + id="test", + name="Test Step", + description="A test step", + instructions_file="steps/test.md", + outputs=["output.md"], + ) + assert step.quality_criteria == [] + + def test_step_with_quality_criteria(self) -> None: + """Test step with quality_criteria.""" + step = Step( + id="test", + name="Test Step", + description="A test step", + instructions_file="steps/test.md", + outputs=["output.md"], + quality_criteria=["First criterion", "Second criterion"], + ) + assert len(step.quality_criteria) == 2 + assert step.quality_criteria[0] == "First criterion" + assert step.quality_criteria[1] == "Second criterion" + + def test_step_from_dict_with_quality_criteria(self) -> None: + """Test Step.from_dict parses quality_criteria.""" + data = { + "id": "test", + "name": "Test Step", + "description": "A test step", + "instructions_file": "steps/test.md", + 
"outputs": ["output.md"], + "quality_criteria": ["Criterion 1", "Criterion 2", "Criterion 3"], + } + step = Step.from_dict(data) + assert len(step.quality_criteria) == 3 + assert step.quality_criteria[0] == "Criterion 1" + assert step.quality_criteria[2] == "Criterion 3" + + def test_step_from_dict_without_quality_criteria(self) -> None: + """Test Step.from_dict with no quality_criteria returns empty list.""" + data = { + "id": "test", + "name": "Test Step", + "description": "A test step", + "instructions_file": "steps/test.md", + "outputs": ["output.md"], + } + step = Step.from_dict(data) + assert step.quality_criteria == [] + + def test_step_with_both_quality_criteria_and_hooks(self) -> None: + """Test step with both quality_criteria and hooks.""" + step = Step( + id="test", + name="Test Step", + description="A test step", + instructions_file="steps/test.md", + outputs=["output.md"], + quality_criteria=["Check this", "Check that"], + hooks={"after_agent": [HookAction(script="hooks/run_tests.sh")]}, + ) + assert len(step.quality_criteria) == 2 + assert len(step.stop_hooks) == 1 + assert step.stop_hooks[0].is_script() + + +class TestGeneratorQualityCriteria: + """Tests for generator quality_criteria context building.""" + + @pytest.fixture + def generator(self, tmp_path: Path) -> CommandGenerator: + """Create generator with temp templates.""" + templates_dir = tmp_path / "templates" + claude_dir = templates_dir / "claude" + claude_dir.mkdir(parents=True) + + # Create minimal template + template_content = """--- +description: {{ step_description }} +{% if quality_criteria or hooks %} +hooks: +{% if quality_criteria %} + Stop: + - hooks: + - type: prompt + prompt: | + Quality Criteria: +{% for criterion in quality_criteria %} + {{ loop.index }}. 
{{ criterion }} +{% endfor %} +{% endif %} +{% for event_name, event_hooks in hooks.items() %} + {{ event_name }}: + - hooks: +{% for hook in event_hooks %} +{% if hook.type == "script" %} + - type: command + command: ".deepwork/jobs/{{ job_name }}/{{ hook.path }}" +{% else %} + - type: prompt + prompt: "{{ hook.content }}" +{% endif %} +{% endfor %} +{% endfor %} +{% endif %} +--- +# {{ job_name }}.{{ step_id }} +{{ instructions_content }} +""" + (claude_dir / "command-job-step.md.jinja").write_text(template_content) + return CommandGenerator(templates_dir) + + def test_build_context_with_quality_criteria( + self, generator: CommandGenerator, tmp_path: Path + ) -> None: + """Test context building includes quality_criteria.""" + job_dir = tmp_path / "test_job" + job_dir.mkdir() + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "step1.md").write_text("# Step 1 Instructions") + + job = JobDefinition( + name="test_job", + version="1.0.0", + summary="Test job", + description="A test job", + steps=[ + Step( + id="step1", + name="Step 1", + description="First step", + instructions_file="steps/step1.md", + outputs=["output.md"], + quality_criteria=["Check A", "Check B"], + ), + ], + job_dir=job_dir, + ) + + adapter = ClaudeAdapter() + context = generator._build_step_context(job, job.steps[0], 0, adapter) + assert "quality_criteria" in context + assert len(context["quality_criteria"]) == 2 + assert context["quality_criteria"][0] == "Check A" + assert context["quality_criteria"][1] == "Check B" + + def test_build_context_quality_criteria_empty( + self, generator: CommandGenerator, tmp_path: Path + ) -> None: + """Test context with no quality_criteria.""" + job_dir = tmp_path / "test_job" + job_dir.mkdir() + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "step1.md").write_text("# Step 1") + + job = JobDefinition( + name="test_job", + version="1.0.0", + summary="Test", + description="Test", + steps=[ + Step( + id="step1", + name="Step 1", + 
description="Step", + instructions_file="steps/step1.md", + outputs=["out.md"], + ) + ], + job_dir=job_dir, + ) + + adapter = ClaudeAdapter() + context = generator._build_step_context(job, job.steps[0], 0, adapter) + assert context["quality_criteria"] == [] + + def test_build_context_quality_criteria_and_hooks( + self, generator: CommandGenerator, tmp_path: Path + ) -> None: + """Test context with both quality_criteria and hooks.""" + job_dir = tmp_path / "test_job" + job_dir.mkdir() + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "step1.md").write_text("# Step 1") + + job = JobDefinition( + name="test_job", + version="1.0.0", + summary="Test", + description="Test", + steps=[ + Step( + id="step1", + name="Step 1", + description="Step", + instructions_file="steps/step1.md", + outputs=["out.md"], + quality_criteria=["Criterion A", "Criterion B"], + hooks={"after_agent": [HookAction(script="hooks/test.sh")]}, + ) + ], + job_dir=job_dir, + ) + + adapter = ClaudeAdapter() + context = generator._build_step_context(job, job.steps[0], 0, adapter) + assert len(context["quality_criteria"]) == 2 + assert len(context["stop_hooks"]) == 1 + assert context["stop_hooks"][0]["type"] == "script" + + def test_generate_command_with_quality_criteria( + self, generator: CommandGenerator, tmp_path: Path + ) -> None: + """Test command generation includes quality_criteria in output.""" + job_dir = tmp_path / "test_job" + job_dir.mkdir() + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "step1.md").write_text("# Step 1 Instructions") + + job = JobDefinition( + name="test_job", + version="1.0.0", + summary="Test job", + description="A test job", + steps=[ + Step( + id="step1", + name="Step 1", + description="First step", + instructions_file="steps/step1.md", + outputs=["output.md"], + quality_criteria=["Check output format", "Verify completeness"], + ), + ], + job_dir=job_dir, + ) + + output_dir = tmp_path / "output" + output_dir.mkdir() + + adapter = 
ClaudeAdapter() + command_path = generator.generate_step_command(job, job.steps[0], adapter, output_dir) + content = command_path.read_text() + + assert "Quality Criteria:" in content + assert "1. Check output format" in content + assert "2. Verify completeness" in content From 58e712f2485bd690c78edcb57b4ff5b34376c8cd Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Fri, 16 Jan 2026 18:14:17 -0700 Subject: [PATCH 6/6] Prevent mixing hook types in job schema validation Updates the job schema to enforce that each hook event (after_agent, before_tool, before_prompt) can only contain hooks of a single type - either all scripts, all prompts, or all prompt_files. Mixing types like having both a script and a prompt in the same event is now rejected. This prevents confusing configurations where script output would be followed by a prompt evaluation, which doesn't work as expected. Adds tests to verify: - Mixed script/prompt hooks are rejected - Mixed script/prompt_file hooks are rejected - Multiple scripts in same event are allowed - Multiple prompts in same event are allowed Co-Authored-By: Claude Opus 4.5 --- .claude/commands/deepwork_rules.md | 59 +++ .claude/commands/update.md | 2 +- .claude/commands/uw.add_platform.verify.md | 8 +- .claude/commands/uw.commit.format.md | 21 +- .claude/commands/uw.commit.test.md | 21 +- .../commands/uw.deepwork_jobs.implement.md | 42 +-- .claude/commands/uw.deepwork_rules.define.md | 339 +++++++++++++++++ .deepwork/jobs/add_platform/job.yml | 19 +- .deepwork/jobs/deepwork_jobs/job.yml | 29 +- .gemini/commands/add_platform/uw.verify.toml | 2 +- .gemini/commands/commit/uw.format.toml | 13 +- .gemini/commands/commit/uw.test.toml | 13 +- .../commands/deepwork_jobs/uw.implement.toml | 2 +- .gemini/commands/deepwork_rules/index.toml | 65 ++++ .../commands/deepwork_rules/uw.define.toml | 346 ++++++++++++++++++ .gemini/commands/update/index.toml | 2 +- src/deepwork/schemas/job_schema.py | 81 +++- .../standard_jobs/deepwork_jobs/job.yml | 29 +- 
tests/unit/test_stop_hooks.py | 98 +++++ 19 files changed, 1077 insertions(+), 114 deletions(-) create mode 100644 .claude/commands/deepwork_rules.md create mode 100644 .claude/commands/uw.deepwork_rules.define.md create mode 100644 .gemini/commands/deepwork_rules/index.toml create mode 100644 .gemini/commands/deepwork_rules/uw.define.toml diff --git a/.claude/commands/deepwork_rules.md b/.claude/commands/deepwork_rules.md new file mode 100644 index 0000000..7535c6e --- /dev/null +++ b/.claude/commands/deepwork_rules.md @@ -0,0 +1,59 @@ +--- +description: Rules enforcement for AI agent sessions +--- + +# deepwork_rules + +You are executing the **deepwork_rules** job. Rules enforcement for AI agent sessions + +Manages rules that automatically trigger when certain files change during an AI agent session. +Rules help ensure that code changes follow team guidelines, documentation is updated, +and architectural decisions are respected. + +Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/` +directory. Each rule file specifies: +- Detection mode: trigger/safety, set (bidirectional), or pair (directional) +- Patterns: Glob patterns for matching files, with optional variable capture +- Instructions: Markdown content describing what the agent should do + +Example use cases: +- Update installation docs when configuration files change +- Require security review when authentication code is modified +- Ensure API documentation stays in sync with API code +- Enforce source/test file pairing + + +## Available Steps + +This job has 1 step(s): + +### define +**Define Rule**: Create a new rule file in .deepwork/rules/ +- Command: `uw.deepwork_rules.define` + +## Instructions + +This is a **multi-step workflow**. Determine the starting point and run through the steps in sequence. + +1. **Analyze user intent** from the text that follows `/deepwork_rules` + +2. 
**Identify the starting step** based on intent: + - define: Create a new rule file in .deepwork/rules/ + +3. **Run the workflow** starting from the identified step: + - Invoke the starting step using the Skill tool + - When that step completes, **automatically continue** to the next step in the workflow + - Continue until the workflow is complete or the user intervenes + +4. **If intent is ambiguous**, ask the user which step to start from: + - Present the available steps as numbered options + - Use AskUserQuestion to let them choose + +**Critical**: +- You MUST invoke each step using the Skill tool. Do not copy/paste step instructions. +- After each step completes, check if there's a next step and invoke it automatically. +- The workflow continues until all dependent steps are complete. + +## Context Files + +- Job definition: `.deepwork/jobs/deepwork_rules/job.yml` \ No newline at end of file diff --git a/.claude/commands/update.md b/.claude/commands/update.md index 47e84ce..3c7b270 100644 --- a/.claude/commands/update.md +++ b/.claude/commands/update.md @@ -7,7 +7,7 @@ description: Update standard jobs in src/ and sync to installed locations You are executing the **update** job. Update standard jobs in src/ and sync to installed locations A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs -(like `deepwork_jobs` and `deepwork_policy`) are source-controlled in +(like `deepwork_jobs` and `deepwork_rules`) are source-controlled in `src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/` or `.claude/commands/` directly. diff --git a/.claude/commands/uw.add_platform.verify.md b/.claude/commands/uw.add_platform.verify.md index ac8ab1b..72e7bc0 100644 --- a/.claude/commands/uw.add_platform.verify.md +++ b/.claude/commands/uw.add_platform.verify.md @@ -13,7 +13,7 @@ hooks: 2. Does running `deepwork install --platform ` complete without errors? 3. Are expected command files created in the platform's command directory? 4. 
Does command file content match the templates and job definitions? - 5. Are established DeepWork jobs (deepwork_jobs, deepwork_policy) installed correctly? + 5. Are established DeepWork jobs (deepwork_jobs, deepwork_rules) installed correctly? 6. Can the platform be used alongside existing platforms without conflicts? ## Instructions @@ -117,7 +117,7 @@ Ensure the implementation step is complete: - `deepwork_jobs.define.md` exists (or equivalent for the platform) - `deepwork_jobs.implement.md` exists - `deepwork_jobs.refine.md` exists - - `deepwork_policy.define.md` exists + - `deepwork_rules.define.md` exists - All expected step commands exist 4. **Validate command file content** @@ -147,7 +147,7 @@ Ensure the implementation step is complete: - `deepwork install --platform ` completes without errors - All expected command files are created: - deepwork_jobs.define, implement, refine - - deepwork_policy.define + - deepwork_rules.define - Any other standard job commands - Command file content is correct: - Matches platform's expected format @@ -214,7 +214,7 @@ This step uses an iterative quality validation loop. After completing your work, 2. Does running `deepwork install --platform ` complete without errors? 3. Are expected command files created in the platform's command directory? 4. Does command file content match the templates and job definitions? -5. Are established DeepWork jobs (deepwork_jobs, deepwork_policy) installed correctly? +5. Are established DeepWork jobs (deepwork_jobs, deepwork_rules) installed correctly? 6. Can the platform be used alongside existing platforms without conflicts? 
diff --git a/.claude/commands/uw.commit.format.md b/.claude/commands/uw.commit.format.md index 3b2032c..fde93d4 100644 --- a/.claude/commands/uw.commit.format.md +++ b/.claude/commands/uw.commit.format.md @@ -5,14 +5,6 @@ hooks: - hooks: - type: command command: ".deepwork/jobs/commit/hooks/run_ruff.sh" - - type: prompt - prompt: | - Evaluate the ruff format and lint check output above. - - **If ruff reported issues (exit code non-zero)**: Start your response with "**AGENT: TAKE ACTION** -" followed by what needs to be fixed. - - **If ruff reported no issues (exit code 0)**: Confirm the agent included `✓ Quality Criteria Met`. Allow completion. - --- # commit.format @@ -128,11 +120,20 @@ No file output is required. Success is determined by ruff passing all checks. - `uv run ruff check src/ tests/` passes (exit code 0) - Any fixes made don't break functionality (tests should still pass) - If issues couldn't be fixed in 5 attempts, clear explanation provided -- When all checks pass, include `✓ Quality Criteria Met` in your response + +## Hook Behavior + +After you complete this step, a hook will automatically run ruff format and lint checks and show you the results. + +**Interpreting the hook output:** +- **Both checks passed (exit code 0)**: The step is complete. Proceed to the next step. +- **Checks failed (exit code non-zero)**: You must fix the issues. Use `uv run ruff format src/ tests/` for formatting and `uv run ruff check --fix src/ tests/` for auto-fixable lint issues. For remaining issues, fix manually. The hook will re-run after each attempt. + +**Important**: The hook runs automatically - you don't need to run the checks yourself after fixing. Just focus on making fixes, and the hook will verify them. ## Context -This is the second step in the commit workflow, after tests pass. Code must be properly formatted and lint-free before committing. 
The format step uses a script hook that automatically runs ruff checks, so focus on analyzing results and making fixes efficiently. +This is the second step in the commit workflow, after tests pass. Code must be properly formatted and lint-free before committing. diff --git a/.claude/commands/uw.commit.test.md b/.claude/commands/uw.commit.test.md index 31fd454..e61a49f 100644 --- a/.claude/commands/uw.commit.test.md +++ b/.claude/commands/uw.commit.test.md @@ -5,14 +5,6 @@ hooks: - hooks: - type: command command: ".deepwork/jobs/commit/hooks/run_tests.sh" - - type: prompt - prompt: | - Evaluate the pytest output above. - - **If any tests failed**: Start your response with "**AGENT: TAKE ACTION** -" followed by which tests failed and why. - - **If ALL tests passed**: Confirm the agent included `✓ Quality Criteria Met`. Allow completion. - --- # commit.test @@ -100,11 +92,20 @@ No file output is required. Success is determined by all tests passing. - All tests pass (`uv run pytest tests/ -v` exits with code 0) - Any fixes made are minimal and don't break other functionality - If tests couldn't be fixed in 5 attempts, clear explanation provided -- When all tests pass, include `✓ Quality Criteria Met` in your response + +## Hook Behavior + +After you complete this step, a hook will automatically run `uv run pytest tests/ -v` and show you the results. + +**Interpreting the hook output:** +- **All tests passed (exit code 0)**: The step is complete. Proceed to the next step. +- **Tests failed (exit code non-zero)**: You must fix the failing tests. Analyze the output, make fixes, and try again. The hook will re-run after each attempt. + +**Important**: The hook runs automatically - you don't need to run pytest yourself after the initial run. Just focus on making fixes when tests fail, and the hook will verify your fixes. ## Context -This is the first step in the commit workflow. 
Tests must pass before code formatting is checked, ensuring that any changes being committed are functionally correct. The test step uses a script hook that automatically runs pytest, so focus on analyzing results and making fixes. +This is the first step in the commit workflow. Tests must pass before code formatting is checked, ensuring that any changes being committed are functionally correct. diff --git a/.claude/commands/uw.deepwork_jobs.implement.md b/.claude/commands/uw.deepwork_jobs.implement.md index e766e4c..2ad241f 100644 --- a/.claude/commands/uw.deepwork_jobs.implement.md +++ b/.claude/commands/uw.deepwork_jobs.implement.md @@ -17,7 +17,7 @@ hooks: 6. **Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"? 7. **Sync Complete**: Has `deepwork sync` been run successfully? 8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`? - 9. **Policies Considered**: Has the agent thought about whether policies would benefit this job? If relevant policies were identified, did they explain them and offer to run `/deepwork_policy.define`? Not every job needs policies - only suggest when genuinely helpful. + 9. **Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful. ## Instructions @@ -192,19 +192,19 @@ This will: After running `deepwork sync`, look at the "To use the new commands" section in the output. **Relay these exact reload instructions to the user** so they know how to pick up the new commands. Don't just reference the sync output - tell them directly what they need to do (e.g., "Type 'exit' then run 'claude --resume'" for Claude Code, or "Run '/memory refresh'" for Gemini CLI). 
-### Step 7: Consider Policies for the New Job +### Step 7: Consider Rules for the New Job -After implementing the job, consider whether there are **policies** that would help enforce quality or consistency when working with this job's domain. +After implementing the job, consider whether there are **rules** that would help enforce quality or consistency when working with this job's domain. -**What are policies?** +**What are rules?** -Policies are automated guardrails defined in `.deepwork.policy.yml` that trigger when certain files change during an AI session. They help ensure: +Rules are automated guardrails stored as markdown files in `.deepwork/rules/` that trigger when certain files change during an AI session. They help ensure: - Documentation stays in sync with code - Team guidelines are followed - Architectural decisions are respected - Quality standards are maintained -**When to suggest policies:** +**When to suggest rules:** Think about the job you just implemented and ask: - Does this job produce outputs that other files depend on? @@ -212,28 +212,28 @@ Think about the job you just implemented and ask: - Are there quality checks or reviews that should happen when certain files in this domain change? - Could changes to the job's output files impact other parts of the project? 
-**Examples of policies that might make sense:** +**Examples of rules that might make sense:** -| Job Type | Potential Policy | -|----------|------------------| +| Job Type | Potential Rule | +|----------|----------------| | API Design | "Update API docs when endpoint definitions change" | | Database Schema | "Review migrations when schema files change" | | Competitive Research | "Update strategy docs when competitor analysis changes" | | Feature Development | "Update changelog when feature files change" | | Configuration Management | "Update install guide when config files change" | -**How to offer policy creation:** +**How to offer rule creation:** -If you identify one or more policies that would benefit the user, explain: -1. **What the policy would do** - What triggers it and what action it prompts +If you identify one or more rules that would benefit the user, explain: +1. **What the rule would do** - What triggers it and what action it prompts 2. **Why it would help** - How it prevents common mistakes or keeps things in sync 3. **What files it would watch** - The trigger patterns Then ask the user: -> "Would you like me to create this policy for you? I can run `/deepwork_policy.define` to set it up." +> "Would you like me to create this rule for you? I can run `/deepwork_rules.define` to set it up." -If the user agrees, invoke the `/deepwork_policy.define` command to guide them through creating the policy. +If the user agrees, invoke the `/deepwork_rules.define` command to guide them through creating the rule. **Example dialogue:** @@ -242,15 +242,15 @@ Based on the competitive_research job you just created, I noticed that when competitor analysis files change, it would be helpful to remind you to update your strategy documentation. 
-I'd suggest a policy like: +I'd suggest a rule like: - **Name**: "Update strategy when competitor analysis changes" - **Trigger**: `**/positioning_report.md` - **Action**: Prompt to review and update `docs/strategy.md` -Would you like me to create this policy? I can run `/deepwork_policy.define` to set it up. +Would you like me to create this rule? I can run `/deepwork_rules.define` to set it up. ``` -**Note:** Not every job needs policies. Only suggest them when they would genuinely help maintain consistency or quality. Don't force policies where they don't make sense. +**Note:** Not every job needs rules. Only suggest them when they would genuinely help maintain consistency or quality. Don't force rules where they don't make sense. ## Example Implementation @@ -284,8 +284,8 @@ Before marking this step complete, ensure: - [ ] `deepwork sync` executed successfully - [ ] Commands generated in platform directory - [ ] User informed to follow reload instructions from `deepwork sync` -- [ ] Considered whether policies would benefit this job (Step 7) -- [ ] If policies suggested, offered to run `/deepwork_policy.define` +- [ ] Considered whether rules would benefit this job (Step 7) +- [ ] If rules suggested, offered to run `/deepwork_rules.define` ## Quality Criteria @@ -297,7 +297,7 @@ Before marking this step complete, ensure: - Steps with user inputs explicitly use "ask structured questions" phrasing - Sync completed successfully - Commands available for use -- Thoughtfully considered relevant policies for the job domain +- Thoughtfully considered relevant rules for the job domain ## Inputs @@ -346,7 +346,7 @@ This step uses an iterative quality validation loop. After completing your work, 6. **Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"? 7. **Sync Complete**: Has `deepwork sync` been run successfully? 8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`? 
-9. **Policies Considered**: Has the agent thought about whether policies would benefit this job? If relevant policies were identified, did they explain them and offer to run `/deepwork_policy.define`? Not every job needs policies - only suggest when genuinely helpful. +9. **Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful. ### Completion Promise diff --git a/.claude/commands/uw.deepwork_rules.define.md b/.claude/commands/uw.deepwork_rules.define.md new file mode 100644 index 0000000..148247f --- /dev/null +++ b/.claude/commands/uw.deepwork_rules.define.md @@ -0,0 +1,339 @@ +--- +description: Create a new rule file in .deepwork/rules/ +--- + +# deepwork_rules.define + +**Standalone command** in the **deepwork_rules** job - can be run anytime + +**Summary**: Rules enforcement for AI agent sessions + +## Job Overview + +Manages rules that automatically trigger when certain files change during an AI agent session. +Rules help ensure that code changes follow team guidelines, documentation is updated, +and architectural decisions are respected. + +Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/` +directory. 
Each rule file specifies: +- Detection mode: trigger/safety, set (bidirectional), or pair (directional) +- Patterns: Glob patterns for matching files, with optional variable capture +- Instructions: Markdown content describing what the agent should do + +Example use cases: +- Update installation docs when configuration files change +- Require security review when authentication code is modified +- Ensure API documentation stays in sync with API code +- Enforce source/test file pairing + + + +## Instructions + +# Define Rule + +## Objective + +Create a new rule file in the `.deepwork/rules/` directory to enforce team guidelines, documentation requirements, or other constraints when specific files change. + +## Task + +Guide the user through defining a new rule by asking structured questions. **Do not create the rule without first understanding what they want to enforce.** + +**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices. + +### Step 1: Understand the Rule Purpose + +Start by asking structured questions to understand what the user wants to enforce: + +1. **What guideline or constraint should this rule enforce?** + - What situation triggers the need for action? + - What files or directories, when changed, should trigger this rule? + - Examples: "When config files change", "When API code changes", "When database schema changes" + +2. **What action should be taken?** + - What should the agent do when the rule triggers? + - Update documentation? Perform a security review? Update tests? + - Is there a specific file or process that needs attention? + +3. **Are there any "safety" conditions?** + - Are there files that, if also changed, mean the rule doesn't need to fire? 
+ - For example: If config changes AND install_guide.md changes, assume docs are already updated + - This prevents redundant prompts when the user has already done the right thing + +### Step 2: Choose the Detection Mode + +Help the user select the appropriate detection mode: + +**Trigger/Safety Mode** (most common): +- Fires when trigger patterns match AND no safety patterns match +- Use for: "When X changes, check Y" rules +- Example: When config changes, verify install docs + +**Set Mode** (bidirectional correspondence): +- Fires when files that should change together don't all change +- Use for: Source/test pairing, model/migration sync +- Example: `src/foo.py` and `tests/foo_test.py` should change together + +**Pair Mode** (directional correspondence): +- Fires when a trigger file changes but expected files don't +- Changes to expected files alone do NOT trigger +- Use for: API code requires documentation updates (but docs can update independently) + +### Step 3: Define the Patterns + +Help the user define glob patterns for files. 
+ +**Common patterns:** +- `src/**/*.py` - All Python files in src directory (recursive) +- `app/config/**/*` - All files in app/config directory +- `*.md` - All markdown files in root +- `src/api/**/*` - All files in the API directory +- `migrations/**/*.sql` - All SQL migrations + +**Variable patterns (for set/pair modes):** +- `src/{path}.py` - Captures path variable (e.g., `foo/bar` from `src/foo/bar.py`) +- `tests/{path}_test.py` - Uses same path variable in corresponding file +- `{name}` matches single segment, `{path}` matches multiple segments + +**Pattern syntax:** +- `*` - Matches any characters within a single path segment +- `**` - Matches any characters across multiple path segments (recursive) +- `?` - Matches a single character + +### Step 4: Choose the Comparison Mode (Optional) + +The `compare_to` field controls what baseline is used when detecting "changed files": + +**Options:** +- `base` (default) - Compares to the base of the current branch (merge-base with main/master). Best for feature branches. +- `default_tip` - Compares to the current tip of the default branch. Useful for seeing difference from production. +- `prompt` - Compares to the state at the start of each prompt. For rules about very recent changes. + +Most rules should use the default (`base`) and don't need to specify `compare_to`. + +### Step 5: Write the Instructions + +Create clear, actionable instructions for what the agent should do when the rule fires. 
+ +**Good instructions include:** +- What to check or review +- What files might need updating +- Specific actions to take +- Quality criteria for completion + +**Template variables available in instructions:** +- `{trigger_files}` - Files that triggered the rule +- `{expected_files}` - Expected corresponding files (for set/pair modes) + +### Step 6: Create the Rule File + +Create a new file in `.deepwork/rules/` with a kebab-case filename: + +**File Location**: `.deepwork/rules/{rule-name}.md` + +**Format for Trigger/Safety Mode:** +```markdown +--- +name: Friendly Name for the Rule +trigger: "glob/pattern/**/*" # or array: ["pattern1", "pattern2"] +safety: "optional/pattern" # optional, or array +compare_to: base # optional: "base" (default), "default_tip", or "prompt" +--- +Instructions for the agent when this rule fires. + +Multi-line markdown content is supported. +``` + +**Format for Set Mode (bidirectional):** +```markdown +--- +name: Source/Test Pairing +set: + - src/{path}.py + - tests/{path}_test.py +--- +Source and test files should change together. + +Modified: {trigger_files} +Expected: {expected_files} +``` + +**Format for Pair Mode (directional):** +```markdown +--- +name: API Documentation +pair: + trigger: api/{path}.py + expects: docs/api/{path}.md +--- +API code requires documentation updates. + +Changed API: {trigger_files} +Update docs: {expected_files} +``` + +### Step 7: Verify the Rule + +After creating the rule: + +1. **Check the YAML frontmatter** - Ensure valid YAML formatting +2. **Test trigger patterns** - Verify patterns match intended files +3. **Review instructions** - Ensure they're clear and actionable +4. 
**Check for conflicts** - Ensure the rule doesn't conflict with existing ones + +## Example Rules + +### Update Documentation on Config Changes +`.deepwork/rules/config-docs.md`: +```markdown +--- +name: Update Install Guide on Config Changes +trigger: app/config/**/* +safety: docs/install_guide.md +--- +Configuration files have been modified. Please review docs/install_guide.md +and update it if any installation instructions need to change based on the +new configuration. +``` + +### Security Review for Auth Code +`.deepwork/rules/security-review.md`: +```markdown +--- +name: Security Review for Authentication Changes +trigger: + - src/auth/**/* + - src/security/**/* +safety: + - SECURITY.md + - docs/security_audit.md +--- +Authentication or security code has been changed. Please: + +1. Review for hardcoded credentials or secrets +2. Check input validation on user inputs +3. Verify access control logic is correct +4. Update security documentation if needed +``` + +### Source/Test Pairing +`.deepwork/rules/source-test-pairing.md`: +```markdown +--- +name: Source/Test Pairing +set: + - src/{path}.py + - tests/{path}_test.py +--- +Source and test files should change together. + +When modifying source code, ensure corresponding tests are updated. +When adding tests, ensure they test actual source code. + +Modified: {trigger_files} +Expected: {expected_files} +``` + +### API Documentation Sync +`.deepwork/rules/api-docs.md`: +```markdown +--- +name: API Documentation Update +pair: + trigger: src/api/{path}.py + expects: docs/api/{path}.md +--- +API code has changed. Please verify that API documentation in docs/api/ +is up to date with the code changes. 
Pay special attention to: + +- New or changed endpoints +- Modified request/response schemas +- Updated authentication requirements + +Changed API: {trigger_files} +Update: {expected_files} +``` + +## Output Format + +### .deepwork/rules/{rule-name}.md +Create a new file with the rule definition using YAML frontmatter and markdown body. + +## Quality Criteria + +- Asked structured questions to understand user requirements +- Rule name is clear and descriptive (used in promise tags) +- Correct detection mode selected for the use case +- Patterns accurately match the intended files +- Safety patterns prevent unnecessary triggering (if applicable) +- Instructions are actionable and specific +- YAML frontmatter is valid + +## Context + +Rules are evaluated automatically when the agent finishes a task. The system: +1. Determines which files have changed based on each rule's `compare_to` setting +2. Evaluates rules based on their detection mode (trigger/safety, set, or pair) +3. Skips rules where the correspondence is satisfied (for set/pair) or safety matched +4. Prompts you with instructions for any triggered rules + +You can mark a rule as addressed by including `Rule Name` in your response (replace Rule Name with the actual rule name from the `name` field). This tells the system you've already handled that rule's requirements. + + +## Inputs + +### User Parameters + +Please gather the following information from the user: +- **rule_purpose**: What guideline or constraint should this rule enforce? + + +## Work Branch Management + +All work for this job should be done on a dedicated work branch: + +1. **Check current branch**: + - If already on a work branch for this job (format: `deepwork/deepwork_rules-[instance]-[date]`), continue using it + - If on main/master, create a new work branch + +2. 
**Create work branch** (if needed): + ```bash + git checkout -b deepwork/deepwork_rules-[instance]-$(date +%Y%m%d) + ``` + Replace `[instance]` with a descriptive identifier (e.g., `acme`, `q1-launch`, etc.) + +## Output Requirements + +Create the following output(s): +- `.deepwork/rules/{rule-name}.md` +Ensure all outputs are: +- Well-formatted and complete +- Ready for review or use by subsequent steps + +## Completion + +After completing this step: + +1. **Verify outputs**: Confirm all required files have been created + +2. **Inform the user**: + - The define command is complete + - Outputs created: .deepwork/rules/{rule-name}.md + - This command can be run again anytime to make further changes + +## Command Complete + +This is a standalone command that can be run anytime. The outputs are ready for use. + +Consider: +- Reviewing the outputs +- Running `deepwork sync` if job definitions were changed +- Re-running this command later if further changes are needed + +--- + +## Context Files + +- Job definition: `.deepwork/jobs/deepwork_rules/job.yml` +- Step instructions: `.deepwork/jobs/deepwork_rules/steps/define.md` \ No newline at end of file diff --git a/.deepwork/jobs/add_platform/job.yml b/.deepwork/jobs/add_platform/job.yml index ac51019..bde55d9 100644 --- a/.deepwork/jobs/add_platform/job.yml +++ b/.deepwork/jobs/add_platform/job.yml @@ -103,15 +103,10 @@ steps: outputs: [] dependencies: - implement - hooks: - after_agent: - - prompt: | - **AGENT: TAKE ACTION** - Verify the installation meets ALL criteria: - 1. Platform-specific directories/files are added to the deepwork repo as needed - 2. Running `deepwork install --platform ` completes without errors - 3. Expected command files are created in the platform's command directory - 4. Command file content matches the templates and job definitions - 5. Established DeepWork jobs (deepwork_jobs, deepwork_rules) are installed correctly - 6. 
The platform can be used alongside existing platforms without conflicts - - If ALL criteria are met, include `✓ Quality Criteria Met`. + quality_criteria: + - "Are platform-specific directories/files added to the deepwork repo as needed?" + - "Does running `deepwork install --platform ` complete without errors?" + - "Are expected command files created in the platform's command directory?" + - "Does command file content match the templates and job definitions?" + - "Are established DeepWork jobs (deepwork_jobs, deepwork_rules) installed correctly?" + - "Can the platform be used alongside existing platforms without conflicts?" diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index e87c03c..d311ab2 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -57,25 +57,16 @@ steps: - steps/ dependencies: - define - hooks: - after_agent: - - prompt: | - **AGENT: TAKE ACTION** - Verify the implementation meets ALL quality criteria before completing: - - 1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly? - 2. **Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)? - 3. **Specific & Actionable**: Are instructions tailored to each step's purpose, not generic? - 4. **Output Examples**: Does each instruction file show what good output looks like? - 5. **Quality Criteria**: Does each instruction file define quality criteria for its outputs? - 6. **Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"? - 7. **Sync Complete**: Has `deepwork sync` been run successfully? - 8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`? - 9. **Rules Considered**: Have you thought about whether rules would benefit this job? - - If relevant rules were identified, did you explain them and offer to run `/deepwork_rules.define`? 
- - Not every job needs rules - only suggest when genuinely helpful. - - If ANY criterion is not met, continue working to address it. - If ALL criteria are satisfied, include `✓ Quality Criteria Met` in your response. + quality_criteria: + - "**Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly?" + - "**Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)?" + - "**Specific & Actionable**: Are instructions tailored to each step's purpose, not generic?" + - "**Output Examples**: Does each instruction file show what good output looks like?" + - "**Quality Criteria**: Does each instruction file define quality criteria for its outputs?" + - "**Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase \"ask structured questions\"?" + - "**Sync Complete**: Has `deepwork sync` been run successfully?" + - "**Commands Available**: Are the slash-commands generated in `.claude/commands/`?" + - "**Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful." - id: learn name: "Learn from Job Execution" diff --git a/.gemini/commands/add_platform/uw.verify.toml b/.gemini/commands/add_platform/uw.verify.toml index 54c1dcd..38eaf83 100644 --- a/.gemini/commands/add_platform/uw.verify.toml +++ b/.gemini/commands/add_platform/uw.verify.toml @@ -193,7 +193,7 @@ This step has quality criteria that should be verified before completion. 2. Does running `deepwork install --platform ` complete without errors? 3. Are expected command files created in the platform's command directory? 4. Does command file content match the templates and job definitions? -5. Are established DeepWork jobs (deepwork_jobs, deepwork_policy) installed correctly? +5. 
Are established DeepWork jobs (deepwork_jobs, deepwork_rules) installed correctly? 6. Can the platform be used alongside existing platforms without conflicts? **Note**: Gemini CLI does not support automated validation hooks. Please manually verify the criteria above before proceeding. diff --git a/.gemini/commands/commit/uw.format.toml b/.gemini/commands/commit/uw.format.toml index 6972b7e..437d60d 100644 --- a/.gemini/commands/commit/uw.format.toml +++ b/.gemini/commands/commit/uw.format.toml @@ -120,11 +120,20 @@ No file output is required. Success is determined by ruff passing all checks. - `uv run ruff check src/ tests/` passes (exit code 0) - Any fixes made don't break functionality (tests should still pass) - If issues couldn't be fixed in 5 attempts, clear explanation provided -- When all checks pass, include `✓ Quality Criteria Met` in your response + +## Hook Behavior + +After you complete this step, a hook will automatically run ruff format and lint checks and show you the results. + +**Interpreting the hook output:** +- **Both checks passed (exit code 0)**: The step is complete. Proceed to the next step. +- **Checks failed (exit code non-zero)**: You must fix the issues. Use `uv run ruff format src/ tests/` for formatting and `uv run ruff check --fix src/ tests/` for auto-fixable lint issues. For remaining issues, fix manually. The hook will re-run after each attempt. + +**Important**: The hook runs automatically - you don't need to run the checks yourself after fixing. Just focus on making fixes, and the hook will verify them. ## Context -This is the second step in the commit workflow, after tests pass. Code must be properly formatted and lint-free before committing. The format step uses a script hook that automatically runs ruff checks, so focus on analyzing results and making fixes efficiently. +This is the second step in the commit workflow, after tests pass. Code must be properly formatted and lint-free before committing. 
diff --git a/.gemini/commands/commit/uw.test.toml b/.gemini/commands/commit/uw.test.toml index 7766636..7086f09 100644 --- a/.gemini/commands/commit/uw.test.toml +++ b/.gemini/commands/commit/uw.test.toml @@ -92,11 +92,20 @@ No file output is required. Success is determined by all tests passing. - All tests pass (`uv run pytest tests/ -v` exits with code 0) - Any fixes made are minimal and don't break other functionality - If tests couldn't be fixed in 5 attempts, clear explanation provided -- When all tests pass, include `✓ Quality Criteria Met` in your response + +## Hook Behavior + +After you complete this step, a hook will automatically run `uv run pytest tests/ -v` and show you the results. + +**Interpreting the hook output:** +- **All tests passed (exit code 0)**: The step is complete. Proceed to the next step. +- **Tests failed (exit code non-zero)**: You must fix the failing tests. Analyze the output, make fixes, and try again. The hook will re-run after each attempt. + +**Important**: The hook runs automatically - you don't need to run pytest yourself after the initial run. Just focus on making fixes when tests fail, and the hook will verify your fixes. ## Context -This is the first step in the commit workflow. Tests must pass before code formatting is checked, ensuring that any changes being committed are functionally correct. The test step uses a script hook that automatically runs pytest, so focus on analyzing results and making fixes. +This is the first step in the commit workflow. Tests must pass before code formatting is checked, ensuring that any changes being committed are functionally correct. diff --git a/.gemini/commands/deepwork_jobs/uw.implement.toml b/.gemini/commands/deepwork_jobs/uw.implement.toml index 322cbfd..4170a65 100644 --- a/.gemini/commands/deepwork_jobs/uw.implement.toml +++ b/.gemini/commands/deepwork_jobs/uw.implement.toml @@ -323,7 +323,7 @@ This step has quality criteria that should be verified before completion. 6. 
**Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"? 7. **Sync Complete**: Has `deepwork sync` been run successfully? 8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`? -9. **Policies Considered**: Has the agent thought about whether policies would benefit this job? If relevant policies were identified, did they explain them and offer to run `/deepwork_policy.define`? Not every job needs policies - only suggest when genuinely helpful. +9. **Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful. **Note**: Gemini CLI does not support automated validation hooks. Please manually verify the criteria above before proceeding. diff --git a/.gemini/commands/deepwork_rules/index.toml b/.gemini/commands/deepwork_rules/index.toml new file mode 100644 index 0000000..b3c6a0f --- /dev/null +++ b/.gemini/commands/deepwork_rules/index.toml @@ -0,0 +1,65 @@ +# deepwork_rules +# +# Rules enforcement for AI agent sessions +# +# Generated by DeepWork - do not edit manually + +description = "Rules enforcement for AI agent sessions" + +prompt = """ +# deepwork_rules + +You are executing the **deepwork_rules** job. Rules enforcement for AI agent sessions + +Manages rules that automatically trigger when certain files change during an AI agent session. +Rules help ensure that code changes follow team guidelines, documentation is updated, +and architectural decisions are respected. + +Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/` +directory. 
Each rule file specifies: +- Detection mode: trigger/safety, set (bidirectional), or pair (directional) +- Patterns: Glob patterns for matching files, with optional variable capture +- Instructions: Markdown content describing what the agent should do + +Example use cases: +- Update installation docs when configuration files change +- Require security review when authentication code is modified +- Ensure API documentation stays in sync with API code +- Enforce source/test file pairing + + +## Available Steps + +This job has 1 step(s): + +### define +**Define Rule**: Create a new rule file in .deepwork/rules/ +- Command: `/deepwork_rules:uw.define` + +## Instructions + +This is a **multi-step workflow**. Determine the starting point and guide the user through the steps in sequence. + +1. **Analyze user intent** from the text that follows `/deepwork_rules` + +2. **Identify the starting step** based on intent: + - define: Create a new rule file in .deepwork/rules/ + +3. **Guide the user through the workflow**: + - Tell the user to run the starting step command + - After each step completes, guide them to the next step in the workflow + - Continue until the workflow is complete + + Step commands: + - define: `/deepwork_rules:uw.define` + +4. **If intent is ambiguous**, ask the user which step to start from: + - Present the available steps as numbered options + - Let them choose + +**Note**: Gemini CLI requires manual command invocation. After each step completes, remind the user to run the next step command. 
+ +## Context Files + +- Job definition: `.deepwork/jobs/deepwork_rules/job.yml` +""" \ No newline at end of file diff --git a/.gemini/commands/deepwork_rules/uw.define.toml b/.gemini/commands/deepwork_rules/uw.define.toml new file mode 100644 index 0000000..28d6d5b --- /dev/null +++ b/.gemini/commands/deepwork_rules/uw.define.toml @@ -0,0 +1,346 @@ +# deepwork_rules:define +# +# Create a new rule file in .deepwork/rules/ +# +# Generated by DeepWork - do not edit manually + +description = "Create a new rule file in .deepwork/rules/" + +prompt = """ +# deepwork_rules:define + +**Standalone command** in the **deepwork_rules** job - can be run anytime + +**Summary**: Rules enforcement for AI agent sessions + +## Job Overview + +Manages rules that automatically trigger when certain files change during an AI agent session. +Rules help ensure that code changes follow team guidelines, documentation is updated, +and architectural decisions are respected. + +Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/` +directory. Each rule file specifies: +- Detection mode: trigger/safety, set (bidirectional), or pair (directional) +- Patterns: Glob patterns for matching files, with optional variable capture +- Instructions: Markdown content describing what the agent should do + +Example use cases: +- Update installation docs when configuration files change +- Require security review when authentication code is modified +- Ensure API documentation stays in sync with API code +- Enforce source/test file pairing + + + +## Instructions + +# Define Rule + +## Objective + +Create a new rule file in the `.deepwork/rules/` directory to enforce team guidelines, documentation requirements, or other constraints when specific files change. + +## Task + +Guide the user through defining a new rule by asking structured questions. 
**Do not create the rule without first understanding what they want to enforce.** + +**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices. + +### Step 1: Understand the Rule Purpose + +Start by asking structured questions to understand what the user wants to enforce: + +1. **What guideline or constraint should this rule enforce?** + - What situation triggers the need for action? + - What files or directories, when changed, should trigger this rule? + - Examples: "When config files change", "When API code changes", "When database schema changes" + +2. **What action should be taken?** + - What should the agent do when the rule triggers? + - Update documentation? Perform a security review? Update tests? + - Is there a specific file or process that needs attention? + +3. **Are there any "safety" conditions?** + - Are there files that, if also changed, mean the rule doesn't need to fire? 
+ - For example: If config changes AND install_guide.md changes, assume docs are already updated + - This prevents redundant prompts when the user has already done the right thing + +### Step 2: Choose the Detection Mode + +Help the user select the appropriate detection mode: + +**Trigger/Safety Mode** (most common): +- Fires when trigger patterns match AND no safety patterns match +- Use for: "When X changes, check Y" rules +- Example: When config changes, verify install docs + +**Set Mode** (bidirectional correspondence): +- Fires when files that should change together don't all change +- Use for: Source/test pairing, model/migration sync +- Example: `src/foo.py` and `tests/foo_test.py` should change together + +**Pair Mode** (directional correspondence): +- Fires when a trigger file changes but expected files don't +- Changes to expected files alone do NOT trigger +- Use for: API code requires documentation updates (but docs can update independently) + +### Step 3: Define the Patterns + +Help the user define glob patterns for files. 
+ +**Common patterns:** +- `src/**/*.py` - All Python files in src directory (recursive) +- `app/config/**/*` - All files in app/config directory +- `*.md` - All markdown files in root +- `src/api/**/*` - All files in the API directory +- `migrations/**/*.sql` - All SQL migrations + +**Variable patterns (for set/pair modes):** +- `src/{path}.py` - Captures path variable (e.g., `foo/bar` from `src/foo/bar.py`) +- `tests/{path}_test.py` - Uses same path variable in corresponding file +- `{name}` matches single segment, `{path}` matches multiple segments + +**Pattern syntax:** +- `*` - Matches any characters within a single path segment +- `**` - Matches any characters across multiple path segments (recursive) +- `?` - Matches a single character + +### Step 4: Choose the Comparison Mode (Optional) + +The `compare_to` field controls what baseline is used when detecting "changed files": + +**Options:** +- `base` (default) - Compares to the base of the current branch (merge-base with main/master). Best for feature branches. +- `default_tip` - Compares to the current tip of the default branch. Useful for seeing difference from production. +- `prompt` - Compares to the state at the start of each prompt. For rules about very recent changes. + +Most rules should use the default (`base`) and don't need to specify `compare_to`. + +### Step 5: Write the Instructions + +Create clear, actionable instructions for what the agent should do when the rule fires. 
+ +**Good instructions include:** +- What to check or review +- What files might need updating +- Specific actions to take +- Quality criteria for completion + +**Template variables available in instructions:** +- `{trigger_files}` - Files that triggered the rule +- `{expected_files}` - Expected corresponding files (for set/pair modes) + +### Step 6: Create the Rule File + +Create a new file in `.deepwork/rules/` with a kebab-case filename: + +**File Location**: `.deepwork/rules/{rule-name}.md` + +**Format for Trigger/Safety Mode:** +```markdown +--- +name: Friendly Name for the Rule +trigger: "glob/pattern/**/*" # or array: ["pattern1", "pattern2"] +safety: "optional/pattern" # optional, or array +compare_to: base # optional: "base" (default), "default_tip", or "prompt" +--- +Instructions for the agent when this rule fires. + +Multi-line markdown content is supported. +``` + +**Format for Set Mode (bidirectional):** +```markdown +--- +name: Source/Test Pairing +set: + - src/{path}.py + - tests/{path}_test.py +--- +Source and test files should change together. + +Modified: {trigger_files} +Expected: {expected_files} +``` + +**Format for Pair Mode (directional):** +```markdown +--- +name: API Documentation +pair: + trigger: api/{path}.py + expects: docs/api/{path}.md +--- +API code requires documentation updates. + +Changed API: {trigger_files} +Update docs: {expected_files} +``` + +### Step 7: Verify the Rule + +After creating the rule: + +1. **Check the YAML frontmatter** - Ensure valid YAML formatting +2. **Test trigger patterns** - Verify patterns match intended files +3. **Review instructions** - Ensure they're clear and actionable +4. 
**Check for conflicts** - Ensure the rule doesn't conflict with existing ones + +## Example Rules + +### Update Documentation on Config Changes +`.deepwork/rules/config-docs.md`: +```markdown +--- +name: Update Install Guide on Config Changes +trigger: app/config/**/* +safety: docs/install_guide.md +--- +Configuration files have been modified. Please review docs/install_guide.md +and update it if any installation instructions need to change based on the +new configuration. +``` + +### Security Review for Auth Code +`.deepwork/rules/security-review.md`: +```markdown +--- +name: Security Review for Authentication Changes +trigger: + - src/auth/**/* + - src/security/**/* +safety: + - SECURITY.md + - docs/security_audit.md +--- +Authentication or security code has been changed. Please: + +1. Review for hardcoded credentials or secrets +2. Check input validation on user inputs +3. Verify access control logic is correct +4. Update security documentation if needed +``` + +### Source/Test Pairing +`.deepwork/rules/source-test-pairing.md`: +```markdown +--- +name: Source/Test Pairing +set: + - src/{path}.py + - tests/{path}_test.py +--- +Source and test files should change together. + +When modifying source code, ensure corresponding tests are updated. +When adding tests, ensure they test actual source code. + +Modified: {trigger_files} +Expected: {expected_files} +``` + +### API Documentation Sync +`.deepwork/rules/api-docs.md`: +```markdown +--- +name: API Documentation Update +pair: + trigger: src/api/{path}.py + expects: docs/api/{path}.md +--- +API code has changed. Please verify that API documentation in docs/api/ +is up to date with the code changes. 
Pay special attention to: + +- New or changed endpoints +- Modified request/response schemas +- Updated authentication requirements + +Changed API: {trigger_files} +Update: {expected_files} +``` + +## Output Format + +### .deepwork/rules/{rule-name}.md +Create a new file with the rule definition using YAML frontmatter and markdown body. + +## Quality Criteria + +- Asked structured questions to understand user requirements +- Rule name is clear and descriptive (used in promise tags) +- Correct detection mode selected for the use case +- Patterns accurately match the intended files +- Safety patterns prevent unnecessary triggering (if applicable) +- Instructions are actionable and specific +- YAML frontmatter is valid + +## Context + +Rules are evaluated automatically when the agent finishes a task. The system: +1. Determines which files have changed based on each rule's `compare_to` setting +2. Evaluates rules based on their detection mode (trigger/safety, set, or pair) +3. Skips rules where the correspondence is satisfied (for set/pair) or safety matched +4. Prompts you with instructions for any triggered rules + +You can mark a rule as addressed by including `Rule Name` in your response (replace Rule Name with the actual rule name from the `name` field). This tells the system you've already handled that rule's requirements. + + +## Inputs + +### User Parameters + +Please gather the following information from the user: +- **rule_purpose**: What guideline or constraint should this rule enforce? + + +## Work Branch Management + +All work for this job should be done on a dedicated work branch: + +1. **Check current branch**: + - If already on a work branch for this job (format: `deepwork/deepwork_rules-[instance]-[date]`), continue using it + - If on main/master, create a new work branch + +2. 
**Create work branch** (if needed): + ```bash + git checkout -b deepwork/deepwork_rules-[instance]-$(date +%Y%m%d) + ``` + Replace `[instance]` with a descriptive identifier (e.g., `acme`, `q1-launch`, etc.) + +## Output Requirements + +Create the following output(s): +- `.deepwork/rules/{rule-name}.md` + +Ensure all outputs are: +- Well-formatted and complete +- Ready for review or use by subsequent steps + +## Completion + +After completing this step: + +1. **Verify outputs**: Confirm all required files have been created + +2. **Inform the user**: + - The define command is complete + - Outputs created: .deepwork/rules/{rule-name}.md + - This command can be run again anytime to make further changes + +## Command Complete + +This is a standalone command that can be run anytime. The outputs are ready for use. + +Consider: +- Reviewing the outputs +- Running `deepwork sync` if job definitions were changed +- Re-running this command later if further changes are needed + +--- + +## Context Files + +- Job definition: `.deepwork/jobs/deepwork_rules/job.yml` +- Step instructions: `.deepwork/jobs/deepwork_rules/steps/define.md` +""" \ No newline at end of file diff --git a/.gemini/commands/update/index.toml b/.gemini/commands/update/index.toml index 5491f37..f564b18 100644 --- a/.gemini/commands/update/index.toml +++ b/.gemini/commands/update/index.toml @@ -12,7 +12,7 @@ prompt = """ You are executing the **update** job. Update standard jobs in src/ and sync to installed locations A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs -(like `deepwork_jobs` and `deepwork_policy`) are source-controlled in +(like `deepwork_jobs` and `deepwork_rules`) are source-controlled in `src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/` or `.claude/commands/` directly. 
diff --git a/src/deepwork/schemas/job_schema.py b/src/deepwork/schemas/job_schema.py index 25e0e88..6ad4c4e 100644 --- a/src/deepwork/schemas/job_schema.py +++ b/src/deepwork/schemas/job_schema.py @@ -6,7 +6,69 @@ # These values must match CommandLifecycleHook enum in adapters.py LIFECYCLE_HOOK_EVENTS = ["after_agent", "before_tool", "before_prompt"] -# Schema definition for a single hook action (prompt, prompt_file, or script) +# Schema definitions for hook actions - each type is separate to enforce no mixing +PROMPT_HOOK_SCHEMA: dict[str, Any] = { + "type": "object", + "required": ["prompt"], + "properties": { + "prompt": { + "type": "string", + "minLength": 1, + "description": "Inline prompt for validation/action", + }, + }, + "additionalProperties": False, +} + +PROMPT_FILE_HOOK_SCHEMA: dict[str, Any] = { + "type": "object", + "required": ["prompt_file"], + "properties": { + "prompt_file": { + "type": "string", + "minLength": 1, + "description": "Path to prompt file (relative to job directory)", + }, + }, + "additionalProperties": False, +} + +SCRIPT_HOOK_SCHEMA: dict[str, Any] = { + "type": "object", + "required": ["script"], + "properties": { + "script": { + "type": "string", + "minLength": 1, + "description": "Path to shell script (relative to job directory)", + }, + }, + "additionalProperties": False, +} + +# Schema for a hook event - must be all one type (no mixing script and prompt hooks) +# Each hook event is an array containing only one type of hook action +HOOK_EVENT_SCHEMA: dict[str, Any] = { + "oneOf": [ + { + "type": "array", + "description": "Array of script hooks only", + "items": SCRIPT_HOOK_SCHEMA, + }, + { + "type": "array", + "description": "Array of prompt hooks only", + "items": PROMPT_HOOK_SCHEMA, + }, + { + "type": "array", + "description": "Array of prompt_file hooks only", + "items": PROMPT_FILE_HOOK_SCHEMA, + }, + ], +} + +# Legacy schema for backward compatibility in stop_hooks (allows mixing) HOOK_ACTION_SCHEMA: dict[str, Any] = { 
"type": "object", "oneOf": [ @@ -177,22 +239,19 @@ }, "hooks": { "type": "object", - "description": "Lifecycle hooks for this step, keyed by event type", + "description": "Lifecycle hooks for this step, keyed by event type. Each event must use only one hook type (script, prompt, or prompt_file) - mixing is not allowed.", "properties": { "after_agent": { - "type": "array", - "description": "Hooks triggered after the agent finishes (quality validation)", - "items": HOOK_ACTION_SCHEMA, + **HOOK_EVENT_SCHEMA, + "description": "Hooks triggered after the agent finishes (quality validation). Must be all scripts OR all prompts.", }, "before_tool": { - "type": "array", - "description": "Hooks triggered before a tool is used", - "items": HOOK_ACTION_SCHEMA, + **HOOK_EVENT_SCHEMA, + "description": "Hooks triggered before a tool is used. Must be all scripts OR all prompts.", }, "before_prompt": { - "type": "array", - "description": "Hooks triggered when user submits a prompt", - "items": HOOK_ACTION_SCHEMA, + **HOOK_EVENT_SCHEMA, + "description": "Hooks triggered when user submits a prompt. Must be all scripts OR all prompts.", }, }, "additionalProperties": False, diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index e87c03c..d311ab2 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -57,25 +57,16 @@ steps: - steps/ dependencies: - define - hooks: - after_agent: - - prompt: | - **AGENT: TAKE ACTION** - Verify the implementation meets ALL quality criteria before completing: - - 1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly? - 2. **Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)? - 3. **Specific & Actionable**: Are instructions tailored to each step's purpose, not generic? - 4. **Output Examples**: Does each instruction file show what good output looks like? - 5. 
**Quality Criteria**: Does each instruction file define quality criteria for its outputs? - 6. **Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"? - 7. **Sync Complete**: Has `deepwork sync` been run successfully? - 8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`? - 9. **Rules Considered**: Have you thought about whether rules would benefit this job? - - If relevant rules were identified, did you explain them and offer to run `/deepwork_rules.define`? - - Not every job needs rules - only suggest when genuinely helpful. - - If ANY criterion is not met, continue working to address it. - If ALL criteria are satisfied, include `✓ Quality Criteria Met` in your response. + quality_criteria: + - "**Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly?" + - "**Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)?" + - "**Specific & Actionable**: Are instructions tailored to each step's purpose, not generic?" + - "**Output Examples**: Does each instruction file show what good output looks like?" + - "**Quality Criteria**: Does each instruction file define quality criteria for its outputs?" + - "**Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase \"ask structured questions\"?" + - "**Sync Complete**: Has `deepwork sync` been run successfully?" + - "**Commands Available**: Are the slash-commands generated in `.claude/commands/`?" + - "**Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful." 
- id: learn name: "Learn from Job Execution" diff --git a/tests/unit/test_stop_hooks.py b/tests/unit/test_stop_hooks.py index 7a6ecaf..b34a3ab 100644 --- a/tests/unit/test_stop_hooks.py +++ b/tests/unit/test_stop_hooks.py @@ -355,6 +355,104 @@ def test_valid_hooks_with_script_action(self) -> None: } validate_against_schema(job_data, JOB_SCHEMA) + def test_invalid_hooks_mixed_script_and_prompt(self) -> None: + """Test schema rejects mixing script and prompt hooks in same event.""" + job_data = { + "name": "test_job", + "version": "1.0.0", + "summary": "Test job", + "steps": [ + { + "id": "step1", + "name": "Step 1", + "description": "A step", + "instructions_file": "steps/step1.md", + "outputs": ["output.md"], + "hooks": { + "after_agent": [ + {"script": "hooks/run_tests.sh"}, + {"prompt": "Evaluate the test results"}, + ], + }, + } + ], + } + with pytest.raises(ValidationError): + validate_against_schema(job_data, JOB_SCHEMA) + + def test_invalid_hooks_mixed_script_and_prompt_file(self) -> None: + """Test schema rejects mixing script and prompt_file hooks in same event.""" + job_data = { + "name": "test_job", + "version": "1.0.0", + "summary": "Test job", + "steps": [ + { + "id": "step1", + "name": "Step 1", + "description": "A step", + "instructions_file": "steps/step1.md", + "outputs": ["output.md"], + "hooks": { + "after_agent": [ + {"script": "hooks/run_tests.sh"}, + {"prompt_file": "hooks/evaluate.md"}, + ], + }, + } + ], + } + with pytest.raises(ValidationError): + validate_against_schema(job_data, JOB_SCHEMA) + + def test_valid_hooks_multiple_scripts_same_event(self) -> None: + """Test schema accepts multiple scripts in same event (no mixing).""" + job_data = { + "name": "test_job", + "version": "1.0.0", + "summary": "Test job", + "steps": [ + { + "id": "step1", + "name": "Step 1", + "description": "A step", + "instructions_file": "steps/step1.md", + "outputs": ["output.md"], + "hooks": { + "after_agent": [ + {"script": "hooks/run_tests.sh"}, + {"script": 
"hooks/run_lint.sh"}, + ], + }, + } + ], + } + validate_against_schema(job_data, JOB_SCHEMA) + + def test_valid_hooks_multiple_prompts_same_event(self) -> None: + """Test schema accepts multiple prompts in same event (no mixing).""" + job_data = { + "name": "test_job", + "version": "1.0.0", + "summary": "Test job", + "steps": [ + { + "id": "step1", + "name": "Step 1", + "description": "A step", + "instructions_file": "steps/step1.md", + "outputs": ["output.md"], + "hooks": { + "after_agent": [ + {"prompt": "Check quality criteria 1"}, + {"prompt": "Check quality criteria 2"}, + ], + }, + } + ], + } + validate_against_schema(job_data, JOB_SCHEMA) + class TestGeneratorStopHooks: """Tests for generator stop hooks context building."""