From 22687d2c2bd24cd647ad3b788654bc11f44af705 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 16 Jan 2026 17:54:10 +0000
Subject: [PATCH 01/10] Add policy system v2 design documentation

Design docs for next-generation policy system with:
- File correspondence matching (sets and pairs)
- Idempotent command execution
- Queue-based state tracking with detector/evaluator pattern
- Folder-based policy storage using frontmatter markdown files

Key changes from current system:
- Policies move from single .deepwork.policy.yml to .deepwork/policies/*.md
- YAML frontmatter for config, markdown body for instructions
- New 'set' syntax for bidirectional file relationships
- New 'pair' syntax for directional file relationships
- New 'action' field for running commands instead of prompts
- Queue system prevents duplicate policy triggers across sessions
---
 doc/policy_syntax.md        | 691 ++++++++++++++++++++++++++++++++++++
 doc/policy_system_design.md | 580 ++++++++++++++++++++++++++++++
 doc/test_scenarios.md       | 509 ++++++++++++++++++++++++++
 3 files changed, 1780 insertions(+)
 create mode 100644 doc/policy_syntax.md
 create mode 100644 doc/policy_system_design.md
 create mode 100644 doc/test_scenarios.md

diff --git a/doc/policy_syntax.md b/doc/policy_syntax.md
new file mode 100644
index 0000000..72654b0
--- /dev/null
+++ b/doc/policy_syntax.md
@@ -0,0 +1,691 @@
+# Policy Configuration Syntax
+
+This document describes the syntax for policy files in the `.deepwork/policies/` directory.
+
+## Directory Structure
+
+Policies are stored as individual markdown files with YAML frontmatter:
+
+```
+.deepwork/
+└── policies/
+    ├── readme-accuracy.md
+    ├── source-test-pairing.md
+    ├── api-documentation.md
+    └── python-formatting.md
+```
+
+Each file has:
+- **Frontmatter**: YAML configuration between `---` delimiters
+- **Body**: Instructions (for prompt policies) or description (for command policies)
+
+This structure enables code files to reference policies:
+```python
+# Read the policy `.deepwork/policies/source-test-pairing.md` before editing
+class AuthService:
+    ...
+```
+
+## Quick Reference
+
+### Instruction Policy
+
+`.deepwork/policies/readme-accuracy.md`:
+```markdown
+---
+trigger: src/**/*
+safety: README.md
+---
+Source code changed. Please verify README.md is accurate.
+
+Check that:
+- All public APIs are documented
+- Examples are up to date
+- Installation instructions are correct
+```
+
+### Correspondence Set (bidirectional)
+
+`.deepwork/policies/source-test-pairing.md`:
+```markdown
+---
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
+---
+Source and test files should change together.
+
+When modifying source code, ensure corresponding tests are updated.
+When adding tests, ensure they test actual source code.
+```
+
+### Correspondence Pair (directional)
+
+`.deepwork/policies/api-documentation.md`:
+```markdown
+---
+pair:
+  trigger: api/{path}.py
+  expects: docs/api/{path}.md
+---
+API changes require documentation updates.
+
+When modifying an API endpoint, update its documentation to reflect:
+- Parameter changes
+- Response format changes
+- New error conditions
+```
+
+### Command Policy
+
+`.deepwork/policies/python-formatting.md`:
+```markdown
+---
+trigger: "**/*.py"
+action:
+  command: ruff format {file}
+---
+Automatically formats Python files using ruff.
+
+This policy runs `ruff format` on any changed Python files to ensure
+consistent code style across the codebase.
+```
+
+## Policy Types
+
+### Instruction Policies
+
+Instruction policies prompt the AI agent with guidance when certain files change.
+
+**Frontmatter fields:**
+```yaml
+---
+trigger: pattern              # Required: file pattern(s) that trigger
+safety: pattern               # Optional: file pattern(s) that suppress
+compare_to: base              # Optional: comparison baseline
+priority: normal              # Optional: output priority
+---
+```
+
+The markdown body contains the instructions shown to the agent.
+
+**Example:** `.deepwork/policies/security-review.md`
+
+```markdown
+---
+trigger:
+  - src/auth/**/*
+  - src/crypto/**/*
+safety: SECURITY.md
+compare_to: base
+priority: critical
+---
+Security-sensitive code has been modified.
+
+Please verify:
+1. No credentials are hardcoded
+2. Input validation is present
+3. Authentication checks are correct
+```
+
+### Correspondence Sets
+
+Sets define bidirectional relationships between files. When any file in a correspondence group changes, all related files should also change.
+
+**Frontmatter fields:**
+```yaml
+---
+set:                            # Required: list of corresponding patterns
+  - pattern1/{path}.ext1
+  - pattern2/{path}.ext2
+---
+```
+
+The markdown body contains instructions for when correspondence is incomplete.
+
+**How it works:**
+
+1. A file changes that matches one pattern in the set
+2. System extracts the variable portions (e.g., `{path}`)
+3. System generates expected files by substituting into other patterns
+4. If ALL expected files also changed: policy is satisfied (no trigger)
+5. If ANY expected file did not change: policy triggers with instructions
+
+**Example:** `.deepwork/policies/source-test-pairing.md`
+
+```markdown
+---
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
+---
+Source and test files should change together.
+
+Changed: {trigger_file}
+Expected: {expected_files}
+
+Please ensure both source and test are updated.
+```
+
+If `src/auth/login.py` changes:
+- Extracts `{path}` = `auth/login`
+- Expects `tests/auth/login_test.py` to also change
+- If test didn't change, shows instructions
+
+If `tests/auth/login_test.py` changes:
+- Extracts `{path}` = `auth/login`
+- Expects `src/auth/login.py` to also change
+- If source didn't change, shows instructions
+
+**Example:** `.deepwork/policies/model-schema-migration.md`
+
+```markdown
+---
+set:
+  - models/{name}.py
+  - schemas/{name}.py
+  - migrations/{name}.sql
+---
+Models, schemas, and migrations should stay in sync.
+
+When modifying database models, ensure:
+- Schema definitions are updated
+- Migration files are created or updated
+```
+
+### Correspondence Pairs
+
+Pairs define directional relationships. Changes to trigger files require corresponding expected files to change, but not vice versa.
+
+**Frontmatter fields:**
+```yaml
+---
+pair:
+  trigger: pattern/{path}.ext     # Required: pattern that triggers
+  expects: pattern/{path}.ext     # Required: expected to also change
+---
+```
+
+Can also specify multiple expected patterns:
+
+```yaml
+---
+pair:
+  trigger: pattern/{path}.ext
+  expects:
+    - pattern1/{path}.ext
+    - pattern2/{path}.ext
+---
+```
+
+**Example:** `.deepwork/policies/api-documentation.md`
+
+```markdown
+---
+pair:
+  trigger: api/{module}/{name}.py
+  expects: docs/api/{module}/{name}.md
+---
+API endpoint changed without documentation update.
+
+Changed: {trigger_file}
+Please update: {expected_files}
+
+Ensure the documentation covers:
+- Endpoint URL and method
+- Request parameters
+- Response format
+- Error cases
+```
+
+If `api/users/create.py` changes:
+- Expects `docs/api/users/create.md` to also change
+- If doc didn't change, shows instructions
+
+If `docs/api/users/create.md` changes alone:
+- No trigger (documentation can be updated independently)
+
+### Command Policies
+
+Command policies run idempotent commands instead of prompting the agent.
+
+**Frontmatter fields:**
+```yaml
+---
+trigger: pattern                  # Required: files that trigger
+safety: pattern                   # Optional: files that suppress
+action:
+  command: command {file}         # Required: command to run
+  run_for: each_match             # Optional: each_match (default) or all_matches
+---
+```
+
+The markdown body serves as a description of what the command does (shown in logs, not to the agent).
+
+**Template Variables in Commands:**
+
+| Variable | Description | Available When |
+|----------|-------------|----------------|
+| `{file}` | Single file path | `run_for: each_match` |
+| `{files}` | Space-separated file paths | `run_for: all_matches` |
+| `{repo_root}` | Repository root directory | Always |
+
+**Example:** `.deepwork/policies/python-formatting.md`
+
+```markdown
+---
+trigger: "**/*.py"
+safety: "**/*.pyi"
+action:
+  command: ruff format {file}
+  run_for: each_match
+---
+Automatically formats Python files using ruff.
+
+This ensures consistent code style without requiring manual formatting.
+Stub files (*.pyi) are excluded as they have different formatting rules.
+```
+
+**Example:** `.deepwork/policies/eslint-check.md`
+
+```markdown
+---
+trigger: "**/*.{js,ts,tsx}"
+action:
+  command: eslint --fix {files}
+  run_for: all_matches
+---
+Runs ESLint with auto-fix on all changed JavaScript/TypeScript files.
+```
+
+**Idempotency Requirement:**
+
+Commands MUST be idempotent. The system verifies this by:
+1. Running the command
+2. Checking for changes
+3. If changes occurred, running again
+4. If more changes occur, marking as failed
+
+## Pattern Syntax
+
+### Basic Glob Patterns
+
+Standard glob patterns work in `trigger` and `safety` fields:
+
+| Pattern | Matches |
+|---------|---------|
+| `*.py` | Python files in current directory |
+| `**/*.py` | Python files in any directory |
+| `src/**/*` | All files under src/ |
+| `test_*.py` | Files starting with `test_` |
+| `*.{js,ts}` | JavaScript and TypeScript files |
+
+### Variable Patterns
+
+Variable patterns use `{name}` syntax to capture path segments:
+
+| Pattern | Captures | Example Match |
+|---------|----------|---------------|
+| `src/{path}.py` | `{path}` = multi-segment path | `src/foo/bar.py` → `path=foo/bar` |
+| `src/{name}.py` | `{name}` = single segment | `src/utils.py` → `name=utils` |
+| `{module}/{name}.py` | Both variables | `auth/login.py` → `module=auth, name=login` |
+
+**Variable Naming Conventions:**
+
+- `{path}` - Conventional name for multi-segment captures (`**/*`)
+- `{name}` - Conventional name for single-segment captures (`*`)
+- Custom names are allowed (`{module}`, `{component}`, etc.) and capture a single segment unless written as `{**name}`
+
+**Multi-Segment vs Single-Segment:**
+
+By default, `{path}` matches multiple path segments and `{name}` matches one:
+
+```yaml
+# {path} matches: foo, foo/bar, foo/bar/baz
+- "src/{path}.py"  # src/foo.py, src/foo/bar.py, src/a/b/c.py
+
+# {name} matches only single segment
+- "src/{name}.py"  # src/foo.py (NOT src/foo/bar.py)
+```
+
+To explicitly control this, use `{**name}` for multi-segment or `{*name}` for single:
+
+```yaml
+- "src/{**module}/index.py"   # src/foo/bar/index.py → module=foo/bar
+- "src/{*component}.py"       # src/Button.py → component=Button
+```
+
+## Field Reference
+
+### File Naming
+
+Policy files are named using kebab-case with `.md` extension:
+- `readme-accuracy.md`
+- `source-test-pairing.md`
+- `api-documentation.md`
+
+The filename (without extension) serves as the policy's unique identifier for logging and promise tags.
+
+### trigger (instruction/command policies)
+
+File patterns that cause the policy to fire. Can be string or array.
+
+```yaml
+---
+# Single pattern
+trigger: src/**/*.py
+---
+
+---
+# Multiple patterns
+trigger:
+  - src/**/*.py
+  - lib/**/*.py
+---
+```
+
+### safety (optional)
+
+File patterns that suppress the policy. If ANY changed file matches a safety pattern, the policy does not fire.
+
+```yaml
+---
+# Single pattern
+safety: CHANGELOG.md
+---
+
+---
+# Multiple patterns
+safety:
+  - CHANGELOG.md
+  - docs/**/*
+---
+```
+
+### set (correspondence sets)
+
+List of patterns defining bidirectional file relationships.
+
+```yaml
+---
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
+---
+```
+
+### pair (correspondence pairs)
+
+Object with `trigger` and `expects` patterns for directional relationships.
+
+```yaml
+---
+pair:
+  trigger: api/{path}.py
+  expects: docs/api/{path}.md
+---
+
+---
+# Or with multiple expects
+pair:
+  trigger: api/{path}.py
+  expects:
+    - docs/api/{path}.md
+    - schemas/{path}.json
+---
+```
+
+### Markdown Body (instructions)
+
+The markdown content after the frontmatter serves as instructions shown to the agent when the policy fires.
+
+**Template Variables in Instructions:**
+
+| Variable | Description |
+|----------|-------------|
+| `{trigger_file}` | The file that triggered the policy |
+| `{trigger_files}` | All files that matched trigger patterns |
+| `{expected_files}` | Expected corresponding files (for sets/pairs) |
+| `{safety_files}` | Files that would suppress the policy |
+
+### action (command policies)
+
+Specifies a command to run instead of prompting.
+
+```yaml
+---
+action:
+  command: ruff format {file}
+  run_for: each_match  # or all_matches
+---
+```
+
+### compare_to (optional)
+
+Determines the baseline for detecting file changes.
+
+| Value | Description |
+|-------|-------------|
+| `base` (default) | Compare to merge-base with default branch |
+| `default_tip` | Compare to current tip of default branch |
+| `prompt` | Compare to state at last prompt submission |
+
+```yaml
+---
+compare_to: prompt
+---
+```
+
+### priority (optional)
+
+Controls output ordering and visibility.
+
+| Value | Behavior |
+|-------|----------|
+| `critical` | Always shown first, blocks progress |
+| `high` | Shown prominently |
+| `normal` (default) | Standard display |
+| `low` | Shown in summary, may be collapsed |
+
+```yaml
+---
+priority: critical
+---
+```
+
+### defer (optional)
+
+When `true`, policy output is deferred to end of session.
+
+```yaml
+---
+defer: true
+---
+```
+
+## Complete Examples
+
+### Example 1: Test Coverage Policy
+
+`.deepwork/policies/test-coverage.md`:
+```markdown
+---
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
+compare_to: base
+---
+Source code was modified without corresponding test updates.
+
+Modified source: {trigger_file}
+Expected test: {expected_files}
+
+Please either:
+1. Add/update tests for the changed code
+2. Explain why tests are not needed (and mark with <promise>)
+```
+
+### Example 2: Documentation Sync
+
+`.deepwork/policies/api-documentation-sync.md`:
+```markdown
+---
+pair:
+  trigger: src/api/{module}/{endpoint}.py
+  expects:
+    - docs/api/{module}/{endpoint}.md
+    - openapi/{module}.yaml
+priority: high
+---
+API endpoint changed. Please update:
+- Documentation: {expected_files}
+- Ensure OpenAPI spec is current
+
+If this is an internal-only change, mark as addressed.
+```
+
+### Example 3: Auto-formatting Pipeline
+
+`.deepwork/policies/python-black-formatting.md`:
+```markdown
+---
+trigger: "**/*.py"
+safety:
+  - "**/*.pyi"
+  - "**/migrations/**"
+action:
+  command: black {file}
+  run_for: each_match
+---
+Formats Python files using Black.
+
+Excludes:
+- Type stub files (*.pyi)
+- Database migration files
+```
+
+`.deepwork/policies/typescript-prettier.md`:
+```markdown
+---
+trigger: "**/*.{ts,tsx}"
+action:
+  command: prettier --write {file}
+  run_for: each_match
+---
+Formats TypeScript files using Prettier.
+```
+
+### Example 4: Multi-file Correspondence
+
+`.deepwork/policies/full-stack-feature-sync.md`:
+```markdown
+---
+set:
+  - backend/api/{feature}/routes.py
+  - backend/api/{feature}/models.py
+  - frontend/src/api/{feature}.ts
+  - frontend/src/components/{feature}/**/*
+---
+Feature files should be updated together across the stack.
+
+When modifying a feature, ensure:
+- Backend routes are updated
+- Backend models are updated
+- Frontend API client is updated
+- Frontend components are updated
+
+Changed: {trigger_files}
+Expected: {expected_files}
+```
+
+### Example 5: Conditional Safety
+
+`.deepwork/policies/version-bump-required.md`:
+```markdown
+---
+trigger:
+  - src/**/*.py
+  - pyproject.toml
+safety:
+  - pyproject.toml
+  - CHANGELOG.md
+compare_to: base
+priority: low
+defer: true
+---
+Code changes detected. Before merging, ensure:
+- Version is bumped in pyproject.toml (if needed)
+- CHANGELOG.md is updated
+
+This policy is suppressed if you've already modified pyproject.toml
+or CHANGELOG.md, as that indicates you're handling versioning.
+```
+
+## Promise Tags
+
+When a policy fires but should be dismissed, use promise tags in the conversation:
+
+```
+<promise>policy-filename</promise>
+```
+
+Use the policy filename (without `.md` extension) as the identifier:
+
+```
+<promise>test-coverage</promise>
+<promise>api-documentation-sync</promise>
+```
+
+This tells the system the policy has been addressed (either by action or explicit acknowledgment).
+
+## Validation
+
+Policy files are validated on load. Common errors:
+
+**Invalid frontmatter:**
+```
+Error: .deepwork/policies/my-policy.md - invalid YAML frontmatter
+```
+
+**Missing required field:**
+```
+Error: .deepwork/policies/my-policy.md - must have 'trigger', 'set', or 'pair'
+```
+
+**Invalid pattern:**
+```
+Error: .deepwork/policies/test-coverage.md - invalid pattern "src/{path" - unclosed brace
+```
+
+**Conflicting fields:**
+```
+Error: .deepwork/policies/my-policy.md - has both 'trigger' and 'set' - use one or the other
+```
+
+**Empty body:**
+```
+Error: .deepwork/policies/my-policy.md - instruction policies require markdown body
+```
+
+## Referencing Policies in Code
+
+A key benefit of the `.deepwork/policies/` folder structure is that code files can reference policies directly:
+
+```python
+# Read `.deepwork/policies/source-test-pairing.md` before editing this file
+
+class UserService:
+    """Service for user management."""
+    pass
+```
+
+```typescript
+// This file is governed by `.deepwork/policies/api-documentation.md`
+// Any changes here require corresponding documentation updates
+
+export async function createUser(data: UserInput): Promise<User> {
+    // ...
+}
+```
+
+This helps AI agents and human developers understand which policies apply to specific files.
diff --git a/doc/policy_system_design.md b/doc/policy_system_design.md
new file mode 100644
index 0000000..d62d78b
--- /dev/null
+++ b/doc/policy_system_design.md
@@ -0,0 +1,580 @@
+# Policy System Design
+
+## Overview
+
+The deepwork policy system enables automated enforcement of development standards during AI-assisted coding sessions. This document describes the architecture for the next-generation policy system with support for:
+
+1. **File correspondence matching** (sets and pairs)
+2. **Idempotent command execution**
+3. **Stateful evaluation with queue-based processing**
+4. **Efficient agent output management**
+
+## Core Concepts
+
+### Policy Types
+
+The system supports three policy types:
+
+| Type | Purpose | Trigger Direction |
+|------|---------|-------------------|
+| **Instruction policies** | Prompt agent with instructions | Any matched file |
+| **Command policies** | Run idempotent commands | Any matched file |
+| **Correspondence policies** | Enforce file relationships | When relationship is incomplete |
+
+### File Correspondence
+
+Correspondence policies define relationships between files that should change together.
+
+**Sets (Bidirectional)**
+- Define N patterns that share a common variable path
+- If ANY file matching one pattern changes, ALL corresponding files should change
+- Example: Source files and their tests
+
+**Pairs (Directional)**
+- Define a trigger pattern and one or more expected patterns
+- Changes to trigger files require corresponding expected files to also change
+- Changes to expected files alone do not trigger the policy
+- Example: API code requires documentation updates
+
+### Pattern Variables
+
+Patterns use `{name}` syntax for capturing variable path segments:
+
+```
+src/{path}.py          # {path} captures everything between src/ and .py
+tests/{path}_test.py   # {path} must match the same value
+```
+
+Special variable names:
+- `{path}` - Matches any path segments (equivalent to `**/*`)
+- `{name}` - Matches a single path segment (equivalent to `*`)
+- `{**name}` - Explicit multi-segment capture (e.g., `{**module}`)
+- `{*name}` - Explicit single-segment capture (e.g., `{*component}`)
+
+### Actions
+
+Policies can specify two types of actions:
+
+**Prompt Action (default)**
+```yaml
+action:
+  type: prompt
+  instructions: |
+    Please review the changes...
+```
+
+**Command Action**
+```yaml
+action:
+  type: command
+  command: "ruff format {file}"
+  run_for: each_match
+```
+
+Command actions execute idempotent commands. The system verifies idempotency by running the command twice and checking that no additional changes occur.
+
+## Architecture
+
+### Component Overview
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                        Policy System                             │
+├─────────────────────────────────────────────────────────────────┤
+│                                                                  │
+│  ┌──────────────┐    ┌──────────────┐    ┌──────────────┐      │
+│  │   Detector   │───▶│    Queue     │◀───│  Evaluator   │      │
+│  │              │    │              │    │              │      │
+│  │ - Watch files│    │ .deepwork/   │    │ - Process    │      │
+│  │ - Match pols │    │ tmp/policy/  │    │   queued     │      │
+│  │ - Create     │    │ queue/       │    │ - Run action │      │
+│  │   entries    │    │              │    │ - Update     │      │
+│  └──────────────┘    └──────────────┘    │   status     │      │
+│                                          └──────────────┘      │
+│                                                                  │
+│  ┌──────────────┐    ┌──────────────┐                          │
+│  │   Matcher    │    │   Resolver   │                          │
+│  │              │    │              │                          │
+│  │ - Pattern    │    │ - Variable   │                          │
+│  │   matching   │    │   extraction │                          │
+│  │ - Glob       │    │ - Path       │                          │
+│  │   expansion  │    │   generation │                          │
+│  └──────────────┘    └──────────────┘                          │
+│                                                                  │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+### Detector
+
+The detector identifies when policies should be evaluated:
+
+1. **Trigger Detection**: Monitors for file changes that match policy triggers
+2. **Deduplication**: Computes a hash to avoid re-processing identical triggers
+3. **Queue Entry Creation**: Creates entries for the evaluator to process
+
+**Trigger Hash Computation**:
+```python
+hash_input = f"{policy_name}:{sorted(trigger_files)}:{baseline_ref}"
+trigger_hash = sha256(hash_input.encode()).hexdigest()[:12]
+```
+
+The baseline_ref varies by `compare_to` mode:
+- `base`: merge-base commit hash
+- `default_tip`: remote tip commit hash
+- `prompt`: timestamp of last prompt submission
+
+### Queue
+
+The queue persists policy trigger state in `.deepwork/tmp/policy/queue/`:
+
+```
+.deepwork/tmp/policy/queue/
+├── {hash}.queued.json      # Detected, awaiting evaluation
+├── {hash}.passed.json      # Evaluated, policy satisfied
+├── {hash}.failed.json      # Evaluated, policy not satisfied
+└── {hash}.skipped.json     # Skipped: safety pattern matched or correspondence already satisfied
+```
+
+**Queue Entry Schema**:
+```json
+{
+  "policy_name": "string",
+  "trigger_hash": "string",
+  "status": "queued|passed|failed|skipped",
+  "created_at": "ISO8601 timestamp",
+  "evaluated_at": "ISO8601 timestamp or null",
+  "baseline_ref": "string",
+  "trigger_files": ["array", "of", "files"],
+  "expected_files": ["array", "of", "files"],
+  "matched_files": ["array", "of", "files"],
+  "action_result": {
+    "type": "prompt|command",
+    "output": "string or null",
+    "exit_code": "number or null"
+  }
+}
+```
+
+**Queue Cleanup**:
+- Entries older than 24 hours are automatically pruned
+- `passed` and `skipped` entries are pruned after 1 hour
+- Manual cleanup via `deepwork policy clear-queue`
+
+### Evaluator
+
+The evaluator processes queued entries:
+
+1. **Load Entry**: Read queued entry from disk
+2. **Verify Still Relevant**: Re-check that trigger conditions still apply
+3. **Execute Action**:
+   - For prompts: Format message and return to hook system
+   - For commands: Execute command, verify idempotency
+4. **Update Status**: Mark as passed, failed, or skipped
+5. **Report Results**: Return appropriate response to caller
+
+### Matcher
+
+Pattern matching with variable extraction:
+
+**Algorithm**:
+```python
+def match_pattern(pattern: str, filepath: str) -> dict[str, str] | None:
+    """
+    Match filepath against pattern, extracting variables.
+
+    Returns dict of {variable_name: captured_value} or None if no match.
+    """
+    # Convert pattern to regex with named groups
+    # {path} -> (?P<path>.+)
+    # {name} -> (?P<name>[^/]+)
+    # Literal parts are escaped
+    regex = pattern_to_regex(pattern)
+    match = re.fullmatch(regex, filepath)
+    if match:
+        return match.groupdict()
+    return None
+```
+
+**Pattern Compilation**:
+```python
+def pattern_to_regex(pattern: str) -> str:
+    """Convert pattern with {var} placeholders to regex."""
+    result = []
+    for segment in parse_pattern(pattern):
+        if segment.is_variable:
+            if segment.name in ('path', '**'):
+                result.append(f'(?P<{segment.name}>.+)')
+            else:
+                result.append(f'(?P<{segment.name}>[^/]+)')
+        else:
+            result.append(re.escape(segment.value))
+    return ''.join(result)
+```
+
+### Resolver
+
+Generates expected filepaths from patterns and captured variables:
+
+```python
+def resolve_pattern(pattern: str, variables: dict[str, str]) -> str:
+    """
+    Substitute variables into pattern to generate filepath.
+
+    Example:
+        resolve_pattern("tests/{path}_test.py", {"path": "foo/bar"})
+        -> "tests/foo/bar_test.py"
+    """
+    result = pattern
+    for name, value in variables.items():
+        result = result.replace(f'{{{name}}}', value)
+    return result
+```
+
+## Evaluation Flow
+
+### Standard Instruction Policy
+
+```
+1. Detector: File changes detected
+2. Detector: Check each policy's trigger patterns
+3. Detector: For matching policy, compute trigger hash
+4. Detector: If hash not in queue, create .queued entry
+5. Evaluator: Process queued entry
+6. Evaluator: Check safety patterns against changed files
+7. Evaluator: If safety matches, mark .skipped
+8. Evaluator: If no safety match, return instructions to agent
+9. Agent: Addresses policy, includes <promise> tag
+10. Evaluator: On next check, mark .passed (promise found)
+```
+
+### Correspondence Policy (Set)
+
+```
+1. Detector: File src/foo/bar.py changed
+2. Matcher: Matches pattern "src/{path}.py" with {path}="foo/bar"
+3. Resolver: Generate expected files from other patterns:
+   - "tests/{path}_test.py" -> "tests/foo/bar_test.py"
+4. Detector: Check if tests/foo/bar_test.py also changed
+5. Detector: If yes, mark .skipped (correspondence satisfied)
+6. Detector: If no, create .queued entry
+7. Evaluator: Return instructions prompting for test update
+```
+
+### Correspondence Policy (Pair)
+
+```
+1. Detector: File api/users.py changed (trigger pattern)
+2. Matcher: Matches "api/{path}.py" with {path}="users"
+3. Resolver: Generate expected: "docs/api/users.md"
+4. Detector: Check if docs/api/users.md also changed
+5. Detector: If yes, mark .skipped
+6. Detector: If no, create .queued entry
+7. Evaluator: Return instructions
+
+Note: If only docs/api/users.md changed (not api/users.py),
+the pair policy does NOT trigger (directional).
+```
+
+### Command Policy
+
+```
+1. Detector: Python file changed, matches "**/*.py"
+2. Detector: Create .queued entry for format policy
+3. Evaluator: Execute "ruff format {file}"
+4. Evaluator: Run git diff to check for changes
+5. Evaluator: If changes made, re-run command (idempotency check)
+6. Evaluator: If no additional changes, mark .passed
+7. Evaluator: If changes keep occurring, mark .failed, alert user
+```
+
+## Agent Output Management
+
+### Problem
+
+When many policies trigger, the agent receives excessive output, degrading performance.
+
+### Solution
+
+**1. Output Batching**
+Group related policies into single messages:
+
+```
+The following policies require attention:
+
+## File Correspondence Issues (3)
+
+1. **Source/Test Pairing**: src/auth/login.py changed without tests/auth/login_test.py
+2. **Source/Test Pairing**: src/api/users.py changed without tests/api/users_test.py
+3. **API Documentation**: api/users.py changed without docs/api/users.md
+
+## Code Quality (1)
+
+4. **README Accuracy**: Source files changed, please verify README.md
+```
+
+**2. Priority Levels**
+Policies can specify priority (critical, high, normal, low):
+
+```yaml
+- name: "Security Review"
+  trigger: "src/auth/**/*"
+  priority: critical
+```
+
+Only critical and high priority policies are shown immediately; normal and low priority policies are shown in a summary.
+
+**3. Deferred Policies**
+Low-priority policies can be deferred to end of session:
+
+```yaml
+- name: "Documentation Check"
+  trigger: "src/**/*"
+  priority: low
+  defer: true  # Show at session end, not immediately
+```
+
+**4. Collapsed Instructions**
+Long instructions are truncated with expansion available:
+
+```
+## README Accuracy
+
+Source code changed. Please verify README.md is accurate.
+
+[+] Show full instructions (15 lines)
+```
+
+## State Persistence
+
+### Directory Structure
+
+```
+.deepwork/
+├── policies/                # Policy definitions (frontmatter markdown)
+│   ├── readme-accuracy.md
+│   ├── source-test-pairing.md
+│   ├── api-documentation.md
+│   └── python-formatting.md
+├── tmp/
+│   └── policy/
+│       ├── queue/           # Queue entries
+│       │   ├── abc123.queued.json
+│       │   └── def456.passed.json
+│       ├── baselines/       # Cached baseline states
+│       │   └── prompt_1705420800.json
+│       └── cache/           # Pattern matching cache
+│           └── patterns.json
+└── policy_state.json        # Session state summary
+```
+
+### Policy File Format
+
+Each policy is a markdown file with YAML frontmatter:
+
+```markdown
+---
+trigger: src/**/*.py
+safety: README.md
+priority: normal
+---
+Instructions shown to the agent when this policy fires.
+
+These can be multi-line with full markdown formatting.
+```
+
+This format enables:
+1. Code files to reference policies in comments
+2. Human-readable policy documentation
+3. Easy editing with any markdown editor
+4. Clear separation of configuration and content
+
+### Baseline Management
+
+For `compare_to: prompt`, baselines are captured at prompt submission:
+
+```json
+{
+  "timestamp": "2024-01-16T12:00:00Z",
+  "commit": "abc123",
+  "staged_files": ["file1.py", "file2.py"],
+  "untracked_files": ["file3.py"]
+}
+```
+
+Multiple baselines can exist for different prompts in a session.
+
+### Queue Lifecycle
+
+```
+                  ┌─────────┐
+                  │ Created │
+                  │ .queued │
+                  └────┬────┘
+                       │
+         ┌─────────────┼─────────────┐
+         │             │             │
+         ▼             ▼             ▼
+    ┌─────────┐   ┌─────────┐   ┌─────────┐
+    │ .passed │   │ .failed │   │.skipped │
+    └─────────┘   └─────────┘   └─────────┘
+         │             │             │
+         └─────────────┼─────────────┘
+                       │
+                       ▼
+                  ┌─────────┐
+                  │ Pruned  │
+                  │(cleanup)│
+                  └─────────┘
+```
+
+## Error Handling
+
+### Pattern Errors
+
+Invalid patterns are caught at policy load time:
+
+```python
+class PatternError(PolicyError):
+    """Invalid pattern syntax."""
+    pass
+
+# Validation
+def validate_pattern(pattern: str) -> None:
+    # Check for unbalanced braces
+    # Check for invalid variable names
+    # Check for unsupported syntax
+```
+
+### Command Errors
+
+Command execution errors are captured and reported:
+
+```json
+{
+  "status": "failed",
+  "action_result": {
+    "type": "command",
+    "command": "ruff format {file}",
+    "exit_code": 1,
+    "stdout": "",
+    "stderr": "error: invalid syntax in foo.py:10"
+  }
+}
+```
+
+### Queue Corruption
+
+If queue entries become corrupted:
+1. Log error with entry details
+2. Remove corrupted entry
+3. Re-detect triggers on next evaluation
+
+## Configuration
+
+### Policy Files
+
+Policies are stored in `.deepwork/policies/` as individual markdown files with YAML frontmatter. See `doc/policy_syntax.md` for complete syntax documentation.
+
+**Loading Order:**
+1. All `.md` files in `.deepwork/policies/` are loaded
+2. Files are processed in alphabetical order
+3. Filename (without extension) becomes policy identifier
+
+**Policy Discovery:**
+```python
+def load_policies(policies_dir: Path) -> list[Policy]:
+    """Load all policies from the policies directory."""
+    policies = []
+    for path in sorted(policies_dir.glob("*.md")):
+        policy = parse_policy_file(path)
+        policy.name = path.stem  # filename without .md
+        policies.append(policy)
+    return policies
+```
+
+### System Configuration
+
+In `.deepwork/config.yml`:
+
+```yaml
+policy:
+  enabled: true
+  policies_dir: .deepwork/policies  # Can be customized
+  queue_retention_hours: 24
+  max_queued_entries: 100
+  output_mode: batched  # batched, individual, summary
+  priority_threshold: normal  # Show this priority and above
+```
+
+## Performance Considerations
+
+### Caching
+
+- Pattern compilation is cached per-session
+- Baseline diffs are cached by commit hash
+- Queue lookups use hash-based O(1) access
+
+### Lazy Evaluation
+
+- Patterns only compiled when needed
+- File lists only computed for triggered policies
+- Instructions only loaded when policy fires
+
+### Parallel Processing
+
+- Multiple queue entries can be processed in parallel
+- Command actions can run concurrently (with file locking)
+- Pattern matching is parallelized across policies
+
+## Migration from Legacy System
+
+The legacy system used a single `.deepwork.policy.yml` file containing an array of policies. The new system uses individual markdown files in `.deepwork/policies/`.
+
+**Breaking Changes:**
+- Single YAML file replaced with folder of markdown files
+- Policy `name` field replaced with filename
+- `instructions` / `instructions_file` replaced with markdown body
+- New features: sets, pairs, commands, queue-based state
+
+**No backwards compatibility is provided.** Existing `.deepwork.policy.yml` files must be converted manually.
+
+**Conversion Example:**
+
+Old format (`.deepwork.policy.yml`):
+```yaml
+- name: "README Accuracy"
+  trigger: "src/**/*"
+  safety: "README.md"
+  instructions: |
+    Please verify README.md is accurate.
+```
+
+New format (`.deepwork/policies/readme-accuracy.md`):
+```markdown
+---
+trigger: src/**/*
+safety: README.md
+---
+Please verify README.md is accurate.
+```
+
+## Security Considerations
+
+### Command Execution
+
+- Commands run in sandboxed subprocess
+- No shell expansion (arguments passed as array)
+- Working directory is always repo root
+- Environment variables are filtered
+
+### Queue File Permissions
+
+- Queue directory: 700 (owner only)
+- Queue files: 600 (owner only)
+- No sensitive data in queue entries
+
+### Input Validation
+
+- All policy files validated against schema
+- Pattern variables sanitized before use
+- File paths normalized and validated
diff --git a/doc/test_scenarios.md b/doc/test_scenarios.md
new file mode 100644
index 0000000..c9460f7
--- /dev/null
+++ b/doc/test_scenarios.md
@@ -0,0 +1,509 @@
+# Policy System Test Scenarios
+
+This document describes test scenarios for validating the policy system implementation.
+
+## 1. Pattern Matching
+
+### 1.1 Basic Glob Patterns
+
+| ID | Scenario | Pattern | File | Expected |
+|----|----------|---------|------|----------|
+| PM-1.1.1 | Exact match | `README.md` | `README.md` | Match |
+| PM-1.1.2 | Exact no match | `README.md` | `readme.md` | No match |
+| PM-1.1.3 | Single wildcard | `*.py` | `main.py` | Match |
+| PM-1.1.4 | Single wildcard nested | `*.py` | `src/main.py` | No match |
+| PM-1.1.5 | Double wildcard | `**/*.py` | `src/main.py` | Match |
+| PM-1.1.6 | Double wildcard deep | `**/*.py` | `src/a/b/c/main.py` | Match |
+| PM-1.1.7 | Double wildcard root | `**/*.py` | `main.py` | Match |
+| PM-1.1.8 | Directory prefix | `src/**/*` | `src/foo.py` | Match |
+| PM-1.1.9 | Directory prefix deep | `src/**/*` | `src/a/b/c.py` | Match |
+| PM-1.1.10 | Directory no match | `src/**/*` | `lib/foo.py` | No match |
+| PM-1.1.11 | Brace expansion | `*.{js,ts}` | `app.ts` | Match |
+| PM-1.1.12 | Brace expansion second | `*.{js,ts}` | `app.js` | Match |
+| PM-1.1.13 | Brace expansion no match | `*.{js,ts}` | `app.py` | No match |
+
+### 1.2 Variable Patterns
+
+| ID | Scenario | Pattern | File | Expected Variables |
+|----|----------|---------|------|-------------------|
+| PM-1.2.1 | Single var path | `src/{path}.py` | `src/foo/bar.py` | `{path: "foo/bar"}` |
+| PM-1.2.2 | Single var name | `src/{name}.py` | `src/utils.py` | `{name: "utils"}` |
+| PM-1.2.3 | Name rejects nested path | `src/{name}.py` | `src/foo/bar.py` | No match |
+| PM-1.2.4 | Two variables | `{dir}/{name}.py` | `src/main.py` | `{dir: "src", name: "main"}` |
+| PM-1.2.5 | Prefix and suffix | `test_{name}_test.py` | `test_foo_test.py` | `{name: "foo"}` |
+| PM-1.2.6 | Nested path | `src/{path}/index.py` | `src/a/b/index.py` | `{path: "a/b"}` |
+| PM-1.2.7 | Explicit multi | `src/{**mod}/main.py` | `src/a/b/c/main.py` | `{mod: "a/b/c"}` |
+| PM-1.2.8 | Explicit single | `src/{*name}.py` | `src/utils.py` | `{name: "utils"}` |
+| PM-1.2.9 | Mixed explicit | `{*dir}/{**path}.py` | `src/a/b/c.py` | `{dir: "src", path: "a/b/c"}` |
+
+### 1.3 Pattern Resolution
+
+| ID | Scenario | Pattern | Variables | Expected Output |
+|----|----------|---------|-----------|-----------------|
+| PM-1.3.1 | Simple substitution | `tests/{path}_test.py` | `{path: "foo"}` | `tests/foo_test.py` |
+| PM-1.3.2 | Nested path | `tests/{path}_test.py` | `{path: "a/b/c"}` | `tests/a/b/c_test.py` |
+| PM-1.3.3 | Multiple vars | `{dir}/test_{name}.py` | `{dir: "tests", name: "foo"}` | `tests/test_foo.py` |
+
+## 2. Instruction Policies
+
+### 2.1 Basic Trigger/Safety
+
+| ID | Scenario | Changed Files | Trigger | Safety | Expected |
+|----|----------|---------------|---------|--------|----------|
+| IP-2.1.1 | Trigger match, no safety | `["src/main.py"]` | `src/**/*.py` | None | Fire |
+| IP-2.1.2 | Trigger match, safety match | `["src/main.py", "README.md"]` | `src/**/*.py` | `README.md` | No fire |
+| IP-2.1.3 | Trigger no match | `["docs/readme.md"]` | `src/**/*.py` | None | No fire |
+| IP-2.1.4 | Multiple triggers, one match | `["lib/utils.py"]` | `["src/**/*.py", "lib/**/*.py"]` | None | Fire |
+| IP-2.1.5 | Safety match only | `["README.md"]` | `src/**/*.py` | `README.md` | No fire |
+| IP-2.1.6 | Multiple safety, one match | `["src/main.py", "CHANGELOG.md"]` | `src/**/*.py` | `["README.md", "CHANGELOG.md"]` | No fire |
+| IP-2.1.7 | Multiple triggers, multiple files | `["src/a.py", "lib/b.py"]` | `["src/**/*.py", "lib/**/*.py"]` | None | Fire |
+
+### 2.2 Compare Modes
+
+```
+Setup: Branch diverged 3 commits ago from main
+- Commit 1: Added src/feature.py
+- Commit 2: Modified src/feature.py
+- Commit 3: Added tests/feature_test.py
+- Unstaged: Modified src/utils.py
+```
+
+| ID | Scenario | compare_to | Expected Changed Files |
+|----|----------|------------|----------------------|
+| IP-2.2.1 | Base comparison | `base` | `["src/feature.py", "tests/feature_test.py", "src/utils.py"]` |
+| IP-2.2.2 | Default tip (main ahead 1) | `default_tip` | All files from the `base` comparison plus files changed on main |
+| IP-2.2.3 | Prompt baseline (captured after commit 2) | `prompt` | `["tests/feature_test.py", "src/utils.py"]` |
+
+### 2.3 Promise Tags
+
+Policy names are now derived from filenames (without the `.md` extension).
+
+| ID | Scenario | Conversation Contains | Policy File | Expected |
+|----|----------|----------------------|-------------|----------|
+| IP-2.3.1 | Exact promise | `<promise>readme-accuracy</promise>` | `readme-accuracy.md` | Suppressed |
+| IP-2.3.2 | Promise with checkmark | `<promise>✓ readme-accuracy</promise>` | `readme-accuracy.md` | Suppressed |
+| IP-2.3.3 | Case insensitive | `<promise>README-ACCURACY</promise>` | `readme-accuracy.md` | Suppressed |
+| IP-2.3.4 | Whitespace | `<promise>  readme-accuracy  </promise>` | `readme-accuracy.md` | Suppressed |
+| IP-2.3.5 | No promise | (none) | `readme-accuracy.md` | Not suppressed |
+| IP-2.3.6 | Wrong promise | `<promise>other-policy</promise>` | `readme-accuracy.md` | Not suppressed |
+| IP-2.3.7 | Multiple promises | `<promise>a</promise><promise>b</promise>` | `a.md` | Suppressed |
+
+## 3. Correspondence Sets
+
+### 3.1 Two-Pattern Sets
+
+```yaml
+set:
+  - "src/{path}.py"
+  - "tests/{path}_test.py"
+```
+
+| ID | Scenario | Changed Files | Expected |
+|----|----------|---------------|----------|
+| CS-3.1.1 | Both changed | `["src/foo.py", "tests/foo_test.py"]` | No fire (satisfied) |
+| CS-3.1.2 | Only source | `["src/foo.py"]` | Fire (missing test) |
+| CS-3.1.3 | Only test | `["tests/foo_test.py"]` | Fire (missing source) |
+| CS-3.1.4 | Nested both | `["src/a/b.py", "tests/a/b_test.py"]` | No fire |
+| CS-3.1.5 | Nested only source | `["src/a/b.py"]` | Fire |
+| CS-3.1.6 | Unrelated file | `["docs/readme.md"]` | No fire |
+| CS-3.1.7 | Source + unrelated | `["src/foo.py", "docs/readme.md"]` | Fire |
+| CS-3.1.8 | Both + unrelated | `["src/foo.py", "tests/foo_test.py", "docs/readme.md"]` | No fire |
+
+### 3.2 Three-Pattern Sets
+
+```yaml
+set:
+  - "models/{name}.py"
+  - "schemas/{name}.py"
+  - "migrations/{name}.sql"
+```
+
+| ID | Scenario | Changed Files | Expected |
+|----|----------|---------------|----------|
+| CS-3.2.1 | All three | `["models/user.py", "schemas/user.py", "migrations/user.sql"]` | No fire |
+| CS-3.2.2 | Two of three | `["models/user.py", "schemas/user.py"]` | Fire (missing migration) |
+| CS-3.2.3 | One of three | `["models/user.py"]` | Fire (missing 2) |
+| CS-3.2.4 | Different names | `["models/user.py", "schemas/order.py"]` | Fire (both incomplete) |
+
+### 3.3 Edge Cases
+
+| ID | Scenario | Changed Files | Expected |
+|----|----------|---------------|----------|
+| CS-3.3.1 | File matches both patterns | `["src/test_foo_test.py"]` | Depends on pattern specificity |
+| CS-3.3.2 | Empty path variable | (N/A - patterns require content) | Pattern validation error |
+| CS-3.3.3 | Multiple files same pattern | `["src/a.py", "src/b.py"]` | Fire for each without corresponding test |
+
+## 4. Correspondence Pairs
+
+### 4.1 Basic Pairs
+
+```yaml
+pair:
+  trigger: "api/{path}.py"
+  expects: "docs/api/{path}.md"
+```
+
+| ID | Scenario | Changed Files | Expected |
+|----|----------|---------------|----------|
+| CP-4.1.1 | Both changed | `["api/users.py", "docs/api/users.md"]` | No fire |
+| CP-4.1.2 | Only trigger | `["api/users.py"]` | Fire |
+| CP-4.1.3 | Only expected | `["docs/api/users.md"]` | No fire (directional) |
+| CP-4.1.4 | Trigger + unrelated | `["api/users.py", "README.md"]` | Fire |
+| CP-4.1.5 | Expected + unrelated | `["docs/api/users.md", "README.md"]` | No fire |
+
+### 4.2 Multi-Expects Pairs
+
+```yaml
+pair:
+  trigger: "api/{path}.py"
+  expects:
+    - "docs/api/{path}.md"
+    - "openapi/{path}.yaml"
+```
+
+| ID | Scenario | Changed Files | Expected |
+|----|----------|---------------|----------|
+| CP-4.2.1 | All three | `["api/users.py", "docs/api/users.md", "openapi/users.yaml"]` | No fire |
+| CP-4.2.2 | Trigger + one expect | `["api/users.py", "docs/api/users.md"]` | Fire (missing openapi) |
+| CP-4.2.3 | Only trigger | `["api/users.py"]` | Fire (missing both) |
+| CP-4.2.4 | Both expects only | `["docs/api/users.md", "openapi/users.yaml"]` | No fire |
+
+## 5. Command Policies
+
+### 5.1 Basic Commands
+
+```yaml
+- name: "Format Python"
+  trigger: "**/*.py"
+  action:
+    command: "ruff format {file}"
+    run_for: each_match
+```
+
+| ID | Scenario | Changed Files | Expected Behavior |
+|----|----------|---------------|-------------------|
+| CMD-5.1.1 | Single file | `["src/main.py"]` | Run `ruff format src/main.py` |
+| CMD-5.1.2 | Multiple files | `["src/a.py", "src/b.py"]` | Run command for each file |
+| CMD-5.1.3 | Non-matching | `["README.md"]` | No command run |
+
+### 5.2 All Matches Mode
+
+```yaml
+action:
+  command: "eslint --fix {files}"
+  run_for: all_matches
+```
+
+| ID | Scenario | Changed Files | Expected Command |
+|----|----------|---------------|------------------|
+| CMD-5.2.1 | Multiple files | `["a.js", "b.js", "c.js"]` | `eslint --fix a.js b.js c.js` |
+| CMD-5.2.2 | Single file | `["a.js"]` | `eslint --fix a.js` |
+
+### 5.3 Idempotency Verification
+
+| ID | Scenario | First Run | Second Run | Expected Result |
+|----|----------|-----------|------------|-----------------|
+| CMD-5.3.1 | Truly idempotent | Changes files | No changes | Pass |
+| CMD-5.3.2 | Not idempotent | Changes files | Changes files | Fail |
+| CMD-5.3.3 | No changes needed | No changes | (not run) | Pass |
+
+### 5.4 Command Errors
+
+| ID | Scenario | Command Result | Expected |
+|----|----------|----------------|----------|
+| CMD-5.4.1 | Exit code 0 | Success | Pass |
+| CMD-5.4.2 | Exit code 1 | Failure | Fail, show stderr |
+| CMD-5.4.3 | Timeout | Command hangs | Fail, timeout error |
+| CMD-5.4.4 | Command not found | Not executable | Fail, not found error |
+
+## 6. Queue System
+
+### 6.1 Queue Entry Lifecycle
+
+| ID | Scenario | Initial State | Action | Final State |
+|----|----------|---------------|--------|-------------|
+| QS-6.1.1 | New trigger | (none) | Trigger detected | `.queued` |
+| QS-6.1.2 | Safety suppression | `.queued` | Safety pattern matches | `.skipped` |
+| QS-6.1.3 | Prompt addressed | `.queued` | Promise tag found | `.passed` |
+| QS-6.1.4 | Command success | `.queued` | Command passes | `.passed` |
+| QS-6.1.5 | Command failure | `.queued` | Command fails | `.failed` |
+| QS-6.1.6 | Re-trigger same | `.passed` | Same files changed | No new entry |
+| QS-6.1.7 | Re-trigger different | `.passed` | Different files | New `.queued` |
+
+### 6.2 Hash Calculation
+
+| ID | Scenario | Policy | Files | Baseline | Expected Hash Differs? |
+|----|----------|--------|-------|----------|------------------------|
+| QS-6.2.1 | Same everything | PolicyA | `[a.py]` | commit1 | Same hash |
+| QS-6.2.2 | Different files | PolicyA | `[a.py]` vs `[b.py]` | commit1 | Different |
+| QS-6.2.3 | Different baseline | PolicyA | `[a.py]` | commit1 vs commit2 | Different |
+| QS-6.2.4 | Different policy | PolicyA vs PolicyB | `[a.py]` | commit1 | Different |
+
+### 6.3 Queue Cleanup
+
+| ID | Scenario | Entry Age | Entry Status | Expected |
+|----|----------|-----------|--------------|----------|
+| QS-6.3.1 | Old queued | 25 hours | `.queued` | Pruned |
+| QS-6.3.2 | Recent queued | 1 hour | `.queued` | Kept |
+| QS-6.3.3 | Old passed | 2 hours | `.passed` | Pruned |
+| QS-6.3.4 | Recent passed | 30 min | `.passed` | Kept |
+| QS-6.3.5 | Old failed | 25 hours | `.failed` | Pruned |
+
+### 6.4 Concurrent Access
+
+| ID | Scenario | Process A | Process B | Expected |
+|----|----------|-----------|-----------|----------|
+| QS-6.4.1 | Simultaneous create | Creates entry | Creates entry | One wins, other no-ops |
+| QS-6.4.2 | Create during eval | Creating | Evaluating existing | A creates new, B continues |
+| QS-6.4.3 | Both evaluate same | Evaluating | Evaluating | File locking prevents race |
+
+## 7. Output Management
+
+### 7.1 Priority Ordering
+
+```
+Policies:
+- Critical: "Security Review"
+- High: "API Documentation"
+- Normal: "README Accuracy"
+- Low: "Code Style"
+```
+
+| ID | Scenario | Triggered Policies | Expected Order |
+|----|----------|-------------------|----------------|
+| OM-7.1.1 | All priorities | All 4 | Security, API, README, Style |
+| OM-7.1.2 | Mixed | High, Low | API, Style |
+| OM-7.1.3 | Same priority | 3 Normal | Alphabetical within priority |
+
+### 7.2 Output Batching
+
+| ID | Scenario | Triggered Policies | Expected Output |
+|----|----------|-------------------|-----------------|
+| OM-7.2.1 | Single policy | 1 | Full instructions |
+| OM-7.2.2 | Two policies | 2 | Both, numbered |
+| OM-7.2.3 | Many policies | 10 | Batched with summary |
+| OM-7.2.4 | Same type | 3 Source/Test pairs | Grouped under heading |
+
+### 7.3 Deferred Policies
+
+| ID | Scenario | Policy defer Setting | Agent Action | Expected |
+|----|----------|---------------------|--------------|----------|
+| OM-7.3.1 | Deferred, stop | `defer: true` | Stop | Not shown |
+| OM-7.3.2 | Deferred, session end | `defer: true` | Session ends | Shown |
+| OM-7.3.3 | Not deferred | `defer: false` | Stop | Shown |
+
+## 8. Schema Validation
+
+### 8.1 Required Fields
+
+| ID | Scenario | Missing Field | Expected Error |
+|----|----------|---------------|----------------|
+| SV-8.1.1 | Missing name | `name` | "required field 'name'" |
+| SV-8.1.2 | Missing trigger (instruction) | `trigger` | "required 'trigger', 'set', or 'pair'" |
+| SV-8.1.3 | Missing instructions | `instructions` | "required 'instructions' or 'instructions_file'" |
+| SV-8.1.4 | Missing set patterns | `set` is empty | "set requires at least 2 patterns" |
+
+### 8.2 Mutually Exclusive Fields
+
+| ID | Scenario | Fields Present | Expected Error |
+|----|----------|----------------|----------------|
+| SV-8.2.1 | Both instructions types | `instructions` + `instructions_file` | "use one or the other" |
+| SV-8.2.2 | Both trigger types | `trigger` + `set` | "use trigger, set, or pair" |
+| SV-8.2.3 | All trigger types | `trigger` + `set` + `pair` | "use one policy type" |
+
+### 8.3 Pattern Validation
+
+| ID | Scenario | Pattern | Expected Error |
+|----|----------|---------|----------------|
+| SV-8.3.1 | Unclosed brace | `src/{path.py` | "unclosed brace" |
+| SV-8.3.2 | Empty variable | `src/{}.py` | "empty variable name" |
+| SV-8.3.3 | Invalid chars in var | `src/{path/name}.py` | "invalid variable name" |
+| SV-8.3.4 | Duplicate variable | `{path}/{path}.py` | "duplicate variable 'path'" |
+
+### 8.4 Value Validation
+
+| ID | Scenario | Field | Value | Expected Error |
+|----|----------|-------|-------|----------------|
+| SV-8.4.1 | Invalid compare_to | `compare_to` | `"yesterday"` | "must be base, default_tip, or prompt" |
+| SV-8.4.2 | Invalid priority | `priority` | `"urgent"` | "must be critical, high, normal, or low" |
+| SV-8.4.3 | Invalid run_for | `run_for` | `"first_match"` | "must be each_match or all_matches" |
+
+## 9. Integration Tests
+
+### 9.1 End-to-End Instruction Policy
+
+```
+Given: Policy requiring tests for source changes
+When: User modifies src/auth/login.py without test
+Then:
+  1. Stop hook fires
+  2. Detector creates queue entry
+  3. Evaluator returns instructions
+  4. Agent sees policy message
+  5. Agent adds tests
+  6. Agent includes promise tag
+  7. Next stop: queue entry marked passed
+  8. Agent can stop successfully
+```
+
+### 9.2 End-to-End Command Policy
+
+```
+Given: Auto-format policy for Python files
+When: User creates unformatted src/new_file.py
+Then:
+  1. Stop hook fires
+  2. Detector creates queue entry
+  3. Evaluator runs formatter
+  4. Formatter modifies file
+  5. Evaluator verifies idempotency
+  6. Queue entry marked passed
+  7. Agent notified of formatting changes
+```
+
+### 9.3 End-to-End Correspondence Set
+
+```
+Given: Source/test pairing policy
+When: User modifies src/utils.py only
+Then:
+  1. Detector matches src/utils.py to pattern
+  2. Resolver calculates expected tests/utils_test.py
+  3. tests/utils_test.py not in changed files
+  4. Queue entry created for incomplete correspondence
+  5. Evaluator returns instructions
+  6. Agent sees "expected tests/utils_test.py to change"
+```
+
+### 9.4 Multiple Policies Same File
+
+```
+Given:
+  - Policy A: "Format Python" (command)
+  - Policy B: "Test Coverage" (set)
+  - Policy C: "README Accuracy" (instruction)
+When: User modifies src/main.py
+Then:
+  1. All three policies trigger
+  2. Command policy runs first
+  3. Set policy checks for test
+  4. Instruction policy prepares message
+  5. Agent sees batched output with all requirements
+```
+
+### 9.5 Safety Pattern Across Policies
+
+```
+Given:
+  - Policy A: trigger=src/**/*.py, safety=CHANGELOG.md
+  - Policy B: trigger=src/**/*.py, safety=README.md
+When: User modifies src/main.py and CHANGELOG.md
+Then:
+  1. Policy A: safety match, skipped
+  2. Policy B: no safety match, fires
+  3. Only Policy B instructions shown
+```
+
+## 10. Performance Tests
+
+### 10.1 Large File Count
+
+| ID | Scenario | File Count | Expected |
+|----|----------|------------|----------|
+| PT-10.1.1 | Many changed files | 100 | < 1s evaluation |
+| PT-10.1.2 | Very many files | 1000 | < 5s evaluation |
+| PT-10.1.3 | Pattern-heavy | 50 policies, 100 files | < 2s evaluation |
+
+### 10.2 Queue Size
+
+| ID | Scenario | Queue Entries | Expected |
+|----|----------|---------------|----------|
+| PT-10.2.1 | Moderate queue | 100 entries | < 100ms load |
+| PT-10.2.2 | Large queue | 1000 entries | < 500ms load |
+| PT-10.2.3 | Cleanup performance | 10000 old entries | < 1s cleanup |
+
+### 10.3 Pattern Matching
+
+| ID | Scenario | Patterns | Files | Expected |
+|----|----------|----------|-------|----------|
+| PT-10.3.1 | Simple patterns | 10 | 100 | < 10ms |
+| PT-10.3.2 | Complex patterns | 50 with variables | 100 | < 50ms |
+| PT-10.3.3 | Deep recursion | `**/**/**/*.py` | 1000 | < 100ms |
+
+## Test Data Fixtures
+
+### Sample Policy Files
+
+Policies are stored as individual markdown files in `.deepwork/policies/`:
+
+**`.deepwork/policies/readme-accuracy.md`**
+```markdown
+---
+trigger: src/**/*
+safety: README.md
+---
+Please review README.md for accuracy.
+```
+
+**`.deepwork/policies/source-test-pairing.md`**
+```markdown
+---
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
+---
+Source and test should change together.
+```
+
+**`.deepwork/policies/api-documentation.md`**
+```markdown
+---
+pair:
+  trigger: api/{module}.py
+  expects: docs/api/{module}.md
+---
+API changes need documentation.
+```
+
+**`.deepwork/policies/python-formatting.md`**
+```markdown
+---
+trigger: "**/*.py"
+action:
+  command: black {file}
+  run_for: each_match
+---
+Auto-formats Python files with Black.
+```
+
+### Sample Queue Entry
+
+```json
+{
+  "policy_name": "source-test-pairing",
+  "trigger_hash": "abc123def456",
+  "status": "queued",
+  "created_at": "2024-01-16T10:00:00Z",
+  "evaluated_at": null,
+  "baseline_ref": "abc123",
+  "trigger_files": ["src/auth/login.py"],
+  "expected_files": ["tests/auth/login_test.py"],
+  "matched_files": [],
+  "action_result": null
+}
+```
+
+### Directory Structure for Tests
+
+```
+.deepwork/
+├── policies/
+│   ├── readme-accuracy.md
+│   ├── source-test-pairing.md
+│   ├── api-documentation.md
+│   └── python-formatting.md
+└── tmp/
+    └── policy/
+        └── queue/
+            └── (queue entries created during tests)
+```

From a0cf300b1347e9b801a21c6bdc87c43980b645b0 Mon Sep 17 00:00:00 2001
From: Noah Horton <noah@unsupervised.com>
Date: Fri, 16 Jan 2026 11:26:58 -0700
Subject: [PATCH 02/10] Feedback from review

---
 doc/policy_syntax.md        | 2 +-
 doc/policy_system_design.md | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/doc/policy_syntax.md b/doc/policy_syntax.md
index 72654b0..ec1c1a7 100644
--- a/doc/policy_syntax.md
+++ b/doc/policy_syntax.md
@@ -36,7 +36,7 @@ class AuthService:
 trigger: src/**/*
 safety: README.md
 ---
-Source code changed. Please verify README.md is accurate.
+Source code changed. Please verify README.md is accurate. Note that this is called only once even if there are many changes, so verify all changes.
 
 Check that:
 - All public APIs are documented
diff --git a/doc/policy_system_design.md b/doc/policy_system_design.md
index d62d78b..b7a158d 100644
--- a/doc/policy_system_design.md
+++ b/doc/policy_system_design.md
@@ -306,6 +306,14 @@ The following policies require attention:
 1. **Source/Test Pairing**: src/auth/login.py changed without tests/auth/login_test.py
 2. **Source/Test Pairing**: src/api/users.py changed without tests/api/users_test.py
 3. **API Documentation**: api/users.py changed without docs/api/users.md
+ 
+[FEEDBACK] - these sections should all be shorter. Ex:
+## Source/Test Pairings
+src/auth/login.py changed without tests/auth/login_test.py
+src/api/users.py changed without tests/api/users_test.py
+
+## API Documentation
+api/users.py changed without docs/api/users.md
 
 ## Code Quality (1)
 

From 5dfbd8339f32f59fda834f2570963353fda7368e Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 16 Jan 2026 18:32:23 +0000
Subject: [PATCH 03/10] Simplify policy system design based on feedback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Key changes:
- Restructure taxonomy: detection modes (trigger/safety, set, pair) + action types (prompt, command)
- Add required `name` field for human-friendly promise tag display (e.g., "✓ Source/Test Pairing")
- Remove priority and defer features (not needed yet)
- Clarify .deepwork/tmp is gitignored, so cleanup is not critical
- Shorten output format - group by policy name, use simple arrow notation for correspondence
- Update all examples to include name field
---
 doc/policy_syntax.md        | 318 +++++++++++-------------------------
 doc/policy_system_design.md | 126 +++++---------
 doc/test_scenarios.md       |  82 ++++------
 3 files changed, 177 insertions(+), 349 deletions(-)

diff --git a/doc/policy_syntax.md b/doc/policy_syntax.md
index ec1c1a7..382e966 100644
--- a/doc/policy_syntax.md
+++ b/doc/policy_syntax.md
@@ -17,7 +17,7 @@ Policies are stored as individual markdown files with YAML frontmatter:
 
 Each file has:
 - **Frontmatter**: YAML configuration between `---` delimiters
-- **Body**: Instructions (for prompt policies) or description (for command policies)
+- **Body**: Instructions (for prompt actions) or description (for command actions)
 
 This structure enables code files to reference policies:
 ```python
@@ -28,15 +28,16 @@ class AuthService:
 
 ## Quick Reference
 
-### Instruction Policy
+### Simple Trigger with Prompt
 
 `.deepwork/policies/readme-accuracy.md`:
 ```markdown
 ---
+name: README Accuracy
 trigger: src/**/*
 safety: README.md
 ---
-Source code changed. Please verify README.md is accurate. Note that this is called only once even if there are many changes, so verify all changes.
+Source code changed. Please verify README.md is accurate.
 
 Check that:
 - All public APIs are documented
@@ -49,6 +50,7 @@ Check that:
 `.deepwork/policies/source-test-pairing.md`:
 ```markdown
 ---
+name: Source/Test Pairing
 set:
   - src/{path}.py
   - tests/{path}_test.py
@@ -64,6 +66,7 @@ When adding tests, ensure they test actual source code.
 `.deepwork/policies/api-documentation.md`:
 ```markdown
 ---
+name: API Documentation
 pair:
   trigger: api/{path}.py
   expects: docs/api/{path}.md
@@ -76,11 +79,12 @@ When modifying an API endpoint, update its documentation to reflect:
 - New error conditions
 ```
 
-### Command Policy
+### Command Action
 
 `.deepwork/policies/python-formatting.md`:
 ```markdown
 ---
+name: Python Formatting
 trigger: "**/*.py"
 action:
   command: ruff format {file}
@@ -91,212 +95,145 @@ This policy runs `ruff format` on any changed Python files to ensure
 consistent code style across the codebase.
 ```
 
-## Policy Types
+## Policy Structure
 
-### Instruction Policies
+Every policy has two orthogonal aspects:
 
-Instruction policies prompt the AI agent with guidance when certain files change.
+### Detection Mode
 
-**Frontmatter fields:**
-```yaml
----
-trigger: pattern              # Required: file pattern(s) that trigger
-safety: pattern               # Optional: file pattern(s) that suppress
-compare_to: base              # Optional: comparison baseline
-priority: normal              # Optional: output priority
----
-```
+How the policy decides when to fire:
 
-The markdown body contains the instructions shown to the agent.
+| Mode | Field | Description |
+|------|-------|-------------|
+| **Trigger/Safety** | `trigger`, `safety` | Fire when trigger matches and safety doesn't |
+| **Set** | `set` | Fire when file correspondence is incomplete (bidirectional) |
+| **Pair** | `pair` | Fire when file correspondence is incomplete (directional) |
 
-**Example:** `.deepwork/policies/security-review.md`
+### Action Type
 
-```markdown
+What happens when the policy fires:
+
+| Type | Field | Description |
+|------|-------|-------------|
+| **Prompt** (default) | (markdown body) | Show instructions to the agent |
+| **Command** | `action.command` | Run an idempotent command |
+
+## Detection Modes
+
+### Trigger/Safety Mode
+
+The simplest detection mode. Fires when changed files match `trigger` patterns and no changed files match `safety` patterns.
+
+```yaml
 ---
+name: Security Review
 trigger:
   - src/auth/**/*
   - src/crypto/**/*
 safety: SECURITY.md
 compare_to: base
-priority: critical
 ---
-Security-sensitive code has been modified.
-
-Please verify:
-1. No credentials are hardcoded
-2. Input validation is present
-3. Authentication checks are correct
 ```
 
-### Correspondence Sets
+### Set Mode (Bidirectional Correspondence)
 
-Sets define bidirectional relationships between files. When any file in a correspondence group changes, all related files should also change.
+Defines files that should change together. If ANY file in a correspondence group changes, ALL related files should also change.
 
-**Frontmatter fields:**
 ```yaml
 ---
-set:                            # Required: list of corresponding patterns
-  - pattern1/{path}.ext1
-  - pattern2/{path}.ext2
+name: Source/Test Pairing
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
 ---
 ```
 
-The markdown body contains instructions for when correspondence is incomplete.
-
 **How it works:**
 
 1. A file changes that matches one pattern in the set
 2. System extracts the variable portions (e.g., `{path}`)
 3. System generates expected files by substituting into other patterns
 4. If ALL expected files also changed: policy is satisfied (no trigger)
-5. If ANY expected file is missing: policy triggers with instructions
-
-**Example:** `.deepwork/policies/source-test-pairing.md`
-
-```markdown
----
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
----
-Source and test files should change together.
-
-Changed: {trigger_file}
-Expected: {expected_files}
-
-Please ensure both source and test are updated.
-```
+5. If ANY expected file is missing: policy fires
 
 If `src/auth/login.py` changes:
 - Extracts `{path}` = `auth/login`
 - Expects `tests/auth/login_test.py` to also change
-- If test didn't change, shows instructions
+- If test didn't change, fires with instructions
 
 If `tests/auth/login_test.py` changes:
 - Extracts `{path}` = `auth/login`
 - Expects `src/auth/login.py` to also change
-- If source didn't change, shows instructions
-
-**Example:** `.deepwork/policies/model-schema-migration.md`
-
-```markdown
----
-set:
-  - models/{name}.py
-  - schemas/{name}.py
-  - migrations/{name}.sql
----
-Models, schemas, and migrations should stay in sync.
-
-When modifying database models, ensure:
-- Schema definitions are updated
-- Migration files are created or updated
-```
+- If source didn't change, fires with instructions
 
-### Correspondence Pairs
+### Pair Mode (Directional Correspondence)
 
-Pairs define directional relationships. Changes to trigger files require corresponding expected files to change, but not vice versa.
+Defines directional relationships. Changes to trigger files require corresponding expected files to change, but not vice versa.
 
-**Frontmatter fields:**
 ```yaml
 ---
+name: API Documentation
 pair:
-  trigger: pattern/{path}.ext     # Required: pattern that triggers
-  expects: pattern/{path}.ext     # Required: expected to also change
+  trigger: api/{module}/{name}.py
+  expects: docs/api/{module}/{name}.md
 ---
 ```
 
-Can also specify multiple expected patterns:
+Can specify multiple expected patterns:
 
 ```yaml
 ---
 pair:
-  trigger: pattern/{path}.ext
+  trigger: api/{path}.py
   expects:
-    - pattern1/{path}.ext
-    - pattern2/{path}.ext
+    - docs/api/{path}.md
+    - schemas/{path}.json
 ---
 ```
 
-**Example:** `.deepwork/policies/api-documentation.md`
-
-```markdown
----
-pair:
-  trigger: api/{module}/{name}.py
-  expects: docs/api/{module}/{name}.md
----
-API endpoint changed without documentation update.
-
-Changed: {trigger_file}
-Please update: {expected_files}
-
-Ensure the documentation covers:
-- Endpoint URL and method
-- Request parameters
-- Response format
-- Error cases
-```
-
 If `api/users/create.py` changes:
 - Expects `docs/api/users/create.md` to also change
-- If doc didn't change, shows instructions
+- If doc didn't change, fires with instructions
 
 If `docs/api/users/create.md` changes alone:
 - No trigger (documentation can be updated independently)
 
-### Command Policies
+## Action Types
 
-Command policies run idempotent commands instead of prompting the agent.
+### Prompt Action (Default)
 
-**Frontmatter fields:**
-```yaml
----
-trigger: pattern                  # Required: files that trigger
-safety: pattern                   # Optional: files that suppress
-action:
-  command: command {file}         # Required: command to run
-  run_for: each_match             # Optional: each_match (default) or all_matches
----
-```
+The markdown body after frontmatter serves as instructions shown to the agent. This is the default when no `action` field is specified.
 
-The markdown body serves as a description of what the command does (shown in logs, not to agent).
+**Template Variables in Instructions:**
 
-**Template Variables in Commands:**
+| Variable | Description |
+|----------|-------------|
+| `{trigger_file}` | The file that triggered the policy |
+| `{trigger_files}` | All files that matched trigger patterns |
+| `{expected_files}` | Expected corresponding files (for sets/pairs) |
 
-| Variable | Description | Available When |
-|----------|-------------|----------------|
-| `{file}` | Single file path | `run_for: each_match` |
-| `{files}` | Space-separated file paths | `run_for: all_matches` |
-| `{repo_root}` | Repository root directory | Always |
+### Command Action
 
-**Example:** `.deepwork/policies/python-formatting.md`
+Runs an idempotent command instead of prompting the agent.
 
-```markdown
+```yaml
 ---
+name: Python Formatting
 trigger: "**/*.py"
 safety: "*.pyi"
 action:
   command: ruff format {file}
   run_for: each_match
 ---
-Automatically formats Python files using ruff.
-
-This ensures consistent code style without requiring manual formatting.
-Stub files (*.pyi) are excluded as they have different formatting rules.
 ```
 
-**Example:** `.deepwork/policies/eslint-check.md`
+**Template Variables in Commands:**
 
-```markdown
----
-trigger: "**/*.{js,ts,tsx}"
-action:
-  command: eslint --fix {files}
-  run_for: all_matches
----
-Runs ESLint with auto-fix on all changed JavaScript/TypeScript files.
-```
+| Variable | Description | Available When |
+|----------|-------------|----------------|
+| `{file}` | Single file path | `run_for: each_match` |
+| `{files}` | Space-separated file paths | `run_for: all_matches` |
+| `{repo_root}` | Repository root directory | Always |
 
 **Idempotency Requirement:**
 
@@ -357,6 +294,16 @@ To explicitly control this, use `{**name}` for multi-segment or `{*name}` for si
 
 ## Field Reference
 
+### name (required)
+
+Human-friendly name for the policy. Displayed in promise tags and output.
+
+```yaml
+---
+name: Source/Test Pairing
+---
+```
+
 ### File Naming
 
 Policy files are named using kebab-case with `.md` extension:
@@ -364,20 +311,18 @@ Policy files are named using kebab-case with `.md` extension:
 - `source-test-pairing.md`
 - `api-documentation.md`
 
-The filename (without extension) serves as the policy's unique identifier for logging and promise tags.
+The filename serves as the policy's identifier in the queue system.
 
-### trigger (instruction/command policies)
+### trigger
 
-File patterns that cause the policy to fire. Can be string or array.
+File patterns that cause the policy to fire (trigger/safety mode). Can be string or array.
 
 ```yaml
 ---
-# Single pattern
 trigger: src/**/*.py
 ---
 
 ---
-# Multiple patterns
 trigger:
   - src/**/*.py
   - lib/**/*.py
@@ -390,21 +335,19 @@ File patterns that suppress the policy. If ANY changed file matches a safety pat
 
 ```yaml
 ---
-# Single pattern
 safety: CHANGELOG.md
 ---
 
 ---
-# Multiple patterns
 safety:
   - CHANGELOG.md
   - docs/**/*
 ---
 ```
 
-### set (correspondence sets)
+### set
 
-List of patterns defining bidirectional file relationships.
+List of patterns defining bidirectional file relationships (set mode).
 
 ```yaml
 ---
@@ -414,9 +357,9 @@ set:
 ---
 ```
 
-### pair (correspondence pairs)
+### pair
 
-Object with `trigger` and `expects` patterns for directional relationships.
+Object with `trigger` and `expects` patterns for directional relationships (pair mode).
 
 ```yaml
 ---
@@ -426,7 +369,6 @@ pair:
 ---
 
 ---
-# Or with multiple expects
 pair:
   trigger: api/{path}.py
   expects:
@@ -435,20 +377,7 @@ pair:
 ---
 ```
 
-### Markdown Body (instructions)
-
-The markdown content after the frontmatter serves as instructions shown to the agent when the policy fires.
-
-**Template Variables in Instructions:**
-
-| Variable | Description |
-|----------|-------------|
-| `{trigger_file}` | The file that triggered the policy |
-| `{trigger_files}` | All files that matched trigger patterns |
-| `{expected_files}` | Expected corresponding files (for sets/pairs) |
-| `{safety_files}` | Files that would suppress the policy |
-
-### action (command policies)
+### action (optional)
 
 Specifies a command to run instead of prompting.
 
@@ -476,33 +405,6 @@ compare_to: prompt
 ---
 ```
 
-### priority (optional)
-
-Controls output ordering and visibility.
-
-| Value | Behavior |
-|-------|----------|
-| `critical` | Always shown first, blocks progress |
-| `high` | Shown prominently |
-| `normal` (default) | Standard display |
-| `low` | Shown in summary, may be collapsed |
-
-```yaml
----
-priority: critical
----
-```
-
-### defer (optional)
-
-When `true`, policy output is deferred to end of session.
-
-```yaml
----
-defer: true
----
-```
-
 ## Complete Examples
 
 ### Example 1: Test Coverage Policy
@@ -510,10 +412,10 @@ defer: true
 `.deepwork/policies/test-coverage.md`:
 ```markdown
 ---
+name: Test Coverage
 set:
   - src/{path}.py
   - tests/{path}_test.py
-compare_to: base
 ---
 Source code was modified without corresponding test updates.
 
@@ -522,7 +424,7 @@ Expected test: {expected_files}
 
 Please either:
 1. Add/update tests for the changed code
-2. Explain why tests are not needed (and mark with <promise>)
+2. Explain why tests are not needed
 ```
 
 ### Example 2: Documentation Sync
@@ -530,18 +432,16 @@ Please either:
 `.deepwork/policies/api-documentation-sync.md`:
 ```markdown
 ---
+name: API Documentation Sync
 pair:
   trigger: src/api/{module}/{endpoint}.py
   expects:
     - docs/api/{module}/{endpoint}.md
     - openapi/{module}.yaml
-priority: high
 ---
 API endpoint changed. Please update:
 - Documentation: {expected_files}
 - Ensure OpenAPI spec is current
-
-If this is an internal-only change, mark as addressed.
 ```
 
 ### Example 3: Auto-formatting Pipeline
@@ -549,6 +449,7 @@ If this is an internal-only change, mark as addressed.
 `.deepwork/policies/python-black-formatting.md`:
 ```markdown
 ---
+name: Python Black Formatting
 trigger: "**/*.py"
 safety:
   - "**/*.pyi"
@@ -564,22 +465,12 @@ Excludes:
 - Database migration files
 ```
 
-`.deepwork/policies/typescript-prettier.md`:
-```markdown
----
-trigger: "**/*.{ts,tsx}"
-action:
-  command: prettier --write {file}
-  run_for: each_match
----
-Formats TypeScript files using Prettier.
-```
-
 ### Example 4: Multi-file Correspondence
 
 `.deepwork/policies/full-stack-feature-sync.md`:
 ```markdown
 ---
+name: Full Stack Feature Sync
 set:
   - backend/api/{feature}/routes.py
   - backend/api/{feature}/models.py
@@ -593,9 +484,6 @@ When modifying a feature, ensure:
 - Backend models are updated
 - Frontend API client is updated
 - Frontend components are updated
-
-Changed: {trigger_files}
-Expected: {expected_files}
 ```
 
 ### Example 5: Conditional Safety
@@ -603,15 +491,13 @@ Expected: {expected_files}
 `.deepwork/policies/version-bump-required.md`:
 ```markdown
 ---
+name: Version Bump Required
 trigger:
   - src/**/*.py
   - pyproject.toml
 safety:
   - pyproject.toml
   - CHANGELOG.md
-compare_to: base
-priority: low
-defer: true
 ---
 Code changes detected. Before merging, ensure:
 - Version is bumped in pyproject.toml (if needed)
@@ -623,20 +509,14 @@ or CHANGELOG.md, as that indicates you're handling versioning.
 
 ## Promise Tags
 
-When a policy fires but should be dismissed, use promise tags in the conversation:
-
-```
-<promise>policy-filename</promise>
-```
-
-Use the policy filename (without `.md` extension) as the identifier:
+When a policy fires but should be dismissed, use promise tags in the conversation. The tag content should be human-readable, using the policy's `name` field with a checkmark:
 
 ```
-<promise>test-coverage</promise>
-<promise>api-documentation-sync</promise>
+<promise>✓ Source/Test Pairing</promise>
+<promise>✓ API Documentation Sync</promise>
 ```
 
-This tells the system the policy has been addressed (either by action or explicit acknowledgment).
+The checkmark and friendly name make promise tags easy to read when displayed in the conversation. The system matches promise tags to policies using case-insensitive comparison of the `name` field (ignoring the checkmark prefix).
 
 ## Validation
 
diff --git a/doc/policy_system_design.md b/doc/policy_system_design.md
index b7a158d..93f4989 100644
--- a/doc/policy_system_design.md
+++ b/doc/policy_system_design.md
@@ -11,26 +11,37 @@ The deepwork policy system enables automated enforcement of development standard
 
 ## Core Concepts
 
-### Policy Types
+### Policy Structure
 
-The system supports three policy types:
+Every policy has two orthogonal aspects:
 
-| Type | Purpose | Trigger Direction |
-|------|---------|-------------------|
-| **Instruction policies** | Prompt agent with instructions | Any matched file |
-| **Command policies** | Run idempotent commands | Any matched file |
-| **Correspondence policies** | Enforce file relationships | When relationship is incomplete |
+**Detection Mode** - How the policy decides when to fire:
 
-### File Correspondence
+| Mode | Field | Description |
+|------|-------|-------------|
+| **Trigger/Safety** | `trigger`, `safety` | Fire when trigger matches and safety doesn't |
+| **Set** | `set` | Fire when file correspondence is incomplete (bidirectional) |
+| **Pair** | `pair` | Fire when file correspondence is incomplete (directional) |
 
-Correspondence policies define relationships between files that should change together.
+**Action Type** - What happens when the policy fires:
 
-**Sets (Bidirectional)**
+| Type | Field | Description |
+|------|-------|-------------|
+| **Prompt** (default) | (markdown body) | Show instructions to the agent |
+| **Command** | `action.command` | Run an idempotent command |
+
+### Detection Modes
+
+**Trigger/Safety Mode**
+- Simplest mode: fire when files match `trigger` and none match `safety`
+- Good for general checks like "source changed, verify README"
+
+**Set Mode (Bidirectional Correspondence)**
 - Define N patterns that share a common variable path
 - If ANY file matching one pattern changes, ALL corresponding files should change
 - Example: Source files and their tests
 
-**Pairs (Directional)**
+**Pair Mode (Directional Correspondence)**
 - Define a trigger pattern and one or more expected patterns
 - Changes to trigger files require corresponding expected files to also change
 - Changes to expected files alone do not trigger the policy
@@ -51,22 +62,14 @@ Special variable names:
 - `{**}` - Explicit multi-segment wildcard
 - `{*}` - Explicit single-segment wildcard
 
-### Actions
-
-Policies can specify two types of actions:
+### Action Types
 
 **Prompt Action (default)**
-```yaml
-action:
-  type: prompt
-  instructions: |
-    Please review the changes...
-```
+The markdown body of the policy file serves as instructions shown to the agent.
 
 **Command Action**
 ```yaml
 action:
-  type: command
   command: "ruff format {file}"
   run_for: each_match
 ```
@@ -156,9 +159,7 @@ The queue persists policy trigger state in `.deepwork/tmp/policy/queue/`:
 ```
 
 **Queue Cleanup**:
-- Entries older than 24 hours are automatically pruned
-- `passed` and `skipped` entries are pruned after 1 hour
-- Manual cleanup via `deepwork policy clear-queue`
+Since `.deepwork/tmp/` is gitignored, queue entries are transient local state. No aggressive cleanup is required—entries can accumulate without causing issues. The directory can be safely deleted at any time to reset state.
 
 ### Evaluator
 
@@ -296,61 +297,27 @@ When many policies trigger, the agent receives excessive output, degrading perfo
 ### Solution
 
 **1. Output Batching**
-Group related policies into single messages:
+Group related policies into concise sections:
 
 ```
 The following policies require attention:
 
-## File Correspondence Issues (3)
-
-1. **Source/Test Pairing**: src/auth/login.py changed without tests/auth/login_test.py
-2. **Source/Test Pairing**: src/api/users.py changed without tests/api/users_test.py
-3. **API Documentation**: api/users.py changed without docs/api/users.md
- 
-[FEEDBACK] - these sections should all be shorter. Ex:
-## Source/Test Pairings
-src/auth/login.py changed without tests/auth/login_test.py
-src/api/users.py changed without tests/api/users_test.py
+## Source/Test Pairing
+src/auth/login.py → tests/auth/login_test.py
+src/api/users.py → tests/api/users_test.py
 
 ## API Documentation
-api/users.py changed without docs/api/users.md
-
-## Code Quality (1)
-
-4. **README Accuracy**: Source files changed, please verify README.md
-```
-
-**2. Priority Levels**
-Policies can specify priority (critical, high, normal, low):
-
-```yaml
-- name: "Security Review"
-  trigger: "src/auth/**/*"
-  priority: critical
-```
+api/users.py → docs/api/users.md
 
-Only critical and high priority shown immediately. Normal/low shown in summary.
-
-**3. Deferred Policies**
-Low-priority policies can be deferred to end of session:
-
-```yaml
-- name: "Documentation Check"
-  trigger: "src/**/*"
-  priority: low
-  defer: true  # Show at session end, not immediately
-```
-
-**4. Collapsed Instructions**
-Long instructions are truncated with expansion available:
-
-```
 ## README Accuracy
+Source files changed. Verify README.md is accurate.
+```
 
-Source code changed. Please verify README.md is accurate.
+**2. Grouped by Policy Name**
+Multiple violations of the same policy are grouped together under a single heading, keeping output compact.
 
-[+] Show full instructions (15 lines)
-```
+**3. Minimal Decoration**
+Avoid excessive formatting, numbering, or emphasis. Use simple arrow notation for correspondence violations.
 
 ## State Persistence
 
@@ -363,7 +330,7 @@ Source code changed. Please verify README.md is accurate.
 │   ├── source-test-pairing.md
 │   ├── api-documentation.md
 │   └── python-formatting.md
-├── tmp/
+├── tmp/                     # GITIGNORED - transient state
 │   └── policy/
 │       ├── queue/           # Queue entries
 │       │   ├── abc123.queued.json
@@ -375,15 +342,17 @@ Source code changed. Please verify README.md is accurate.
 └── policy_state.json        # Session state summary
 ```
 
+**Important:** The entire `.deepwork/tmp/` directory is gitignored. All queue entries, baselines, and caches are local transient state that is not committed. This means cleanup is not critical—files can accumulate and will be naturally cleaned when the directory is deleted or the repo is re-cloned.
+
 ### Policy File Format
 
 Each policy is a markdown file with YAML frontmatter:
 
 ```markdown
 ---
+name: README Accuracy
 trigger: src/**/*.py
 safety: README.md
-priority: normal
 ---
 Instructions shown to the agent when this policy fires.
 
@@ -425,16 +394,10 @@ Multiple baselines can exist for different prompts in a session.
     ┌─────────┐   ┌─────────┐   ┌─────────┐
     │ .passed │   │ .failed │   │.skipped │
     └─────────┘   └─────────┘   └─────────┘
-         │             │             │
-         └─────────────┼─────────────┘
-                       │
-                       ▼
-                  ┌─────────┐
-                  │ Pruned  │
-                  │(cleanup)│
-                  └─────────┘
 ```
 
+Terminal states persist in `.deepwork/tmp/` (gitignored) until manually cleared or the directory is deleted.
+
 ## Error Handling
 
 ### Pattern Errors
@@ -508,10 +471,7 @@ In `.deepwork/config.yml`:
 policy:
   enabled: true
   policies_dir: .deepwork/policies  # Can be customized
-  queue_retention_hours: 24
-  max_queued_entries: 100
-  output_mode: batched  # batched, individual, summary
-  priority_threshold: normal  # Show this priority and above
+  output_mode: batched  # batched or individual
 ```
 
 ## Performance Considerations
diff --git a/doc/test_scenarios.md b/doc/test_scenarios.md
index c9460f7..0651ad2 100644
--- a/doc/test_scenarios.md
+++ b/doc/test_scenarios.md
@@ -76,17 +76,17 @@ Setup: Branch diverged 3 commits ago from main
 
 ### 2.3 Promise Tags
 
-Policy names are now derived from filenames (without `.md` extension).
-
-| ID | Scenario | Conversation Contains | Policy File | Expected |
-|----|----------|----------------------|-------------|----------|
-| IP-2.3.1 | Exact promise | `<promise>readme-accuracy</promise>` | `readme-accuracy.md` | Suppressed |
-| IP-2.3.2 | Promise with checkmark | `<promise>✓ readme-accuracy</promise>` | `readme-accuracy.md` | Suppressed |
-| IP-2.3.3 | Case insensitive | `<promise>README-ACCURACY</promise>` | `readme-accuracy.md` | Suppressed |
-| IP-2.3.4 | Whitespace | `<promise>  readme-accuracy  </promise>` | `readme-accuracy.md` | Suppressed |
-| IP-2.3.5 | No promise | (none) | `readme-accuracy.md` | Not suppressed |
-| IP-2.3.6 | Wrong promise | `<promise>other-policy</promise>` | `readme-accuracy.md` | Not suppressed |
-| IP-2.3.7 | Multiple promises | `<promise>a</promise><promise>b</promise>` | `a.md` | Suppressed |
+Promise tags use the policy's `name` field (not filename), optionally prefixed with a checkmark for human readability.
+
+| ID | Scenario | Conversation Contains | Policy `name` | Expected |
+|----|----------|----------------------|---------------|----------|
+| IP-2.3.1 | Standard promise | `<promise>✓ README Accuracy</promise>` | `README Accuracy` | Suppressed |
+| IP-2.3.2 | Without checkmark | `<promise>README Accuracy</promise>` | `README Accuracy` | Suppressed |
+| IP-2.3.3 | Case insensitive | `<promise>✓ readme accuracy</promise>` | `README Accuracy` | Suppressed |
+| IP-2.3.4 | Whitespace | `<promise>  ✓ README Accuracy  </promise>` | `README Accuracy` | Suppressed |
+| IP-2.3.5 | No promise | (none) | `README Accuracy` | Not suppressed |
+| IP-2.3.6 | Wrong promise | `<promise>✓ Other Policy</promise>` | `README Accuracy` | Not suppressed |
+| IP-2.3.7 | Multiple promises | `<promise>✓ A</promise><promise>✓ B</promise>` | `A` | Suppressed |
 
 ## 3. Correspondence Sets
 
@@ -259,38 +259,22 @@ action:
 
 ## 7. Output Management
 
-### 7.1 Priority Ordering
-
-```
-Policies:
-- Critical: "Security Review"
-- High: "API Documentation"
-- Normal: "README Accuracy"
-- Low: "Code Style"
-```
-
-| ID | Scenario | Triggered Policies | Expected Order |
-|----|----------|-------------------|----------------|
-| OM-7.1.1 | All priorities | All 4 | Security, API, README, Style |
-| OM-7.1.2 | Mixed | High, Low | API, Style |
-| OM-7.1.3 | Same priority | 3 Normal | Alphabetical within priority |
-
-### 7.2 Output Batching
+### 7.1 Output Batching
 
 | ID | Scenario | Triggered Policies | Expected Output |
 |----|----------|-------------------|-----------------|
-| OM-7.2.1 | Single policy | 1 | Full instructions |
-| OM-7.2.2 | Two policies | 2 | Both, numbered |
-| OM-7.2.3 | Many policies | 10 | Batched with summary |
-| OM-7.2.4 | Same type | 3 Source/Test pairs | Grouped under heading |
+| OM-7.1.1 | Single policy | 1 | Full instructions |
+| OM-7.1.2 | Two policies | 2 | Both, grouped |
+| OM-7.1.3 | Many policies | 10 | Batched by policy name |
+| OM-7.1.4 | Same policy multiple files | 3 Source/Test pairs | Grouped under single heading |
 
-### 7.3 Deferred Policies
+### 7.2 Output Format
 
-| ID | Scenario | Policy defer Setting | Agent Action | Expected |
-|----|----------|---------------------|--------------|----------|
-| OM-7.3.1 | Deferred, stop | `defer: true` | Stop | Not shown |
-| OM-7.3.2 | Deferred, session end | `defer: true` | Session ends | Shown |
-| OM-7.3.3 | Not deferred | `defer: false` | Stop | Shown |
+| ID | Scenario | Input | Expected Format |
+|----|----------|-------|-----------------|
+| OM-7.2.1 | Correspondence violation | `src/foo.py` missing `tests/foo_test.py` | `src/foo.py → tests/foo_test.py` |
+| OM-7.2.2 | Multiple same policy | 3 correspondence violations | Single heading, 3 lines |
+| OM-7.2.3 | Instruction policy | Source files changed | Short summary + instructions |
 
 ## 8. Schema Validation
 
@@ -299,17 +283,17 @@ Policies:
 | ID | Scenario | Missing Field | Expected Error |
 |----|----------|---------------|----------------|
 | SV-8.1.1 | Missing name | `name` | "required field 'name'" |
-| SV-8.1.2 | Missing trigger (instruction) | `trigger` | "required 'trigger', 'set', or 'pair'" |
-| SV-8.1.3 | Missing instructions | `instructions` | "required 'instructions' or 'instructions_file'" |
+| SV-8.1.2 | Missing detection mode | no `trigger`, `set`, or `pair` | "must have 'trigger', 'set', or 'pair'" |
+| SV-8.1.3 | Missing markdown body | empty body (prompt action) | "instruction policies require markdown body" |
 | SV-8.1.4 | Missing set patterns | `set` is empty | "set requires at least 2 patterns" |
 
 ### 8.2 Mutually Exclusive Fields
 
 | ID | Scenario | Fields Present | Expected Error |
 |----|----------|----------------|----------------|
-| SV-8.2.1 | Both instructions types | `instructions` + `instructions_file` | "use one or the other" |
-| SV-8.2.2 | Both trigger types | `trigger` + `set` | "use trigger, set, or pair" |
-| SV-8.2.3 | All trigger types | `trigger` + `set` + `pair` | "use one policy type" |
+| SV-8.2.1 | Both trigger and set | `trigger` + `set` | "use trigger, set, or pair" |
+| SV-8.2.2 | Both trigger and pair | `trigger` + `pair` | "use trigger, set, or pair" |
+| SV-8.2.3 | All detection modes | `trigger` + `set` + `pair` | "use only one detection mode" |
 
 ### 8.3 Pattern Validation
 
@@ -325,8 +309,7 @@ Policies:
 | ID | Scenario | Field | Value | Expected Error |
 |----|----------|-------|-------|----------------|
 | SV-8.4.1 | Invalid compare_to | `compare_to` | `"yesterday"` | "must be base, default_tip, or prompt" |
-| SV-8.4.2 | Invalid priority | `priority` | `"urgent"` | "must be critical, high, normal, or low" |
-| SV-8.4.3 | Invalid run_for | `run_for` | `"first_match"` | "must be each_match or all_matches" |
+| SV-8.4.2 | Invalid run_for | `run_for` | `"first_match"` | "must be each_match or all_matches" |
 
 ## 9. Integration Tests
 
@@ -439,6 +422,7 @@ Policies are stored as individual markdown files in `.deepwork/policies/`:
 **`.deepwork/policies/readme-accuracy.md`**
 ```markdown
 ---
+name: README Accuracy
 trigger: src/**/*
 safety: README.md
 ---
@@ -448,6 +432,7 @@ Please review README.md for accuracy.
 **`.deepwork/policies/source-test-pairing.md`**
 ```markdown
 ---
+name: Source/Test Pairing
 set:
   - src/{path}.py
   - tests/{path}_test.py
@@ -458,6 +443,7 @@ Source and test should change together.
 **`.deepwork/policies/api-documentation.md`**
 ```markdown
 ---
+name: API Documentation
 pair:
   trigger: api/{module}.py
   expects: docs/api/{module}.md
@@ -468,6 +454,7 @@ API changes need documentation.
 **`.deepwork/policies/python-formatting.md`**
 ```markdown
 ---
+name: Python Formatting
 trigger: "**/*.py"
 action:
   command: black {file}
@@ -480,7 +467,8 @@ Auto-formats Python files with Black.
 
 ```json
 {
-  "policy_name": "source-test-pairing",
+  "policy_name": "Source/Test Pairing",
+  "policy_file": "source-test-pairing.md",
   "trigger_hash": "abc123def456",
   "status": "queued",
   "created_at": "2024-01-16T10:00:00Z",
@@ -502,7 +490,7 @@ Auto-formats Python files with Black.
 │   ├── source-test-pairing.md
 │   ├── api-documentation.md
 │   └── python-formatting.md
-└── tmp/
+└── tmp/                         # GITIGNORED
     └── policy/
         └── queue/
             └── (queue entries created during tests)

From 113a5eef2a03c80c52e124991be0699c37830762 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 16 Jan 2026 18:41:30 +0000
Subject: [PATCH 04/10] Remove idempotency verification and unused output_mode
 config

- Don't enforce idempotency, just document it as expected behavior
- Give lint formatters (black, ruff, prettier) as good examples
- Remove output_mode from config (not referenced elsewhere)
- Remove idempotency verification test scenarios
---
 doc/policy_syntax.md        |  6 +-----
 doc/policy_system_design.md |  3 +--
 doc/test_scenarios.md       | 18 +++++-------------
 3 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/doc/policy_syntax.md b/doc/policy_syntax.md
index 382e966..4914a8d 100644
--- a/doc/policy_syntax.md
+++ b/doc/policy_syntax.md
@@ -237,11 +237,7 @@ action:
 
 **Idempotency Requirement:**
 
-Commands MUST be idempotent. The system verifies this by:
-1. Running the command
-2. Checking for changes
-3. If changes occurred, running again
-4. If more changes occur, marking as failed
+Commands should be idempotent—running them multiple times produces the same result. Lint formatters like `black`, `ruff format`, and `prettier` are good examples: they produce consistent output regardless of how many times they run.
 
 ## Pattern Syntax
 
diff --git a/doc/policy_system_design.md b/doc/policy_system_design.md
index 93f4989..d15e65b 100644
--- a/doc/policy_system_design.md
+++ b/doc/policy_system_design.md
@@ -74,7 +74,7 @@ action:
   run_for: each_match
 ```
 
-Command actions execute idempotent commands. The system verifies idempotency by running the command twice and checking that no additional changes occur.
+Command actions should be idempotent—running them multiple times produces the same result. Lint formatters like `black`, `ruff format`, and `prettier` are good examples.
 
 ## Architecture
 
@@ -471,7 +471,6 @@ In `.deepwork/config.yml`:
 policy:
   enabled: true
   policies_dir: .deepwork/policies  # Can be customized
-  output_mode: batched  # batched or individual
 ```
 
 ## Performance Considerations
diff --git a/doc/test_scenarios.md b/doc/test_scenarios.md
index 0651ad2..9ef03c0 100644
--- a/doc/test_scenarios.md
+++ b/doc/test_scenarios.md
@@ -199,22 +199,14 @@ action:
 | CMD-5.2.1 | Multiple files | `["a.js", "b.js", "c.js"]` | `eslint --fix a.js b.js c.js` |
 | CMD-5.2.2 | Single file | `["a.js"]` | `eslint --fix a.js` |
 
-### 5.3 Idempotency Verification
-
-| ID | Scenario | First Run | Second Run | Expected Result |
-|----|----------|-----------|------------|-----------------|
-| CMD-5.3.1 | Truly idempotent | Changes files | No changes | Pass |
-| CMD-5.3.2 | Not idempotent | Changes files | Changes files | Fail |
-| CMD-5.3.3 | No changes needed | No changes | (not run) | Pass |
-
-### 5.4 Command Errors
+### 5.3 Command Errors
 
 | ID | Scenario | Command Result | Expected |
 |----|----------|----------------|----------|
-| CMD-5.4.1 | Exit code 0 | Success | Pass |
-| CMD-5.4.2 | Exit code 1 | Failure | Fail, show stderr |
-| CMD-5.4.3 | Timeout | Command hangs | Fail, timeout error |
-| CMD-5.4.4 | Command not found | Not executable | Fail, not found error |
+| CMD-5.3.1 | Exit code 0 | Success | Pass |
+| CMD-5.3.2 | Exit code 1 | Failure | Fail, show stderr |
+| CMD-5.3.3 | Timeout | Command hangs | Fail, timeout error |
+| CMD-5.3.4 | Command not found | Not executable | Fail, not found error |
 
 ## 6. Queue System
 

From 549cfa5221528f67ac629f9f8367911f197d30a2 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 16 Jan 2026 18:53:10 +0000
Subject: [PATCH 05/10] Implement policy system v2 with sets, pairs, and
 command actions

This implements the redesigned policy system with:

- Detection modes: trigger/safety (default), set (bidirectional), pair (directional)
- Action types: prompt (show instructions), command (run idempotent command)
- Variable pattern matching: {path} for multi-segment, {name} for single-segment
- Queue system in .deepwork/tmp/policy/queue/ for state tracking
- Frontmatter markdown format for policy files in .deepwork/policies/

New core modules:
- pattern_matcher.py: Variable pattern matching with regex
- policy_queue.py: Queue system for policy state persistence
- command_executor.py: Command action execution with substitution

Updates to existing modules:
- policy_parser.py: v2 Policy class with detection modes and action types
- policy_check.py: Uses new v2 system with queue deduplication
- evaluate_policies.py: Updated for v1 backward compatibility
- policy_schema.py: New frontmatter schema for v2 format

Tests updated to work with both v1 and v2 APIs.
---
 src/deepwork/core/command_executor.py   | 169 +++++++
 src/deepwork/core/pattern_matcher.py    | 271 +++++++++++
 src/deepwork/core/policy_parser.py      | 616 ++++++++++++++++++------
 src/deepwork/core/policy_queue.py       | 321 ++++++++++++
 src/deepwork/hooks/evaluate_policies.py |  44 +-
 src/deepwork/hooks/policy_check.py      | 229 +++++++--
 src/deepwork/schemas/policy_schema.py   | 112 ++++-
 tests/unit/test_evaluate_policies.py    |  10 +-
 tests/unit/test_policy_parser.py        |  79 ++-
 9 files changed, 1638 insertions(+), 213 deletions(-)
 create mode 100644 src/deepwork/core/command_executor.py
 create mode 100644 src/deepwork/core/pattern_matcher.py
 create mode 100644 src/deepwork/core/policy_queue.py

diff --git a/src/deepwork/core/command_executor.py b/src/deepwork/core/command_executor.py
new file mode 100644
index 0000000..7db8ee2
--- /dev/null
+++ b/src/deepwork/core/command_executor.py
@@ -0,0 +1,169 @@
+"""Execute command actions for policies."""
+
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+
+from deepwork.core.policy_parser import CommandAction
+
+
+@dataclass
+class CommandResult:
+    """Result of executing a command, as filled in by execute_command."""
+
+    success: bool  # True when the process exited with return code 0
+    exit_code: int  # Process return code; -1 on timeout or other exception
+    stdout: str  # Captured standard output ("" on timeout or exception)
+    stderr: str  # Captured standard error, or the timeout/exception message
+    command: str  # The actual command that was run
+
+
+def substitute_command_variables(
+    command_template: str,
+    file: str | None = None,
+    files: list[str] | None = None,
+    repo_root: Path | None = None,
+) -> str:
+    """
+    Substitute template variables in a command string.
+
+    Variables (each replaced only when its argument is not None):
+    - {file} - Single file path
+    - {files} - File paths joined with single spaces
+    - {repo_root} - Repository root directory, converted with str()
+
+    Args:
+        command_template: Command string with {var} placeholders
+        file: Single file path (for run_for: each_match)
+        files: List of file paths (for run_for: all_matches)
+        repo_root: Repository root path
+
+    Returns:
+        Command string with values inserted verbatim (no shell quoting)
+    """
+    result = command_template
+
+    if file is not None:
+        result = result.replace("{file}", file)
+
+    if files is not None:
+        result = result.replace("{files}", " ".join(files))
+
+    if repo_root is not None:
+        result = result.replace("{repo_root}", str(repo_root))
+
+    return result
+
+
+def execute_command(
+    command: str,
+    cwd: Path | None = None,
+    timeout: int = 60,
+) -> CommandResult:
+    """
+    Execute a command through the shell and capture its output as text.
+
+    Args:
+        command: Command string to execute (passed to the shell unmodified)
+        cwd: Working directory (defaults to current directory)
+        timeout: Timeout in seconds
+
+    Returns:
+        CommandResult; timeout or Exception yields success=False, exit_code=-1
+    """
+    try:
+        # Run command as shell to support pipes, etc.
+        result = subprocess.run(
+            command,
+            shell=True,
+            cwd=cwd,
+            capture_output=True,
+            text=True,
+            timeout=timeout,
+        )
+
+        return CommandResult(
+            success=result.returncode == 0,
+            exit_code=result.returncode,
+            stdout=result.stdout,
+            stderr=result.stderr,
+            command=command,
+        )
+
+    except subprocess.TimeoutExpired:
+        return CommandResult(
+            success=False,
+            exit_code=-1,
+            stdout="",
+            stderr=f"Command timed out after {timeout} seconds",
+            command=command,
+        )
+    except Exception as e:
+        return CommandResult(
+            success=False,
+            exit_code=-1,
+            stdout="",
+            stderr=str(e),
+            command=command,
+        )
+
+
+def run_command_action(
+    action: CommandAction,
+    trigger_files: list[str],
+    repo_root: Path | None = None,
+) -> list[CommandResult]:
+    """
+    Run a command action for the given trigger files, with repo_root as cwd.
+
+    Args:
+        action: CommandAction configuration (command and run_for are read)
+        trigger_files: Files that triggered the policy
+        repo_root: Repository root path; also substituted for {repo_root}
+
+    Returns:
+        One CommandResult per execution; [] if run_for is neither mode below
+    """
+    results: list[CommandResult] = []
+
+    if action.run_for == "each_match":
+        # One execution per file; {file} is substituted, {files} is not
+        for file_path in trigger_files:
+            command = substitute_command_variables(
+                action.command,
+                file=file_path,
+                repo_root=repo_root,
+            )
+            result = execute_command(command, cwd=repo_root)
+            results.append(result)
+
+    elif action.run_for == "all_matches":
+        # Single execution; {files} is substituted, {file} is not
+        command = substitute_command_variables(
+            action.command,
+            files=trigger_files,
+            repo_root=repo_root,
+        )
+        result = execute_command(command, cwd=repo_root)
+        results.append(result)
+
+    return results
+
+
+def all_commands_succeeded(results: list[CommandResult]) -> bool:
+    """Return True if every result succeeded (also True for an empty list)."""
+    return all(r.success for r in results)
+
+
+def format_command_errors(results: list[CommandResult]) -> str:
+    """Format one message per failed result, newline-joined ("" if none)."""
+    errors: list[str] = []
+    for result in results:
+        if not result.success:
+            msg = f"Command failed: {result.command}\n"
+            if result.stderr:
+                msg += f"Error: {result.stderr}\n"
+            if result.exit_code != 0:
+                msg += f"Exit code: {result.exit_code}\n"
+            errors.append(msg)
+    return "\n".join(errors)
diff --git a/src/deepwork/core/pattern_matcher.py b/src/deepwork/core/pattern_matcher.py
new file mode 100644
index 0000000..215b1d9
--- /dev/null
+++ b/src/deepwork/core/pattern_matcher.py
@@ -0,0 +1,271 @@
+"""Pattern matching with variable extraction for policy file correspondence."""
+
+import re
+from dataclasses import dataclass
+from fnmatch import fnmatch
+
+
+class PatternError(Exception):
+    """Signals malformed placeholder syntax in a pattern string.
+
+    A plain ``Exception`` subclass; the message describes the problem."""
+
+
+@dataclass
+class MatchResult:
+    """Outcome of testing one file path against one pattern."""
+
+    matched: bool  # True when the path satisfied the pattern
+    variables: dict[str, str]  # Placeholder name -> captured text
+
+    @classmethod
+    def no_match(cls) -> "MatchResult":
+        return cls(False, {})  # a failed match never carries captures
+
+    @classmethod
+    def match(cls, variables: dict[str, str] | None = None) -> "MatchResult":
+        return cls(True, variables if variables else {})
+
+
+def validate_pattern(pattern: str) -> None:
+    """
+    Validate pattern syntax.
+
+    Braces must be balanced and un-nested. Each placeholder's name (after an
+    optional ``*`` / ``**`` prefix) must be an identifier: it names a regex group.
+
+    Raises:
+        PatternError: If pattern has invalid syntax
+    """
+    # Check for unbalanced or nested braces
+    inside = False  # True while scanning between "{" and its "}"
+    for i, char in enumerate(pattern):
+        if char == "{":
+            if inside:
+                raise PatternError(f"Nested opening brace at position {i}")
+            inside = True
+        elif char == "}":
+            if not inside:
+                raise PatternError(f"Unmatched closing brace at position {i}")
+            inside = False
+
+    if inside:
+        raise PatternError("Unclosed brace in pattern")
+
+    # Extract and validate variable names
+    seen_vars: set[str] = set()
+    for match in re.finditer(r"\{([^}]*)\}", pattern):
+        var_name = match.group(1)
+        if not var_name:
+            raise PatternError("Empty variable name in pattern")
+        # Derive the regex group name the same way pattern_to_regex does:
+        # strip a "**" / "*" prefix; bare {**} / {*} become "path" / "name".
+        if var_name.startswith("**"):
+            clean_name = var_name[2:] or "path"
+        elif var_name.startswith("*"):
+            clean_name = var_name[1:] or "name"
+        else:
+            clean_name = var_name
+        # A non-identifier group name would make re.compile raise re.error
+        if not clean_name.isidentifier():
+            raise PatternError(f"Invalid character in variable name: {var_name}")
+        if clean_name in seen_vars:
+            raise PatternError(f"Duplicate variable: {clean_name}")
+        seen_vars.add(clean_name)
+
+
+def pattern_to_regex(pattern: str) -> tuple[str, list[str]]:
+    """
+    Translate a ``{var}`` placeholder pattern into an anchored regex.
+
+    Placeholder forms:
+    - {path} or {**name} - may span several path segments (.+)
+    - {name} or {*name} - confined to one path segment ([^/]+)
+    - {**} / {*} - unnamed forms, captured as "path" / "name"
+
+    Args:
+        pattern: Pattern string like "src/{path}.py"
+
+    Returns:
+        Tuple of (regex_pattern, list_of_variable_names)
+
+    Raises:
+        PatternError: If pattern has invalid syntax
+    """
+    validate_pattern(pattern)
+
+    # Work on a forward-slash version of the pattern
+    text = pattern.replace("\\", "/")
+
+    pieces: list[str] = ["^"]
+    names: list[str] = []
+    cursor = 0
+
+    while cursor < len(text):
+        open_idx = text.find("{", cursor)
+        if open_idx == -1:
+            # Only literal text is left
+            pieces.append(re.escape(text[cursor:]))
+            break
+
+        # Literal run in front of the placeholder, if any
+        literal = text[cursor:open_idx]
+        if literal:
+            pieces.append(re.escape(literal))
+
+        close_idx = text.find("}", open_idx)
+        if close_idx == -1:
+            raise PatternError("Unclosed brace in pattern")
+
+        spec = text[open_idx + 1 : close_idx]
+
+        # Pick the group name and whether the capture may cross "/"
+        if spec.startswith("**"):
+            # {**name}
+            name = spec[2:] or "path"
+            body = ".+"
+        elif spec.startswith("*"):
+            # {*name}
+            name = spec[1:] or "name"
+            body = "[^/]+"
+        elif spec == "path":
+            # {path} is multi-segment by convention
+            name = "path"
+            body = ".+"
+        else:
+            # Any other {name} is single-segment
+            name = spec
+            body = "[^/]+"
+
+        # re.escape("path") == "path", so one template serves every branch
+        pieces.append(f"(?P<{re.escape(name)}>{body})")
+        names.append(name)
+        cursor = close_idx + 1
+
+    pieces.append("$")
+    return "".join(pieces), names
+
+
+def match_pattern(pattern: str, filepath: str) -> MatchResult:
+    """
+    Match a filepath against a pattern, extracting variables.
+
+    An unusable pattern (PatternError, or a placeholder name the regex
+    engine rejects) is treated as "no match" rather than raised.
+
+    Args:
+        pattern: Pattern with {var} placeholders
+        filepath: File path to match
+
+    Returns:
+        MatchResult with matched=True and captured variables, or matched=False
+    """
+    try:
+        regex, _ = pattern_to_regex(pattern)
+        # Normalize path separators before matching
+        match = re.fullmatch(regex, filepath.replace("\\", "/"))
+    except (PatternError, re.error):
+        # e.g. "{my-var}" yields an invalid group name -> re.error
+        return MatchResult.no_match()
+
+    return MatchResult.match(match.groupdict()) if match else MatchResult.no_match()
+
+
+def resolve_pattern(pattern: str, variables: dict[str, str]) -> str:
+    """
+    Substitute variables into a pattern to generate a filepath.
+
+    Single pass; unknown placeholders stay as-is; {**} / {*} read "path" / "name".
+
+    Args:
+        pattern: Pattern with {var}, {*var} or {**var} placeholders
+        variables: Dict of variable name -> value
+
+    Returns:
+        Resolved filepath string
+    """
+    def _fill(found: re.Match[str]) -> str:
+        key = found.group(2) or {"**": "path", "*": "name"}.get(found.group(1), "")
+        return variables.get(key, found.group(0))
+
+    return re.sub(r"\{(\*{0,2})([^{}*]*)\}", _fill, pattern)
+
+
+def matches_glob(file_path: str, pattern: str) -> bool:
+    """
+    Test a file path against a glob pattern; one ``**`` matches recursively.
+
+    No variables are captured here. A pattern with no ``**``, or with more
+    than one, is handed to ``fnmatch`` as-is.
+
+    Examples:
+        "src/**/*.py" accepts "src/b.py" and "src/a/b.py"
+        "docs/**" accepts "docs" itself and anything below it
+
+    Args:
+        file_path: File path to check
+        pattern: Glob pattern (supports *, **, ?)
+
+    Returns:
+        True if matches
+    """
+    # Compare with forward slashes on both sides
+    path = file_path.replace("\\", "/")
+    glob = pattern.replace("\\", "/")
+
+    # head/tail are the text before/after the first "**"; marker is empty
+    # when the pattern holds no "**" at all
+    head, marker, tail = glob.partition("**")
+    if not marker or "**" in tail:
+        # Zero or several "**": plain fnmatch over the whole path
+        return fnmatch(path, glob)
+
+    tail = tail.lstrip("/")
+
+    if head:
+        # The path must be the prefix directory itself or live beneath it
+        head = head.rstrip("/")
+        if path != head and not path.startswith(head + "/"):
+            return False
+        # Drop the prefix and the slash(es) that follow it
+        rest = path[len(head) :].lstrip("/")
+    else:
+        # No prefix: the whole path is the remainder
+        rest = path
+
+    if not tail:
+        # Nothing after "**": everything under the prefix is accepted
+        return True
+
+    # Try the tail glob against every trailing run of segments, e.g. for
+    # "a/b/c.py": "a/b/c.py", then "b/c.py", then "c.py" (the bare filename)
+    segments = rest.split("/")
+    for start in range(len(segments)):
+        candidate = "/".join(segments[start:])
+        if fnmatch(candidate, tail):
+            return True
+
+    return False
+
+
+def matches_any_pattern(file_path: str, patterns: list[str]) -> bool:
+    """
+    Report whether at least one glob in ``patterns`` accepts ``file_path``.
+
+    An empty pattern list yields False.
+
+    Args:
+        file_path: File path to check (relative path)
+        patterns: List of glob patterns to match against
+
+    Returns:
+        True if the file matches any pattern
+    """
+    # any() stops at the first accepting glob
+    return any(matches_glob(file_path, candidate) for candidate in patterns)
+
+
+def has_variables(pattern: str) -> bool:
+    """Report whether both brace characters occur somewhere in the pattern."""
+    return all(brace in pattern for brace in "{}")
diff --git a/src/deepwork/core/policy_parser.py b/src/deepwork/core/policy_parser.py
index b6ade99..f1c5a28 100644
--- a/src/deepwork/core/policy_parser.py
+++ b/src/deepwork/core/policy_parser.py
@@ -1,13 +1,19 @@
-"""Policy definition parser."""
+"""Policy definition parser (v2 - frontmatter markdown format)."""
 
 from dataclasses import dataclass, field
-from fnmatch import fnmatch
+from enum import Enum
 from pathlib import Path
 from typing import Any
 
 import yaml
 
-from deepwork.schemas.policy_schema import POLICY_SCHEMA
+from deepwork.core.pattern_matcher import (
+    has_variables,
+    match_pattern,
+    matches_any_pattern,
+    resolve_pattern,
+)
+from deepwork.schemas.policy_schema import POLICY_FRONTMATTER_SCHEMA, POLICY_SCHEMA
 from deepwork.utils.validation import ValidationError, validate_against_schema
 
 
@@ -17,175 +23,309 @@ class PolicyParseError(Exception):
     pass
 
 
+class DetectionMode(Enum):
+    """Strategy a policy uses to decide, from the changed files, whether to fire."""
+
+    TRIGGER_SAFETY = "trigger_safety"  # Fire when a trigger matches and no safety pattern does
+    SET = "set"  # Bidirectional file correspondence (any member implies the others)
+    PAIR = "pair"  # Directional file correspondence (trigger side implies expected side)
+
+
+class ActionType(Enum):
+    """Kind of response produced once a policy fires."""
+
+    PROMPT = "prompt"  # Show the policy's markdown instructions to the agent (default)
+    COMMAND = "command"  # Run the configured command (expected to be idempotent)
+
+
 # Valid compare_to values
 COMPARE_TO_VALUES = frozenset({"base", "default_tip", "prompt"})
 DEFAULT_COMPARE_TO = "base"
 
 
+@dataclass
+class CommandAction:
+    """Settings for a policy whose action runs a command instead of prompting."""
+
+    command: str  # Command template (supports {file}, {files}, {repo_root})
+    run_for: str = "each_match"  # "each_match": once per file; "all_matches": once for all
+
+
+@dataclass
+class PairConfig:
+    """Patterns for pair (directional) detection: trigger-side changes demand expected files."""
+
+    trigger: str  # Pattern a changed file must match to start the check
+    expects: list[str]  # Patterns resolved with the trigger's captures into required files
+
+
 @dataclass
 class Policy:
-    """Represents a single policy definition."""
+    """Represents a single policy definition (v2 format)."""
 
-    name: str
-    triggers: list[str]  # Normalized to list
-    safety: list[str] = field(default_factory=list)  # Normalized to list, empty if not specified
-    instructions: str = ""  # Resolved content (either inline or from file)
-    compare_to: str = DEFAULT_COMPARE_TO  # What to compare against: base, default_tip, or prompt
+    # Identity
+    name: str  # Human-friendly name (displayed in promise tags)
+    filename: str  # Filename without .md extension (used for queue)
+
+    # Detection mode (exactly one must be set)
+    detection_mode: DetectionMode
+    triggers: list[str] = field(default_factory=list)  # For TRIGGER_SAFETY mode
+    safety: list[str] = field(default_factory=list)  # For TRIGGER_SAFETY mode
+    set_patterns: list[str] = field(default_factory=list)  # For SET mode
+    pair_config: PairConfig | None = None  # For PAIR mode
+
+    # Action type
+    action_type: ActionType = ActionType.PROMPT
+    instructions: str = ""  # For PROMPT action (markdown body)
+    command_action: CommandAction | None = None  # For COMMAND action
+
+    # Common options
+    compare_to: str = DEFAULT_COMPARE_TO
 
     @classmethod
-    def from_dict(cls, data: dict[str, Any], base_dir: Path | None = None) -> "Policy":
+    def from_frontmatter(
+        cls,
+        frontmatter: dict[str, Any],
+        markdown_body: str,
+        filename: str,
+    ) -> "Policy":
         """
-        Create Policy from dictionary.
+        Create Policy from parsed frontmatter and markdown body.
 
         Args:
-            data: Parsed YAML data for a single policy
-            base_dir: Base directory for resolving instructions_file paths
+            frontmatter: Parsed YAML frontmatter
+            markdown_body: Markdown content after frontmatter
+            filename: Filename without .md extension
 
         Returns:
             Policy instance
 
         Raises:
-            PolicyParseError: If instructions cannot be resolved
+            PolicyParseError: If validation fails
         """
-        # Normalize trigger to list
-        trigger = data["trigger"]
-        triggers = [trigger] if isinstance(trigger, str) else list(trigger)
-
-        # Normalize safety to list (empty if not present)
-        safety_data = data.get("safety", [])
-        safety = [safety_data] if isinstance(safety_data, str) else list(safety_data)
+        # Get name (required)
+        name = frontmatter.get("name", "")
+        if not name:
+            raise PolicyParseError(f"Policy '{filename}' missing required 'name' field")
+
+        # Determine detection mode
+        has_trigger = "trigger" in frontmatter
+        has_set = "set" in frontmatter
+        has_pair = "pair" in frontmatter
+
+        mode_count = sum([has_trigger, has_set, has_pair])
+        if mode_count == 0:
+            raise PolicyParseError(
+                f"Policy '{name}' must have 'trigger', 'set', or 'pair'"
+            )
+        if mode_count > 1:
+            raise PolicyParseError(
+                f"Policy '{name}' has multiple detection modes - use only one"
+            )
 
-        # Resolve instructions
-        if "instructions" in data:
-            instructions = data["instructions"]
-        elif "instructions_file" in data:
-            if base_dir is None:
+        # Parse based on detection mode
+        detection_mode: DetectionMode
+        triggers: list[str] = []
+        safety: list[str] = []
+        set_patterns: list[str] = []
+        pair_config: PairConfig | None = None
+
+        if has_trigger:
+            detection_mode = DetectionMode.TRIGGER_SAFETY
+            trigger = frontmatter["trigger"]
+            triggers = [trigger] if isinstance(trigger, str) else list(trigger)
+            safety_data = frontmatter.get("safety", [])
+            safety = [safety_data] if isinstance(safety_data, str) else list(safety_data)
+
+        elif has_set:
+            detection_mode = DetectionMode.SET
+            set_patterns = list(frontmatter["set"])
+            if len(set_patterns) < 2:
                 raise PolicyParseError(
-                    f"Policy '{data['name']}' uses instructions_file but no base_dir provided"
+                    f"Policy '{name}' set requires at least 2 patterns"
                 )
-            instructions_path = base_dir / data["instructions_file"]
-            if not instructions_path.exists():
+
+        elif has_pair:
+            detection_mode = DetectionMode.PAIR
+            pair_data = frontmatter["pair"]
+            expects = pair_data["expects"]
+            expects_list = [expects] if isinstance(expects, str) else list(expects)
+            pair_config = PairConfig(
+                trigger=pair_data["trigger"],
+                expects=expects_list,
+            )
+
+        # Determine action type
+        action_type: ActionType
+        command_action: CommandAction | None = None
+
+        if "action" in frontmatter:
+            action_type = ActionType.COMMAND
+            action_data = frontmatter["action"]
+            command_action = CommandAction(
+                command=action_data["command"],
+                run_for=action_data.get("run_for", "each_match"),
+            )
+        else:
+            action_type = ActionType.PROMPT
+            # Markdown body is the instructions
+            if not markdown_body.strip():
                 raise PolicyParseError(
-                    f"Policy '{data['name']}' instructions file not found: {instructions_path}"
+                    f"Policy '{name}' with prompt action requires markdown body"
                 )
-            try:
-                instructions = instructions_path.read_text()
-            except Exception as e:
-                raise PolicyParseError(
-                    f"Policy '{data['name']}' failed to read instructions file: {e}"
-                ) from e
-        else:
-            # Schema should catch this, but be defensive
-            raise PolicyParseError(
-                f"Policy '{data['name']}' must have either 'instructions' or 'instructions_file'"
-            )
 
-        # Get compare_to (defaults to DEFAULT_COMPARE_TO)
-        compare_to = data.get("compare_to", DEFAULT_COMPARE_TO)
+        # Get compare_to
+        compare_to = frontmatter.get("compare_to", DEFAULT_COMPARE_TO)
 
         return cls(
-            name=data["name"],
+            name=name,
+            filename=filename,
+            detection_mode=detection_mode,
             triggers=triggers,
             safety=safety,
-            instructions=instructions,
+            set_patterns=set_patterns,
+            pair_config=pair_config,
+            action_type=action_type,
+            instructions=markdown_body.strip(),
+            command_action=command_action,
             compare_to=compare_to,
         )
 
 
-def matches_pattern(file_path: str, patterns: list[str]) -> bool:
+def parse_frontmatter_file(filepath: Path) -> tuple[dict[str, Any], str]:
     """
-    Check if a file path matches any of the given glob patterns.
+    Parse a markdown file with YAML frontmatter.
 
     Args:
-        file_path: File path to check (relative path)
-        patterns: List of glob patterns to match against
+        filepath: Path to .md file
 
     Returns:
-        True if the file matches any pattern
+        Tuple of (frontmatter_dict, markdown_body)
+
+    Raises:
+        PolicyParseError: If parsing fails
     """
-    for pattern in patterns:
-        if _matches_glob(file_path, pattern):
-            return True
-    return False
+    try:
+        content = filepath.read_text(encoding="utf-8")
+    except OSError as e:
+        raise PolicyParseError(f"Failed to read policy file: {e}") from e
+
+    # Split frontmatter from body
+    if not content.startswith("---"):
+        raise PolicyParseError(
+            f"Policy file '{filepath.name}' must start with '---' frontmatter delimiter"
+        )
+
+    # Find end of frontmatter
+    end_marker = content.find("\n---", 3)
+    if end_marker == -1:
+        raise PolicyParseError(
+            f"Policy file '{filepath.name}' missing closing '---' frontmatter delimiter"
+        )
+
+    frontmatter_str = content[4:end_marker]  # Skip initial "---\n"
+    markdown_body = content[end_marker + 4 :]  # Skip "\n---\n" or "\n---"
+
+    # Parse YAML frontmatter
+    try:
+        frontmatter = yaml.safe_load(frontmatter_str)
+    except yaml.YAMLError as e:
+        raise PolicyParseError(
+            f"Invalid YAML frontmatter in '{filepath.name}': {e}"
+        ) from e
 
+    if frontmatter is None:
+        frontmatter = {}
 
-def _matches_glob(file_path: str, pattern: str) -> bool:
+    if not isinstance(frontmatter, dict):
+        raise PolicyParseError(
+            f"Frontmatter in '{filepath.name}' must be a mapping, got {type(frontmatter).__name__}"
+        )
+
+    return frontmatter, markdown_body
+
+
+def parse_policy_file_v2(filepath: Path) -> Policy:
     """
-    Match a file path against a glob pattern, supporting ** for recursive matching.
+    Parse a single policy from a frontmatter markdown file.
 
     Args:
-        file_path: File path to check
-        pattern: Glob pattern (supports *, **, ?)
+        filepath: Path to .md file in .deepwork/policies/
 
     Returns:
-        True if matches
-    """
-    # Normalize path separators
-    file_path = file_path.replace("\\", "/")
-    pattern = pattern.replace("\\", "/")
-
-    # Handle ** patterns (recursive directory matching)
-    if "**" in pattern:
-        # Split pattern by **
-        parts = pattern.split("**")
-
-        if len(parts) == 2:
-            prefix, suffix = parts[0], parts[1]
-
-            # Remove leading/trailing slashes from suffix
-            suffix = suffix.lstrip("/")
-
-            # Check if prefix matches the start of the path
-            if prefix:
-                prefix = prefix.rstrip("/")
-                if not file_path.startswith(prefix + "/") and file_path != prefix:
-                    return False
-                # Get the remaining path after prefix
-                remaining = file_path[len(prefix) :].lstrip("/")
-            else:
-                remaining = file_path
-
-            # If no suffix, any remaining path matches
-            if not suffix:
-                return True
-
-            # Check if suffix matches the end of any remaining path segment
-            # For pattern "src/**/*.py", suffix is "*.py"
-            # We need to match *.py against the filename portion
-            remaining_parts = remaining.split("/")
-            for i in range(len(remaining_parts)):
-                test_path = "/".join(remaining_parts[i:])
-                if fnmatch(test_path, suffix):
-                    return True
-                # Also try just the filename
-                if fnmatch(remaining_parts[-1], suffix):
-                    return True
-
-            return False
-
-    # Simple pattern without **
-    return fnmatch(file_path, pattern)
-
-
-def evaluate_policy(policy: Policy, changed_files: list[str]) -> bool:
+        Parsed Policy object
+
+    Raises:
+        PolicyParseError: If parsing or validation fails
     """
-    Evaluate whether a policy should fire based on changed files.
+    if not filepath.exists():
+        raise PolicyParseError(f"Policy file does not exist: {filepath}")
 
-    A policy fires if:
-    - At least one changed file matches a trigger pattern
-    - AND no changed file matches a safety pattern
+    if not filepath.is_file():
+        raise PolicyParseError(f"Policy path is not a file: {filepath}")
+
+    frontmatter, markdown_body = parse_frontmatter_file(filepath)
+
+    # Validate against schema
+    try:
+        validate_against_schema(frontmatter, POLICY_FRONTMATTER_SCHEMA)
+    except ValidationError as e:
+        raise PolicyParseError(
+            f"Policy '{filepath.name}' validation failed: {e}"
+        ) from e
+
+    # Create Policy object
+    filename = filepath.stem  # filename without .md extension
+    return Policy.from_frontmatter(frontmatter, markdown_body, filename)
+
+
+def load_policies_from_directory(policies_dir: Path) -> list[Policy]:
+    """
+    Load all policies from a directory.
 
     Args:
-        policy: Policy to evaluate
-        changed_files: List of changed file paths (relative)
+        policies_dir: Path to .deepwork/policies/ directory
 
     Returns:
-        True if the policy should fire
+        List of parsed Policy objects (sorted by filename)
+
+    Raises:
+        PolicyParseError: If any policy file fails to parse
+    """
+    if not policies_dir.exists():
+        return []
+
+    if not policies_dir.is_dir():
+        raise PolicyParseError(f"Policies path is not a directory: {policies_dir}")
+
+    policies = []
+    for filepath in sorted(policies_dir.glob("*.md")):
+        policy = parse_policy_file_v2(filepath)
+        policies.append(policy)
+
+    return policies
+
+
+# =============================================================================
+# Evaluation Logic
+# =============================================================================
+
+
+def evaluate_trigger_safety(
+    policy: Policy,
+    changed_files: list[str],
+) -> bool:
+    """
+    Evaluate a trigger/safety mode policy.
+
+    Returns True if policy should fire:
+    - At least one changed file matches a trigger pattern
+    - AND no changed file matches a safety pattern
     """
     # Check if any trigger matches
     trigger_matched = False
     for file_path in changed_files:
-        if matches_pattern(file_path, policy.triggers):
+        if matches_any_pattern(file_path, policy.triggers):
             trigger_matched = True
             break
 
@@ -195,18 +335,165 @@ def evaluate_policy(policy: Policy, changed_files: list[str]) -> bool:
     # Check if any safety pattern matches
     if policy.safety:
         for file_path in changed_files:
-            if matches_pattern(file_path, policy.safety):
-                # Safety file was also changed, don't fire
+            if matches_any_pattern(file_path, policy.safety):
                 return False
 
     return True
 
 
+def evaluate_set_correspondence(
+    policy: Policy,
+    changed_files: list[str],
+) -> tuple[bool, list[str], list[str]]:
+    """
+    Check a set policy: changing any member requires its siblings to change too.
+
+    Returns:
+        Tuple of (should_fire, trigger_files, missing_files)
+        - should_fire: True if correspondence is incomplete
+        - trigger_files: Changed files matching one of the set's patterns
+        - missing_files: Sibling paths absent from the change list (no duplicates)
+    """
+    patterns = policy.set_patterns
+    changed = set(changed_files)
+    triggered: list[str] = []
+    missing: list[str] = []
+
+    for path in changed_files:
+        # The first pattern accepting the path wins; later ones are not tried
+        for candidate in patterns:
+            outcome = match_pattern(candidate, path)
+            if outcome.matched:
+                break
+        else:
+            continue  # path belongs to no pattern of the set
+
+        triggered.append(path)
+
+        # Build each sibling path from the captures and require that it changed
+        for sibling in patterns:
+            if sibling == candidate:
+                continue
+            expected = (
+                resolve_pattern(sibling, outcome.variables)
+                if has_variables(sibling)
+                else sibling
+            )
+            if expected not in changed and expected not in missing:
+                missing.append(expected)
+
+    # Fire only when something triggered and a counterpart is missing
+    return bool(triggered and missing), triggered, missing
+
+
+def evaluate_pair_correspondence(
+    policy: Policy,
+    changed_files: list[str],
+) -> tuple[bool, list[str], list[str]]:
+    """
+    Check a pair policy: trigger-side changes require the expected files.
+
+    The relation is one-way: a change that only touches expected-side
+    files never causes the policy to fire.
+
+    Returns:
+        Tuple of (should_fire, trigger_files, missing_files)
+        - should_fire: True if a trigger file lacks an expected counterpart
+        - trigger_files: Changed files matching the trigger pattern
+        - missing_files: Expected paths absent from the change list
+    """
+    config = policy.pair_config
+    if config is None:
+        return False, [], []
+
+    changed = set(changed_files)
+    triggered: list[str] = []
+    missing: list[str] = []
+
+    for path in changed_files:
+        # Directional: only the trigger pattern is ever matched
+        outcome = match_pattern(config.trigger, path)
+        if not outcome.matched:
+            continue
+        triggered.append(path)
+
+        # Every expects pattern names a file that must have changed as well
+        for template in config.expects:
+            expected = (
+                resolve_pattern(template, outcome.variables)
+                if has_variables(template)
+                else template
+            )
+            if expected not in changed and expected not in missing:
+                missing.append(expected)
+
+    return bool(triggered and missing), triggered, missing
+
+
+@dataclass
+class PolicyEvaluationResult:
+    """Outcome of evaluating one policy against a list of changed files."""
+
+    policy: Policy  # The policy that was evaluated
+    should_fire: bool  # True when the policy's detection conditions were met
+    trigger_files: list[str] = field(default_factory=list)  # Changed files that matched
+    missing_files: list[str] = field(default_factory=list)  # For set/pair modes
+
+
+def evaluate_policy(policy: Policy, changed_files: list[str]) -> PolicyEvaluationResult:
+    """
+    Run the evaluator matching the policy's detection mode.
+
+    Set and pair modes also report missing files; an unknown mode never fires.
+
+    Args:
+        policy: Policy to evaluate
+        changed_files: List of changed file paths (relative)
+
+    Returns:
+        PolicyEvaluationResult with evaluation details
+    """
+    mode = policy.detection_mode
+
+    if mode == DetectionMode.TRIGGER_SAFETY:
+        fires = evaluate_trigger_safety(policy, changed_files)
+        # Trigger files are only reported when the policy actually fires
+        matched: list[str] = []
+        if fires:
+            matched = [
+                path
+                for path in changed_files
+                if matches_any_pattern(path, policy.triggers)
+            ]
+        return PolicyEvaluationResult(
+            policy=policy,
+            should_fire=fires,
+            trigger_files=matched,
+        )
+
+    # Set and pair evaluators share one return shape
+    if mode == DetectionMode.SET:
+        evaluator = evaluate_set_correspondence
+    elif mode == DetectionMode.PAIR:
+        evaluator = evaluate_pair_correspondence
+    else:
+        # Unrecognised mode: report "does not fire"
+        return PolicyEvaluationResult(policy=policy, should_fire=False)
+
+    fires, matched, absent = evaluator(policy, changed_files)
+    return PolicyEvaluationResult(
+        policy=policy,
+        should_fire=fires,
+        trigger_files=matched,
+        missing_files=absent,
+    )
+
+
 def evaluate_policies(
     policies: list[Policy],
     changed_files: list[str],
     promised_policies: set[str] | None = None,
-) -> list[Policy]:
+) -> list[PolicyEvaluationResult]:
     """
     Evaluate which policies should fire.
 
@@ -214,40 +501,91 @@ def evaluate_policies(
         policies: List of policies to evaluate
         changed_files: List of changed file paths (relative)
         promised_policies: Set of policy names that have been marked as addressed
-                          via <promise> tags (these are skipped)
+                          via <promise> tags (case-insensitive)
 
     Returns:
-        List of policies that should fire (trigger matches, no safety match, not promised)
+        List of PolicyEvaluationResult for policies that should fire
     """
     if promised_policies is None:
         promised_policies = set()
 
-    fired_policies = []
+    # Normalize promised names for case-insensitive comparison
+    promised_lower = {name.lower() for name in promised_policies}
+
+    results = []
     for policy in policies:
-        # Skip if already promised/addressed
-        if policy.name in promised_policies:
+        # Skip if already promised/addressed (case-insensitive)
+        if policy.name.lower() in promised_lower:
             continue
 
-        if evaluate_policy(policy, changed_files):
-            fired_policies.append(policy)
+        result = evaluate_policy(policy, changed_files)
+        if result.should_fire:
+            results.append(result)
+
+    return results
 
-    return fired_policies
 
+# =============================================================================
+# Legacy v1 Support (for migration)
+# =============================================================================
+
+
+@dataclass
+class PolicyV1:
+    """Legacy v1 policy format (from .deepwork.policy.yml)."""
 
-def parse_policy_file(policy_path: Path | str, base_dir: Path | None = None) -> list[Policy]:
+    name: str
+    triggers: list[str]
+    safety: list[str] = field(default_factory=list)
+    instructions: str = ""
+    compare_to: str = DEFAULT_COMPARE_TO
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any], base_dir: Path | None = None) -> "PolicyV1":
+        """Create PolicyV1 from a legacy-format dict (trigger/safety may be str or list)."""
+        trigger = data["trigger"]
+        triggers = [trigger] if isinstance(trigger, str) else list(trigger)
+
+        safety_data = data.get("safety", [])
+        safety = [safety_data] if isinstance(safety_data, str) else list(safety_data)
+
+        if "instructions" in data:
+            instructions = data["instructions"]
+        elif "instructions_file" in data:
+            if base_dir is None:
+                raise PolicyParseError(
+                    f"Policy '{data['name']}' uses instructions_file but no base_dir provided"
+                )
+            instructions_path = base_dir / data["instructions_file"]
+            if not instructions_path.exists():
+                raise PolicyParseError(
+                    f"Policy '{data['name']}' instructions file not found: {instructions_path}"
+                )
+            instructions = instructions_path.read_text(encoding="utf-8")  # not locale-dependent
+        else:
+            raise PolicyParseError(
+                f"Policy '{data['name']}' must have 'instructions' or 'instructions_file'"
+            )
+
+        return cls(
+            name=data["name"],
+            triggers=triggers,
+            safety=safety,
+            instructions=instructions,
+            compare_to=data.get("compare_to", DEFAULT_COMPARE_TO),
+        )
+
+
+def parse_policy_file(policy_path: Path | str, base_dir: Path | None = None) -> list[PolicyV1]:
     """
-    Parse policy definitions from a YAML file.
+    Parse policy definitions from a YAML file (legacy v1 format).
 
     Args:
         policy_path: Path to .deepwork.policy.yml file
-        base_dir: Base directory for resolving instructions_file paths.
-                  Defaults to the directory containing the policy file.
+        base_dir: Base directory for resolving instructions_file paths
 
     Returns:
-        List of parsed Policy objects
-
-    Raises:
-        PolicyParseError: If parsing fails or validation errors occur
+        List of parsed PolicyV1 objects
     """
     policy_path = Path(policy_path)
 
@@ -257,11 +595,9 @@ def parse_policy_file(policy_path: Path | str, base_dir: Path | None = None) ->
     if not policy_path.is_file():
         raise PolicyParseError(f"Policy path is not a file: {policy_path}")
 
-    # Default base_dir to policy file's directory
     if base_dir is None:
         base_dir = policy_path.parent
 
-    # Load YAML (policies are stored as a list, not a dict)
     try:
         with open(policy_path, encoding="utf-8") as f:
             policy_data = yaml.safe_load(f)
@@ -270,26 +606,22 @@ def parse_policy_file(policy_path: Path | str, base_dir: Path | None = None) ->
     except OSError as e:
         raise PolicyParseError(f"Failed to read policy file: {e}") from e
 
-    # Handle empty file or null content
     if policy_data is None:
         return []
 
-    # Validate it's a list (schema expects array)
     if not isinstance(policy_data, list):
         raise PolicyParseError(
             f"Policy file must contain a list of policies, got {type(policy_data).__name__}"
         )
 
-    # Validate against schema
     try:
         validate_against_schema(policy_data, POLICY_SCHEMA)
     except ValidationError as e:
         raise PolicyParseError(f"Policy definition validation failed: {e}") from e
 
-    # Parse into dataclasses
     policies = []
     for policy_item in policy_data:
-        policy = Policy.from_dict(policy_item, base_dir)
+        policy = PolicyV1.from_dict(policy_item, base_dir)
         policies.append(policy)
 
     return policies
diff --git a/src/deepwork/core/policy_queue.py b/src/deepwork/core/policy_queue.py
new file mode 100644
index 0000000..4404683
--- /dev/null
+++ b/src/deepwork/core/policy_queue.py
@@ -0,0 +1,321 @@
+"""Queue system for tracking policy state in .deepwork/tmp/policy/queue/."""
+
+import hashlib
+import json
+from dataclasses import asdict, dataclass, field
+from datetime import datetime, timezone
+from enum import Enum
+from pathlib import Path
+from typing import Any
+
+
+class QueueEntryStatus(Enum):
+    """Status of a queue entry."""
+
+    QUEUED = "queued"  # Detected, awaiting evaluation
+    PASSED = "passed"  # Evaluated, policy satisfied (promise found or action succeeded)
+    FAILED = "failed"  # Evaluated, policy not satisfied
+    SKIPPED = "skipped"  # Safety pattern matched, skipped
+
+
+@dataclass
+class ActionResult:
+    """Result of executing a policy action."""
+
+    type: str  # "prompt" or "command"
+    output: str | None = None  # Command stdout or prompt message shown
+    exit_code: int | None = None  # Command exit code (None for prompt)
+
+
+@dataclass
+class QueueEntry:
+    """A single entry in the policy queue."""
+
+    # Identity
+    policy_name: str  # Human-friendly name
+    policy_file: str  # Filename (e.g., "source-test-pairing.md")
+    trigger_hash: str  # Hash for deduplication
+
+    # State
+    status: QueueEntryStatus = QueueEntryStatus.QUEUED
+    created_at: str = ""  # ISO8601 timestamp
+    evaluated_at: str | None = None  # ISO8601 timestamp
+
+    # Context
+    baseline_ref: str = ""  # Commit hash or timestamp used as baseline
+    trigger_files: list[str] = field(default_factory=list)
+    expected_files: list[str] = field(default_factory=list)  # For set/pair modes
+    matched_files: list[str] = field(default_factory=list)  # Files that also changed
+
+    # Result
+    action_result: ActionResult | None = None
+
+    def __post_init__(self) -> None:
+        if not self.created_at:
+            self.created_at = datetime.now(timezone.utc).isoformat()
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        data = asdict(self)
+        data["status"] = self.status.value
+        if self.action_result:
+            data["action_result"] = asdict(self.action_result)
+        return data
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "QueueEntry":
+        """Create from dictionary."""
+        action_result = None
+        if data.get("action_result"):
+            action_result = ActionResult(**data["action_result"])
+
+        return cls(
+            policy_name=data["policy_name"],
+            policy_file=data["policy_file"],
+            trigger_hash=data["trigger_hash"],
+            status=QueueEntryStatus(data["status"]),
+            created_at=data.get("created_at", ""),
+            evaluated_at=data.get("evaluated_at"),
+            baseline_ref=data.get("baseline_ref", ""),
+            trigger_files=data.get("trigger_files", []),
+            expected_files=data.get("expected_files", []),
+            matched_files=data.get("matched_files", []),
+            action_result=action_result,
+        )
+
+
+def compute_trigger_hash(
+    policy_name: str,
+    trigger_files: list[str],
+    baseline_ref: str,
+) -> str:
+    """
+    Compute a hash for deduplication.
+
+    The hash is based on:
+    - Policy name
+    - Sorted list of trigger files
+    - Baseline reference (commit hash or timestamp)
+
+    Returns:
+        12-character hex hash
+    """
+    hash_input = f"{policy_name}:{sorted(trigger_files)}:{baseline_ref}"
+    return hashlib.sha256(hash_input.encode()).hexdigest()[:12]
+
+
+class PolicyQueue:
+    """
+    Manages the policy queue in .deepwork/tmp/policy/queue/.
+
+    Queue entries are stored as JSON files named {hash}.{status}.json
+    """
+
+    def __init__(self, queue_dir: Path | None = None):
+        """
+        Initialize the queue.
+
+        Args:
+            queue_dir: Path to queue directory. Defaults to .deepwork/tmp/policy/queue/
+        """
+        if queue_dir is None:
+            queue_dir = Path(".deepwork/tmp/policy/queue")
+        self.queue_dir = queue_dir
+
+    def _ensure_dir(self) -> None:
+        """Ensure queue directory exists."""
+        self.queue_dir.mkdir(parents=True, exist_ok=True)
+
+    def _get_entry_path(self, trigger_hash: str, status: QueueEntryStatus) -> Path:
+        """Get path for an entry file."""
+        return self.queue_dir / f"{trigger_hash}.{status.value}.json"
+
+    def _find_entry_path(self, trigger_hash: str) -> Path | None:
+        """Find existing entry file for a hash (any status)."""
+        for status in QueueEntryStatus:
+            path = self._get_entry_path(trigger_hash, status)
+            if path.exists():
+                return path
+        return None
+
+    def has_entry(self, trigger_hash: str) -> bool:
+        """Check if an entry exists for this hash."""
+        return self._find_entry_path(trigger_hash) is not None
+
+    def get_entry(self, trigger_hash: str) -> QueueEntry | None:
+        """Get an entry by hash; return None if it is missing or malformed."""
+        path = self._find_entry_path(trigger_hash)
+        if path is None:
+            return None
+        # Any unreadable or malformed file (bad JSON/encoding/status/shape) counts as absent
+        try:
+            with open(path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            return QueueEntry.from_dict(data)
+        except (OSError, ValueError, KeyError, TypeError, AttributeError):
+            return None
+
+    def create_entry(
+        self,
+        policy_name: str,
+        policy_file: str,
+        trigger_files: list[str],
+        baseline_ref: str,
+        expected_files: list[str] | None = None,
+    ) -> QueueEntry | None:
+        """
+        Create a new queue entry if one doesn't already exist.
+
+        Args:
+            policy_name: Human-friendly policy name
+            policy_file: Policy filename (e.g., "source-test-pairing.md")
+            trigger_files: Files that triggered the policy
+            baseline_ref: Baseline reference for change detection
+            expected_files: Expected corresponding files (for set/pair)
+
+        Returns:
+            Created QueueEntry, or None if entry already exists
+        """
+        trigger_hash = compute_trigger_hash(policy_name, trigger_files, baseline_ref)
+
+        # Check if already exists
+        if self.has_entry(trigger_hash):
+            return None
+
+        self._ensure_dir()
+
+        entry = QueueEntry(
+            policy_name=policy_name,
+            policy_file=policy_file,
+            trigger_hash=trigger_hash,
+            status=QueueEntryStatus.QUEUED,
+            baseline_ref=baseline_ref,
+            trigger_files=trigger_files,
+            expected_files=expected_files or [],
+        )
+
+        path = self._get_entry_path(trigger_hash, QueueEntryStatus.QUEUED)
+        with open(path, "w", encoding="utf-8") as f:
+            json.dump(entry.to_dict(), f, indent=2)
+
+        return entry
+
+    def update_status(
+        self,
+        trigger_hash: str,
+        new_status: QueueEntryStatus,
+        action_result: ActionResult | None = None,
+    ) -> bool:
+        """
+        Update the status of an entry.
+
+        This renames the file to reflect the new status.
+
+        Args:
+            trigger_hash: Hash of the entry to update
+            new_status: New status
+            action_result: Optional result of action execution
+
+        Returns:
+            True if updated, False if entry not found or its file is unreadable
+        """
+        old_path = self._find_entry_path(trigger_hash)
+        if old_path is None:
+            return False
+
+        # Load existing entry. ValueError covers both json.JSONDecodeError and
+        # UnicodeDecodeError, so a corrupt file is reported as "not updated".
+        try:
+            with open(old_path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+        except (OSError, ValueError):
+            return False
+
+        # Valid JSON that is not an object cannot be a queue entry
+        if not isinstance(data, dict):
+            return False
+
+        # Update fields
+        data["status"] = new_status.value
+        data["evaluated_at"] = datetime.now(timezone.utc).isoformat()
+        if action_result:
+            data["action_result"] = asdict(action_result)
+
+        # Write the entry under its new name, then drop the old file if the
+        # status (and therefore the filename) changed
+        new_path = self._get_entry_path(trigger_hash, new_status)
+        with open(new_path, "w", encoding="utf-8") as f:
+            json.dump(data, f, indent=2)
+        if old_path != new_path:
+            old_path.unlink()
+
+        return True
+
+    def get_queued_entries(self) -> list[QueueEntry]:
+        """Get all entries with QUEUED status, skipping unreadable or malformed files."""
+        if not self.queue_dir.exists():
+            return []
+
+        entries = []
+        for path in self.queue_dir.glob("*.queued.json"):
+            try:
+                with open(path, "r", encoding="utf-8") as f:
+                    data = json.load(f)
+                entries.append(QueueEntry.from_dict(data))
+            except (OSError, ValueError, KeyError, TypeError, AttributeError):
+                continue
+
+        return entries
+
+    def get_all_entries(self) -> list[QueueEntry]:
+        """Get all entries regardless of status, skipping unreadable or malformed files."""
+        if not self.queue_dir.exists():
+            return []
+
+        entries = []
+        for path in self.queue_dir.glob("*.json"):
+            try:
+                with open(path, "r", encoding="utf-8") as f:
+                    data = json.load(f)
+                entries.append(QueueEntry.from_dict(data))
+            except (OSError, ValueError, KeyError, TypeError, AttributeError):
+                continue
+
+        return entries
+
+    def clear(self) -> int:
+        """
+        Clear all entries from the queue.
+
+        Returns:
+            Number of entries removed
+        """
+        if not self.queue_dir.exists():
+            return 0
+
+        count = 0
+        for path in self.queue_dir.glob("*.json"):
+            try:
+                path.unlink()
+                count += 1
+            except OSError:
+                continue
+
+        return count
+
+    def remove_entry(self, trigger_hash: str) -> bool:
+        """
+        Remove an entry by hash.
+
+        Returns:
+            True if removed, False if not found
+        """
+        path = self._find_entry_path(trigger_hash)
+        if path is None:
+            return False
+
+        try:
+            path.unlink()
+            return True
+        except OSError:
+            return False
diff --git a/src/deepwork/hooks/evaluate_policies.py b/src/deepwork/hooks/evaluate_policies.py
index 07ac384..3a2b05d 100644
--- a/src/deepwork/hooks/evaluate_policies.py
+++ b/src/deepwork/hooks/evaluate_policies.py
@@ -28,14 +28,48 @@
 import sys
 from pathlib import Path
 
+from deepwork.core.pattern_matcher import matches_any_pattern
 from deepwork.core.policy_parser import (
-    Policy,
     PolicyParseError,
-    evaluate_policy,
+    PolicyV1,
     parse_policy_file,
 )
 
 
+def evaluate_policy_v1(policy: PolicyV1, changed_files: list[str]) -> bool:
+    """
+    Evaluate whether a v1 policy should fire based on changed files.
+
+    A policy fires when:
+    - At least one changed file matches a trigger pattern
+    - AND no changed file matches a safety pattern
+
+    Args:
+        policy: PolicyV1 to evaluate
+        changed_files: List of changed file paths
+
+    Returns:
+        True if policy should fire, False otherwise
+    """
+    # Check if any trigger matches
+    trigger_matched = False
+    for file_path in changed_files:
+        if matches_any_pattern(file_path, policy.triggers):
+            trigger_matched = True
+            break
+
+    if not trigger_matched:
+        return False
+
+    # Check if any safety pattern matches
+    if policy.safety:
+        for file_path in changed_files:
+            if matches_any_pattern(file_path, policy.safety):
+                return False
+
+    return True
+
+
 def get_default_branch() -> str:
     """
     Get the default branch name (main or master).
@@ -334,7 +368,7 @@ def main() -> None:
         return
 
     # Group policies by compare_to mode to minimize git calls
-    policies_by_mode: dict[str, list[Policy]] = {}
+    policies_by_mode: dict[str, list[PolicyV1]] = {}
     for policy in policies:
         mode = policy.compare_to
         if mode not in policies_by_mode:
@@ -342,7 +376,7 @@ def main() -> None:
         policies_by_mode[mode].append(policy)
 
     # Get changed files for each mode and evaluate policies
-    fired_policies: list[Policy] = []
+    fired_policies: list[PolicyV1] = []
     for mode, mode_policies in policies_by_mode.items():
         changed_files = get_changed_files_for_mode(mode)
         if not changed_files:
@@ -353,7 +387,7 @@ def main() -> None:
             if policy.name in promised_policies:
                 continue
             # Evaluate this policy
-            if evaluate_policy(policy, changed_files):
+            if evaluate_policy_v1(policy, changed_files):
                 fired_policies.append(policy)
 
     if not fired_policies:
diff --git a/src/deepwork/hooks/policy_check.py b/src/deepwork/hooks/policy_check.py
index 287852b..4fb0914 100644
--- a/src/deepwork/hooks/policy_check.py
+++ b/src/deepwork/hooks/policy_check.py
@@ -1,9 +1,11 @@
 """
-Policy check hook for DeepWork.
+Policy check hook for DeepWork (v2).
 
 This hook evaluates policies when the agent finishes (after_agent event).
 It uses the wrapper system for cross-platform compatibility.
 
+Policy files are loaded from .deepwork/policies/ directory as frontmatter markdown files.
+
 Usage (via shell wrapper):
     claude_hook.sh deepwork.hooks.policy_check
     gemini_hook.sh deepwork.hooks.policy_check
@@ -21,11 +23,25 @@
 import sys
 from pathlib import Path
 
+from deepwork.core.command_executor import (
+    all_commands_succeeded,
+    format_command_errors,
+    run_command_action,
+)
 from deepwork.core.policy_parser import (
+    ActionType,
+    DetectionMode,
     Policy,
+    PolicyEvaluationResult,
     PolicyParseError,
-    evaluate_policy,
-    parse_policy_file,
+    evaluate_policies,
+    load_policies_from_directory,
+)
+from deepwork.core.policy_queue import (
+    ActionResult,
+    PolicyQueue,
+    QueueEntryStatus,
+    compute_trigger_hash,
 )
 from deepwork.hooks.wrapper import (
     HookInput,
@@ -63,6 +79,41 @@ def get_default_branch() -> str:
     return "main"
 
 
+def get_baseline_ref(mode: str) -> str:
+    """Get the baseline reference for a compare_to mode."""
+    if mode == "base":
+        try:
+            default_branch = get_default_branch()
+            result = subprocess.run(
+                ["git", "merge-base", "HEAD", f"origin/{default_branch}"],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            return result.stdout.strip()
+        except subprocess.CalledProcessError:
+            return "base"
+    elif mode == "default_tip":
+        try:
+            default_branch = get_default_branch()
+            result = subprocess.run(
+                ["git", "rev-parse", f"origin/{default_branch}"],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            return result.stdout.strip()
+        except subprocess.CalledProcessError:
+            return "default_tip"
+    elif mode == "prompt":
+        baseline_path = Path(".deepwork/.last_work_tree")
+        if baseline_path.exists():
+            # Use file modification time as reference
+            return str(int(baseline_path.stat().st_mtime))
+        return "prompt"
+    return mode
+
+
 def get_changed_files_base() -> list[str]:
     """Get files changed relative to branch base."""
     default_branch = get_default_branch()
@@ -188,8 +239,15 @@ def get_changed_files_for_mode(mode: str) -> list[str]:
 
 
 def extract_promise_tags(text: str) -> set[str]:
-    """Extract policy names from <promise> tags in text."""
-    pattern = r"<promise>✓\s*([^<]+)</promise>"
+    """
+    Extract policy names from <promise> tags in text.
+
+    Supports both:
+    - <promise>✓ Policy Name</promise>
+    - <promise>Policy Name</promise>
+    """
+    # Match with or without checkmark
+    pattern = r"<promise>(?:✓\s*)?([^<]+)</promise>"
     matches = re.findall(pattern, text, re.IGNORECASE | re.DOTALL)
     return {m.strip() for m in matches}
 
@@ -247,28 +305,52 @@ def extract_conversation_from_transcript(transcript_path: str, platform: Platfor
         return ""
 
 
-def format_policy_message(policies: list[Policy]) -> str:
-    """Format triggered policies into a message for the agent."""
+def format_policy_message(results: list[PolicyEvaluationResult]) -> str:
+    """
+    Format triggered policies into a concise message for the agent.
+
+    Groups policies by name and uses minimal formatting.
+    """
     lines = ["## DeepWork Policies Triggered", ""]
     lines.append(
         "Comply with the following policies. "
         "To mark a policy as addressed, include `<promise>✓ Policy Name</promise>` "
-        "in your response (replace Policy Name with the actual policy name)."
+        "in your response."
     )
     lines.append("")
 
-    for policy in policies:
-        lines.append(f"### Policy: {policy.name}")
-        lines.append("")
-        lines.append(policy.instructions.strip())
+    # Group results by policy name
+    by_name: dict[str, list[PolicyEvaluationResult]] = {}
+    for result in results:
+        name = result.policy.name
+        if name not in by_name:
+            by_name[name] = []
+        by_name[name].append(result)
+
+    for name, policy_results in by_name.items():
+        policy = policy_results[0].policy
+        lines.append(f"## {name}")
         lines.append("")
 
+        # For set/pair modes, show the correspondence violations concisely
+        if policy.detection_mode in (DetectionMode.SET, DetectionMode.PAIR):
+            for result in policy_results:
+                for trigger_file in result.trigger_files:
+                    for missing_file in result.missing_files:
+                        lines.append(f"{trigger_file} → {missing_file}")
+            lines.append("")
+
+        # Show instructions
+        if policy.instructions:
+            lines.append(policy.instructions.strip())
+            lines.append("")
+
     return "\n".join(lines)
 
 
 def policy_check_hook(hook_input: HookInput) -> HookOutput:
     """
-    Main hook logic for policy evaluation.
+    Main hook logic for policy evaluation (v2).
 
     This is called for after_agent events to check if policies need attention
     before allowing the agent to complete.
@@ -277,9 +359,9 @@ def policy_check_hook(hook_input: HookInput) -> HookOutput:
     if hook_input.event != NormalizedEvent.AFTER_AGENT:
         return HookOutput()
 
-    # Check if policy file exists
-    policy_path = Path(".deepwork.policy.yml")
-    if not policy_path.exists():
+    # Check if policies directory exists
+    policies_dir = Path(".deepwork/policies")
+    if not policies_dir.exists():
         return HookOutput()
 
     # Extract conversation context from transcript
@@ -287,19 +369,22 @@ def policy_check_hook(hook_input: HookInput) -> HookOutput:
         hook_input.transcript_path, hook_input.platform
     )
 
-    # Extract promise tags
+    # Extract promise tags (case-insensitive)
     promised_policies = extract_promise_tags(conversation_context)
 
-    # Parse policies
+    # Load policies
     try:
-        policies = parse_policy_file(policy_path)
+        policies = load_policies_from_directory(policies_dir)
     except PolicyParseError as e:
-        print(f"Error parsing policy file: {e}", file=sys.stderr)
+        print(f"Error loading policies: {e}", file=sys.stderr)
         return HookOutput()
 
     if not policies:
         return HookOutput()
 
+    # Initialize queue
+    queue = PolicyQueue()
+
     # Group policies by compare_to mode
     policies_by_mode: dict[str, list[Policy]] = {}
     for policy in policies:
@@ -308,25 +393,105 @@ def policy_check_hook(hook_input: HookInput) -> HookOutput:
             policies_by_mode[mode] = []
         policies_by_mode[mode].append(policy)
 
-    # Evaluate policies
-    fired_policies: list[Policy] = []
+    # Evaluate policies and collect results
+    prompt_results: list[PolicyEvaluationResult] = []
+    command_errors: list[str] = []
+
     for mode, mode_policies in policies_by_mode.items():
         changed_files = get_changed_files_for_mode(mode)
         if not changed_files:
             continue
 
-        for policy in mode_policies:
-            if policy.name in promised_policies:
-                continue
-            if evaluate_policy(policy, changed_files):
-                fired_policies.append(policy)
+        baseline_ref = get_baseline_ref(mode)
 
-    if not fired_policies:
-        return HookOutput()
+        # Evaluate which policies fire
+        results = evaluate_policies(mode_policies, changed_files, promised_policies)
+
+        for result in results:
+            policy = result.policy
+
+            # Compute trigger hash for queue deduplication
+            trigger_hash = compute_trigger_hash(
+                policy.name,
+                result.trigger_files,
+                baseline_ref,
+            )
+
+            # Check if already in queue (passed/skipped)
+            existing = queue.get_entry(trigger_hash)
+            if existing and existing.status in (
+                QueueEntryStatus.PASSED,
+                QueueEntryStatus.SKIPPED,
+            ):
+                continue
 
-    # Format message and return blocking response
-    message = format_policy_message(fired_policies)
-    return HookOutput(decision="block", reason=message)
+            # Create queue entry if new
+            if not existing:
+                queue.create_entry(
+                    policy_name=policy.name,
+                    policy_file=f"{policy.filename}.md",
+                    trigger_files=result.trigger_files,
+                    baseline_ref=baseline_ref,
+                    expected_files=result.missing_files,
+                )
+
+            # Handle based on action type
+            if policy.action_type == ActionType.COMMAND:
+                # Run command action
+                if policy.command_action:
+                    repo_root = Path.cwd()
+                    cmd_results = run_command_action(
+                        policy.command_action,
+                        result.trigger_files,
+                        repo_root,
+                    )
+
+                    if all_commands_succeeded(cmd_results):
+                        # Command succeeded, mark as passed
+                        queue.update_status(
+                            trigger_hash,
+                            QueueEntryStatus.PASSED,
+                            ActionResult(
+                                type="command",
+                                output=cmd_results[0].stdout if cmd_results else None,
+                                exit_code=0,
+                            ),
+                        )
+                    else:
+                        # Command failed
+                        error_msg = format_command_errors(cmd_results)
+                        command_errors.append(f"## {policy.name}\n{error_msg}")
+                        queue.update_status(
+                            trigger_hash,
+                            QueueEntryStatus.FAILED,
+                            ActionResult(
+                                type="command",
+                                output=error_msg,
+                                exit_code=cmd_results[0].exit_code if cmd_results else -1,
+                            ),
+                        )
+
+            elif policy.action_type == ActionType.PROMPT:
+                # Collect for prompt output
+                prompt_results.append(result)
+
+    # Build response
+    messages: list[str] = []
+
+    # Add command errors if any
+    if command_errors:
+        messages.append("## Command Policy Errors\n")
+        messages.extend(command_errors)
+        messages.append("")
+
+    # Add prompt policies if any
+    if prompt_results:
+        messages.append(format_policy_message(prompt_results))
+
+    if messages:
+        return HookOutput(decision="block", reason="\n".join(messages))
+
+    return HookOutput()
 
 
 def main() -> None:
diff --git a/src/deepwork/schemas/policy_schema.py b/src/deepwork/schemas/policy_schema.py
index 5aa6ae8..690cb64 100644
--- a/src/deepwork/schemas/policy_schema.py
+++ b/src/deepwork/schemas/policy_schema.py
@@ -1,10 +1,111 @@
-"""JSON Schema definition for policy definitions."""
+"""JSON Schema definition for policy definitions (v2 - frontmatter format)."""
 
 from typing import Any
 
-# JSON Schema for .deepwork.policy.yml files
-# Policies are defined as an array of policy objects
-POLICY_SCHEMA: dict[str, Any] = {
+# Pattern for string or array of strings
+STRING_OR_ARRAY: dict[str, Any] = {
+    "oneOf": [
+        {"type": "string", "minLength": 1},
+        {"type": "array", "items": {"type": "string", "minLength": 1}, "minItems": 1},
+    ]
+}
+
+# JSON Schema for policy frontmatter (YAML between --- delimiters)
+# Policies are stored as individual .md files in .deepwork/policies/
+POLICY_FRONTMATTER_SCHEMA: dict[str, Any] = {
+    "$schema": "http://json-schema.org/draft-07/schema#",
+    "type": "object",
+    "required": ["name"],
+    "properties": {
+        "name": {
+            "type": "string",
+            "minLength": 1,
+            "description": "Human-friendly name for the policy (displayed in promise tags)",
+        },
+        # Detection mode: trigger/safety (mutually exclusive with set/pair)
+        "trigger": {
+            **STRING_OR_ARRAY,
+            "description": "Glob pattern(s) for files that trigger this policy",
+        },
+        "safety": {
+            **STRING_OR_ARRAY,
+            "description": "Glob pattern(s) that suppress the policy if changed",
+        },
+        # Detection mode: set (bidirectional correspondence)
+        "set": {
+            "type": "array",
+            "items": {"type": "string", "minLength": 1},
+            "minItems": 2,
+            "description": "Patterns defining bidirectional file correspondence",
+        },
+        # Detection mode: pair (directional correspondence)
+        "pair": {
+            "type": "object",
+            "required": ["trigger", "expects"],
+            "properties": {
+                "trigger": {
+                    "type": "string",
+                    "minLength": 1,
+                    "description": "Pattern that triggers the policy",
+                },
+                "expects": {
+                    **STRING_OR_ARRAY,
+                    "description": "Pattern(s) for expected corresponding files",
+                },
+            },
+            "additionalProperties": False,
+            "description": "Directional file correspondence (trigger -> expects)",
+        },
+        # Action type: command (default is prompt using markdown body)
+        "action": {
+            "type": "object",
+            "required": ["command"],
+            "properties": {
+                "command": {
+                    "type": "string",
+                    "minLength": 1,
+                    "description": "Command to run (supports {file}, {files}, {repo_root})",
+                },
+                "run_for": {
+                    "type": "string",
+                    "enum": ["each_match", "all_matches"],
+                    "default": "each_match",
+                    "description": "Run command for each file or all files at once",
+                },
+            },
+            "additionalProperties": False,
+            "description": "Command action to run instead of prompting",
+        },
+        # Common options
+        "compare_to": {
+            "type": "string",
+            "enum": ["base", "default_tip", "prompt"],
+            "default": "base",
+            "description": "Baseline for detecting file changes",
+        },
+    },
+    "additionalProperties": False,
+    # Detection mode must be exactly one of: trigger, set, or pair
+    "oneOf": [
+        {
+            "required": ["trigger"],
+            "not": {"anyOf": [{"required": ["set"]}, {"required": ["pair"]}]},
+        },
+        {
+            "required": ["set"],
+            "not": {"anyOf": [{"required": ["trigger"]}, {"required": ["pair"]}]},
+        },
+        {
+            "required": ["pair"],
+            "not": {"anyOf": [{"required": ["trigger"]}, {"required": ["set"]}]},
+        },
+    ],
+}
+
+
+# Legacy schema for .deepwork.policy.yml (v1 format)
+# Not used to validate v2 policies; still exported as POLICY_SCHEMA (alias below)
+POLICY_SCHEMA_V1: dict[str, Any] = {
     "$schema": "http://json-schema.org/draft-07/schema#",
     "type": "array",
     "description": "List of policies that trigger based on file changes",
@@ -76,3 +177,6 @@
         "additionalProperties": False,
     },
 }
+
+# Alias for backwards compatibility
+POLICY_SCHEMA = POLICY_SCHEMA_V1
diff --git a/tests/unit/test_evaluate_policies.py b/tests/unit/test_evaluate_policies.py
index 03f1a26..c0abdce 100644
--- a/tests/unit/test_evaluate_policies.py
+++ b/tests/unit/test_evaluate_policies.py
@@ -1,6 +1,6 @@
 """Tests for the hooks evaluate_policies module."""
 
-from deepwork.core.policy_parser import Policy
+from deepwork.core.policy_parser import PolicyV1
 from deepwork.hooks.evaluate_policies import extract_promise_tags, format_policy_message
 
 
@@ -48,7 +48,7 @@ class TestFormatPolicyMessage:
     def test_formats_single_policy(self) -> None:
         """Test formatting a single policy."""
         policies = [
-            Policy(
+            PolicyV1(
                 name="Test Policy",
                 triggers=["src/*"],
                 safety=[],
@@ -65,13 +65,13 @@ def test_formats_single_policy(self) -> None:
     def test_formats_multiple_policies(self) -> None:
         """Test formatting multiple policies."""
         policies = [
-            Policy(
+            PolicyV1(
                 name="Policy 1",
                 triggers=["src/*"],
                 safety=[],
                 instructions="Do thing 1.",
             ),
-            Policy(
+            PolicyV1(
                 name="Policy 2",
                 triggers=["test/*"],
                 safety=[],
@@ -88,7 +88,7 @@ def test_formats_multiple_policies(self) -> None:
     def test_strips_instruction_whitespace(self) -> None:
         """Test that instruction whitespace is stripped."""
         policies = [
-            Policy(
+            PolicyV1(
                 name="Test",
                 triggers=["*"],
                 safety=[],
diff --git a/tests/unit/test_policy_parser.py b/tests/unit/test_policy_parser.py
index 80eedbb..24e537c 100644
--- a/tests/unit/test_policy_parser.py
+++ b/tests/unit/test_policy_parser.py
@@ -4,19 +4,21 @@
 
 import pytest
 
+from deepwork.core.pattern_matcher import matches_any_pattern as matches_pattern
 from deepwork.core.policy_parser import (
     DEFAULT_COMPARE_TO,
+    DetectionMode,
     Policy,
     PolicyParseError,
+    PolicyV1,
     evaluate_policies,
     evaluate_policy,
-    matches_pattern,
     parse_policy_file,
 )
 
 
-class TestPolicy:
-    """Tests for Policy dataclass."""
+class TestPolicyV1:
+    """Tests for PolicyV1 dataclass (legacy format)."""
 
     def test_from_dict_with_inline_instructions(self) -> None:
         """Test creating policy from dict with inline instructions."""
@@ -26,7 +28,7 @@ def test_from_dict_with_inline_instructions(self) -> None:
             "safety": "docs/readme.md",
             "instructions": "Do something",
         }
-        policy = Policy.from_dict(data)
+        policy = PolicyV1.from_dict(data)
 
         assert policy.name == "Test Policy"
         assert policy.triggers == ["src/**/*"]
@@ -40,7 +42,7 @@ def test_from_dict_normalizes_trigger_string_to_list(self) -> None:
             "trigger": "*.py",
             "instructions": "Check it",
         }
-        policy = Policy.from_dict(data)
+        policy = PolicyV1.from_dict(data)
 
         assert policy.triggers == ["*.py"]
 
@@ -51,7 +53,7 @@ def test_from_dict_preserves_trigger_list(self) -> None:
             "trigger": ["*.py", "*.js"],
             "instructions": "Check it",
         }
-        policy = Policy.from_dict(data)
+        policy = PolicyV1.from_dict(data)
 
         assert policy.triggers == ["*.py", "*.js"]
 
@@ -63,7 +65,7 @@ def test_from_dict_normalizes_safety_string_to_list(self) -> None:
             "safety": "docs/README.md",
             "instructions": "Check it",
         }
-        policy = Policy.from_dict(data)
+        policy = PolicyV1.from_dict(data)
 
         assert policy.safety == ["docs/README.md"]
 
@@ -74,7 +76,7 @@ def test_from_dict_safety_defaults_to_empty_list(self) -> None:
             "trigger": "src/*",
             "instructions": "Check it",
         }
-        policy = Policy.from_dict(data)
+        policy = PolicyV1.from_dict(data)
 
         assert policy.safety == []
 
@@ -89,7 +91,7 @@ def test_from_dict_with_instructions_file(self, temp_dir: Path) -> None:
             "trigger": "src/*",
             "instructions_file": "instructions.md",
         }
-        policy = Policy.from_dict(data, base_dir=temp_dir)
+        policy = PolicyV1.from_dict(data, base_dir=temp_dir)
 
         assert policy.instructions == "# Instructions\nDo this and that."
 
@@ -102,7 +104,7 @@ def test_from_dict_instructions_file_not_found(self, temp_dir: Path) -> None:
         }
 
         with pytest.raises(PolicyParseError, match="instructions file not found"):
-            Policy.from_dict(data, base_dir=temp_dir)
+            PolicyV1.from_dict(data, base_dir=temp_dir)
 
     def test_from_dict_instructions_file_without_base_dir(self) -> None:
         """Test error when instructions_file used without base_dir."""
@@ -113,7 +115,7 @@ def test_from_dict_instructions_file_without_base_dir(self) -> None:
         }
 
         with pytest.raises(PolicyParseError, match="no base_dir provided"):
-            Policy.from_dict(data, base_dir=None)
+            PolicyV1.from_dict(data, base_dir=None)
 
     def test_from_dict_compare_to_defaults_to_base(self) -> None:
         """Test that compare_to defaults to 'base'."""
@@ -122,7 +124,7 @@ def test_from_dict_compare_to_defaults_to_base(self) -> None:
             "trigger": "src/*",
             "instructions": "Check it",
         }
-        policy = Policy.from_dict(data)
+        policy = PolicyV1.from_dict(data)
 
         assert policy.compare_to == DEFAULT_COMPARE_TO
         assert policy.compare_to == "base"
@@ -135,7 +137,7 @@ def test_from_dict_compare_to_explicit_base(self) -> None:
             "instructions": "Check it",
             "compare_to": "base",
         }
-        policy = Policy.from_dict(data)
+        policy = PolicyV1.from_dict(data)
 
         assert policy.compare_to == "base"
 
@@ -147,7 +149,7 @@ def test_from_dict_compare_to_default_tip(self) -> None:
             "instructions": "Check it",
             "compare_to": "default_tip",
         }
-        policy = Policy.from_dict(data)
+        policy = PolicyV1.from_dict(data)
 
         assert policy.compare_to == "default_tip"
 
@@ -159,7 +161,7 @@ def test_from_dict_compare_to_prompt(self) -> None:
             "instructions": "Check it",
             "compare_to": "prompt",
         }
-        policy = Policy.from_dict(data)
+        policy = PolicyV1.from_dict(data)
 
         assert policy.compare_to == "prompt"
 
@@ -204,65 +206,82 @@ def test_fires_when_trigger_matches(self) -> None:
         """Test policy fires when trigger matches."""
         policy = Policy(
             name="Test",
+            filename="test",
+            detection_mode=DetectionMode.TRIGGER_SAFETY,
             triggers=["src/**/*.py"],
             safety=[],
             instructions="Check it",
         )
         changed_files = ["src/main.py", "README.md"]
 
-        assert evaluate_policy(policy, changed_files) is True
+        result = evaluate_policy(policy, changed_files)
+        assert result.should_fire is True
 
     def test_does_not_fire_when_no_trigger_match(self) -> None:
         """Test policy doesn't fire when no trigger matches."""
         policy = Policy(
             name="Test",
+            filename="test",
+            detection_mode=DetectionMode.TRIGGER_SAFETY,
             triggers=["src/**/*.py"],
             safety=[],
             instructions="Check it",
         )
         changed_files = ["test/main.py", "README.md"]
 
-        assert evaluate_policy(policy, changed_files) is False
+        result = evaluate_policy(policy, changed_files)
+        assert result.should_fire is False
 
     def test_does_not_fire_when_safety_matches(self) -> None:
         """Test policy doesn't fire when safety file is also changed."""
         policy = Policy(
             name="Test",
+            filename="test",
+            detection_mode=DetectionMode.TRIGGER_SAFETY,
             triggers=["app/config/**/*"],
             safety=["docs/install_guide.md"],
             instructions="Update docs",
         )
         changed_files = ["app/config/settings.py", "docs/install_guide.md"]
 
-        assert evaluate_policy(policy, changed_files) is False
+        result = evaluate_policy(policy, changed_files)
+        assert result.should_fire is False
 
     def test_fires_when_trigger_matches_but_safety_doesnt(self) -> None:
         """Test policy fires when trigger matches but safety doesn't."""
         policy = Policy(
             name="Test",
+            filename="test",
+            detection_mode=DetectionMode.TRIGGER_SAFETY,
             triggers=["app/config/**/*"],
             safety=["docs/install_guide.md"],
             instructions="Update docs",
         )
         changed_files = ["app/config/settings.py", "app/main.py"]
 
-        assert evaluate_policy(policy, changed_files) is True
+        result = evaluate_policy(policy, changed_files)
+        assert result.should_fire is True
 
     def test_multiple_safety_patterns(self) -> None:
         """Test policy with multiple safety patterns."""
         policy = Policy(
             name="Test",
+            filename="test",
+            detection_mode=DetectionMode.TRIGGER_SAFETY,
             triggers=["src/auth/**/*"],
             safety=["SECURITY.md", "docs/security_review.md"],
             instructions="Security review",
         )
 
         # Should not fire if any safety file is changed
-        assert evaluate_policy(policy, ["src/auth/login.py", "SECURITY.md"]) is False
-        assert evaluate_policy(policy, ["src/auth/login.py", "docs/security_review.md"]) is False
+        result1 = evaluate_policy(policy, ["src/auth/login.py", "SECURITY.md"])
+        assert result1.should_fire is False
+        result2 = evaluate_policy(policy, ["src/auth/login.py", "docs/security_review.md"])
+        assert result2.should_fire is False
 
         # Should fire if no safety files changed
-        assert evaluate_policy(policy, ["src/auth/login.py"]) is True
+        result3 = evaluate_policy(policy, ["src/auth/login.py"])
+        assert result3.should_fire is True
 
 
 class TestEvaluatePolicies:
@@ -273,12 +292,16 @@ def test_returns_fired_policies(self) -> None:
         policies = [
             Policy(
                 name="Policy 1",
+                filename="policy1",
+                detection_mode=DetectionMode.TRIGGER_SAFETY,
                 triggers=["src/**/*"],
                 safety=[],
                 instructions="Do 1",
             ),
             Policy(
                 name="Policy 2",
+                filename="policy2",
+                detection_mode=DetectionMode.TRIGGER_SAFETY,
                 triggers=["test/**/*"],
                 safety=[],
                 instructions="Do 2",
@@ -289,20 +312,24 @@ def test_returns_fired_policies(self) -> None:
         fired = evaluate_policies(policies, changed_files)
 
         assert len(fired) == 2
-        assert fired[0].name == "Policy 1"
-        assert fired[1].name == "Policy 2"
+        assert fired[0].policy.name == "Policy 1"
+        assert fired[1].policy.name == "Policy 2"
 
     def test_skips_promised_policies(self) -> None:
         """Test that promised policies are skipped."""
         policies = [
             Policy(
                 name="Policy 1",
+                filename="policy1",
+                detection_mode=DetectionMode.TRIGGER_SAFETY,
                 triggers=["src/**/*"],
                 safety=[],
                 instructions="Do 1",
             ),
             Policy(
                 name="Policy 2",
+                filename="policy2",
+                detection_mode=DetectionMode.TRIGGER_SAFETY,
                 triggers=["src/**/*"],
                 safety=[],
                 instructions="Do 2",
@@ -314,13 +341,15 @@ def test_skips_promised_policies(self) -> None:
         fired = evaluate_policies(policies, changed_files, promised)
 
         assert len(fired) == 1
-        assert fired[0].name == "Policy 2"
+        assert fired[0].policy.name == "Policy 2"
 
     def test_returns_empty_when_no_policies_fire(self) -> None:
         """Test returns empty list when no policies fire."""
         policies = [
             Policy(
                 name="Policy 1",
+                filename="policy1",
+                detection_mode=DetectionMode.TRIGGER_SAFETY,
                 triggers=["src/**/*"],
                 safety=[],
                 instructions="Do 1",

From 19a8310d52c8a97436335602f589d587562d694a Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 16 Jan 2026 18:56:33 +0000
Subject: [PATCH 06/10] Update documentation and version for policy system v2

- Update README.md with v2 policy examples and directory structure
- Update doc/architecture.md with v2 detection modes, action types, and queue system
- Bump version to 0.4.0 in pyproject.toml
- Add changelog entry for v2 policy system features
---
 CHANGELOG.md        |  19 +++++
 README.md           |  45 +++++++++---
 doc/architecture.md | 170 +++++++++++++++++++++++++++++++++-----------
 pyproject.toml      |   2 +-
 4 files changed, 183 insertions(+), 53 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 79ba35e..d099153 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,24 @@ All notable changes to DeepWork will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.4.0] - 2026-01-16
+
+### Added
+- Policy system v2 with frontmatter markdown format in `.deepwork/policies/`
+  - Detection modes: trigger/safety (default), set (bidirectional), pair (directional)
+  - Action types: prompt (show instructions), command (run idempotent commands)
+  - Variable pattern matching with `{path}` (multi-segment) and `{name}` (single-segment)
+  - Queue system in `.deepwork/tmp/policy/queue/` for state tracking and deduplication
+- New core modules:
+  - `pattern_matcher.py`: Variable pattern matching with regex-based capture
+  - `policy_queue.py`: Queue system for policy state persistence
+  - `command_executor.py`: Command action execution with variable substitution
+- Updated `policy_check.py` hook to use v2 system with queue-based deduplication
+
+### Changed
+- Policy parser now supports both v1 (`.deepwork.policy.yml`) and v2 (`.deepwork/policies/*.md`) formats
+- Documentation updated with v2 policy examples and configuration
+
 ## [0.3.0] - 2026-01-16
 
 ### Added
@@ -64,6 +82,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 Initial version.
 
+[0.4.0]: https://github.com/anthropics/deepwork/releases/tag/0.4.0
 [0.3.0]: https://github.com/anthropics/deepwork/releases/tag/0.3.0
 [0.1.1]: https://github.com/anthropics/deepwork/releases/tag/0.1.1
 [0.1.0]: https://github.com/anthropics/deepwork/releases/tag/0.1.0
diff --git a/README.md b/README.md
index 3331996..218a30f 100644
--- a/README.md
+++ b/README.md
@@ -178,6 +178,10 @@ DeepWork follows a **Git-native, installation-only** design:
 your-project/
 ├── .deepwork/
 │   ├── config.yml          # Platform configuration
+│   ├── policies/           # Policy definitions (v2 format)
+│   │   └── policy-name.md  # Individual policy files
+│   ├── tmp/                # Temporary state (gitignored)
+│   │   └── policy/queue/   # Policy evaluation queue
 │   └── jobs/               # Job definitions
 │       └── job_name/
 │           ├── job.yml     # Job metadata
@@ -208,11 +212,16 @@ deepwork/
 │   ├── core/             # Core functionality
 │   │   ├── parser.py     # Job definition parsing
 │   │   ├── detector.py   # Platform detection
-│   │   └── generator.py  # Skill file generation
+│   │   ├── generator.py  # Skill file generation
+│   │   ├── policy_parser.py    # Policy parsing (v1 and v2)
+│   │   ├── pattern_matcher.py  # Variable pattern matching
+│   │   ├── policy_queue.py     # Policy state queue
+│   │   └── command_executor.py # Command action execution
 │   ├── hooks/            # Cross-platform hook wrappers
 │   │   ├── wrapper.py    # Input/output normalization
-│   │   ├── claude_hook.sh  # Claude Code adapter
-│   │   └── gemini_hook.sh  # Gemini CLI adapter
+│   │   ├── policy_check.py   # Policy evaluation hook (v2)
+│   │   ├── claude_hook.sh    # Claude Code adapter
+│   │   └── gemini_hook.sh    # Gemini CLI adapter
 │   ├── templates/        # Jinja2 templates
 │   │   ├── claude/       # Claude Code templates
 │   │   └── gemini/       # Gemini CLI templates
@@ -243,15 +252,31 @@ Maintain a clean repository with automatic branch management and isolation.
 ### 🛡️ Automated Policies
 Enforce project standards and best practices without manual oversight. Policies monitor file changes and automatically prompt your AI assistant to follow specific guidelines when relevant code is modified.
 - **Automatic Triggers**: Detect when specific files or directories are changed to fire relevant policies.
+- **File Correspondence**: Define bidirectional (set) or directional (pair) relationships between files.
+- **Command Actions**: Run idempotent commands (formatters, linters) automatically when files change.
 - **Contextual Guidance**: Instructions are injected directly into the AI's workflow at the right moment.
-- **Common Use Cases**: Keep documentation in sync, enforce security reviews, or automate changelog updates.
 
-**Example Policy**:
-```yaml
-# Enforce documentation updates when config changes
-- name: "Update docs on config changes"
-  trigger: "app/config/**/*"
-  instructions: "Configuration files changed. Please update docs/install_guide.md."
+**Example Policy** (`.deepwork/policies/source-test-pairing.md`):
+```markdown
+---
+name: Source/Test Pairing
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
+---
+When source files change, corresponding test files should also change.
+Please create or update tests for the modified source files.
+```
+
+**Example Command Policy** (`.deepwork/policies/format-python.md`):
+```markdown
+---
+name: Format Python
+trigger: "**/*.py"
+action:
+  command: "ruff format {file}"
+  run_for: each_match
+---
 ```
 
 ### 🚀 Multi-Platform Support
diff --git a/doc/architecture.md b/doc/architecture.md
index 33ac973..a17a1c5 100644
--- a/doc/architecture.md
+++ b/doc/architecture.md
@@ -46,8 +46,11 @@ deepwork/                       # DeepWork tool repository
 │       │   ├── detector.py     # AI platform detection
 │       │   ├── generator.py    # Command file generation
 │       │   ├── parser.py       # Job definition parsing
-│       │   ├── policy_parser.py # Policy definition parsing
-│       │   └── hooks_syncer.py # Hook syncing to platforms
+│       │   ├── policy_parser.py    # Policy definition parsing (v1 and v2)
+│       │   ├── pattern_matcher.py  # Variable pattern matching for policies
+│       │   ├── policy_queue.py     # Policy state queue system
+│       │   ├── command_executor.py # Command action execution
+│       │   └── hooks_syncer.py     # Hook syncing to platforms
 │       ├── hooks/              # Hook system and cross-platform wrappers
 │       │   ├── __init__.py
 │       │   ├── wrapper.py           # Cross-platform input/output normalization
@@ -286,7 +289,13 @@ my-project/                     # User's project (target)
 │       └── ...
 ├── .deepwork/                  # DeepWork configuration
 │   ├── config.yml              # Platform config
-│   ├── .gitignore              # Ignores .last_work_tree
+│   ├── .gitignore              # Ignores tmp/ directory
+│   ├── policies/               # Policy definitions (v2 format)
+│   │   ├── source-test-pairing.md
+│   │   ├── format-python.md
+│   │   └── api-docs.md
+│   ├── tmp/                    # Temporary state (gitignored)
+│   │   └── policy/queue/       # Policy evaluation queue
 │   └── jobs/                   # Job definitions
 │       ├── deepwork_jobs/      # Core job for managing jobs
 │       │   ├── job.yml
@@ -305,7 +314,7 @@ my-project/                     # User's project (target)
 │       │   └── steps/
 │       └── ad_campaign/
 │           └── ...
-├── .deepwork.policy.yml        # Policy definitions (project root)
+├── .deepwork.policy.yml        # Legacy policy definitions (v1 format)
 ├── (rest of user's project files)
 └── README.md
 ```
@@ -1000,57 +1009,125 @@ Policies are automated enforcement rules that trigger based on file changes duri
 - Documentation stays in sync with code changes
 - Security reviews happen when sensitive code is modified
 - Team guidelines are followed automatically
+- File correspondences are maintained (e.g., source/test pairing)
 
-### Policy Configuration File
+### Policy System v2 (Frontmatter Markdown)
 
-Policies are defined in `.deepwork.policy.yml` at the project root:
+Policies are defined as individual markdown files in `.deepwork/policies/`:
 
+```
+.deepwork/policies/
+├── source-test-pairing.md
+├── format-python.md
+└── api-docs.md
+```
+
+Each policy file uses YAML frontmatter with a markdown body for instructions:
+
+```markdown
+---
+name: Source/Test Pairing
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
+---
+When source files change, corresponding test files should also change.
+Please create or update tests for the modified source files.
+```
+
+### Detection Modes
+
+Policies support three detection modes:
+
+**1. Trigger/Safety (default)** - Fire when trigger matches but safety doesn't:
 ```yaml
-- name: "Update install guide on config changes"
-  trigger: "app/config/**/*"
-  safety: "docs/install_guide.md"
-  instructions: |
-    Configuration files have been modified. Please review docs/install_guide.md
-    and update it if any installation instructions need to change.
-
-- name: "Security review for auth changes"
-  trigger:
-    - "src/auth/**/*"
-    - "src/security/**/*"
-  safety:
-    - "SECURITY.md"
-    - "docs/security_audit.md"
-  instructions: |
-    Authentication or security code has been changed. Please:
-    1. Check for hardcoded credentials
-    2. Verify input validation
-    3. Review access control logic
+---
+name: Update install guide
+trigger: "app/config/**/*"
+safety: "docs/install_guide.md"
+---
+```
+
+**2. Set (bidirectional)** - Enforce file correspondence in both directions:
+```yaml
+---
+name: Source/Test Pairing
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
+---
+```
+Uses variable patterns like `{path}` (multi-segment) and `{name}` (single-segment) for matching.
+
+**3. Pair (directional)** - A changed trigger file requires its corresponding expected file(s) to change too, but not vice versa:
+```yaml
+---
+name: API Documentation
+pair:
+  trigger: src/api/{name}.py
+  expects: docs/api/{name}.md
+---
+```
+
+### Action Types
+
+**1. Prompt (default)** - Show instructions to the agent:
+```markdown
+---
+name: Security Review
+trigger: "src/auth/**/*"
+---
+Please check for hardcoded credentials and validate input.
+```
+
+**2. Command** - Run an idempotent command:
+```yaml
+---
+name: Format Python
+trigger: "**/*.py"
+action:
+  command: "ruff format {file}"
+  run_for: each_match  # or "all_matches"
+---
 ```
 
 ### Policy Evaluation Flow
 
 1. **Session Start**: When a Claude Code session begins, the baseline git state is captured
 2. **Agent Works**: The AI agent performs tasks, potentially modifying files
-3. **Session Stop**: When the agent finishes:
-   - Changed files are detected by comparing against the baseline
-   - Each policy is evaluated:
-     - If any changed file matches a `trigger` pattern AND
-     - No changed file matches a `safety` pattern AND
-     - The agent hasn't marked it with a `<promise>` tag
-     - → The policy fires
-   - If policies fire, Claude is prompted to address them
+3. **Session Stop**: When the agent finishes (after_agent event):
+   - Changed files are detected based on each policy's `compare_to` setting (base, default_tip, or prompt)
+   - Each policy is evaluated based on its detection mode
+   - Queue entries are created in `.deepwork/tmp/policy/queue/` for deduplication
+   - For command actions: commands are executed, results tracked
+   - For prompt actions: if the policy fires and has not already been promised, the agent is prompted
 4. **Promise Tags**: Agents can mark policies as addressed by including `<promise>✓ Policy Name</promise>` in their response
 
+### Queue System
+
+Policy state is tracked in `.deepwork/tmp/policy/queue/` with files named `{hash}.{status}.json`, where `{status}` is one of:
+- `queued` - Detected, awaiting evaluation
+- `passed` - Policy satisfied (promise found or command succeeded)
+- `failed` - Policy not satisfied
+- `skipped` - Safety pattern matched
+
+This prevents re-prompting for the same policy violation within a session.
+
 ### Hook Integration
 
-Policies are implemented using Claude Code's hooks system. The `deepwork_policy` standard job includes:
+The v2 policy system uses the cross-platform hook wrapper:
 
 ```
-.deepwork/jobs/deepwork_policy/hooks/
-├── global_hooks.yml              # Maps lifecycle events to scripts
-├── user_prompt_submit.sh         # Captures baseline at each prompt
-├── capture_prompt_work_tree.sh   # Creates git state snapshot for compare_to: prompt
-└── policy_stop_hook.sh           # Evaluates policies on stop (calls Python evaluator)
+src/deepwork/hooks/
+├── wrapper.py           # Cross-platform input/output normalization
+├── policy_check.py      # Policy evaluation hook (v2)
+├── claude_hook.sh       # Claude Code shell wrapper
+└── gemini_hook.sh       # Gemini CLI shell wrapper
+```
+
+Hooks are called via the shell wrappers:
+```bash
+claude_hook.sh deepwork.hooks.policy_check
 ```
 
 The hooks are installed to `.claude/settings.json` during `deepwork sync`:
@@ -1058,9 +1135,6 @@ The hooks are installed to `.claude/settings.json` during `deepwork sync`:
 ```json
 {
   "hooks": {
-    "UserPromptSubmit": [
-      {"matcher": "", "hooks": [{"type": "command", "command": ".deepwork/jobs/deepwork_policy/hooks/user_prompt_submit.sh"}]}
-    ],
     "Stop": [
       {"matcher": "", "hooks": [{"type": "command", "command": ".deepwork/jobs/deepwork_policy/hooks/policy_stop_hook.sh"}]}
     ]
@@ -1068,6 +1142,18 @@ The hooks are installed to `.claude/settings.json` during `deepwork sync`:
 }
 ```
 
+### Legacy v1 Format
+
+The v1 format (`.deepwork.policy.yml`) is still supported for backward compatibility:
+
+```yaml
+- name: "Update install guide"
+  trigger: "app/config/**/*"
+  safety: "docs/install_guide.md"
+  instructions: |
+    Configuration files have been modified. Please review docs/install_guide.md.
+```
+
 ### Cross-Platform Hook Wrapper System
 
 The `hooks/` module provides a wrapper system that allows writing hooks once in Python and running them on multiple platforms. This normalizes the differences between Claude Code and Gemini CLI hook systems.
diff --git a/pyproject.toml b/pyproject.toml
index f3d38af..d84e3ed 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "deepwork"
-version = "0.3.0"
+version = "0.4.0"
 description = "Framework for enabling AI agents to perform complex, multi-step work tasks"
 readme = "README.md"
 requires-python = ">=3.11"

From 78ddb044d4e8a87b8d93dd2d8ee1dfd1270dd030 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 16 Jan 2026 19:12:52 +0000
Subject: [PATCH 07/10] Update policy_stop_hook.sh to support both v1 and v2
 policy formats

The hook now:
- Checks for v2 policies in .deepwork/policies/ first
- Falls back to v1 policies in .deepwork.policy.yml if no v2 found
- Passes JSON input directly to policy_check.py for v2 (via wrapper)
- Maintains existing behavior for v1 evaluate_policies.py
---
 .../deepwork_policy/hooks/policy_stop_hook.sh | 77 +++++++++++--------
 1 file changed, 45 insertions(+), 32 deletions(-)

diff --git a/src/deepwork/standard_jobs/deepwork_policy/hooks/policy_stop_hook.sh b/src/deepwork/standard_jobs/deepwork_policy/hooks/policy_stop_hook.sh
index b12d456..6a84bdd 100755
--- a/src/deepwork/standard_jobs/deepwork_policy/hooks/policy_stop_hook.sh
+++ b/src/deepwork/standard_jobs/deepwork_policy/hooks/policy_stop_hook.sh
@@ -2,16 +2,27 @@
 # policy_stop_hook.sh - Evaluates policies when the agent stops
 #
 # This script is called as a Claude Code Stop hook. It:
-# 1. Evaluates policies from .deepwork.policy.yml
+# 1. Evaluates policies from .deepwork/policies/ (v2) or .deepwork.policy.yml (v1)
 # 2. Computes changed files based on each policy's compare_to setting
 # 3. Checks for <promise> tags in the conversation transcript
 # 4. Returns JSON to block stop if policies need attention
 
 set -e
 
-# Check if policy file exists
-if [ ! -f .deepwork.policy.yml ]; then
-    # No policies defined, nothing to do
+# Determine which policy system to use
+USE_V2=false
+V1_POLICY_FILE=".deepwork.policy.yml"
+V2_POLICY_DIR=".deepwork/policies"
+
+if [ -d "${V2_POLICY_DIR}" ]; then
+    # Check if there are any .md files in the v2 directory
+    if ls "${V2_POLICY_DIR}"/*.md 1>/dev/null 2>&1; then
+        USE_V2=true
+    fi
+fi
+
+# If no v2 policies and no v1 policy file, nothing to do
+if [ "${USE_V2}" = false ] && [ ! -f "${V1_POLICY_FILE}" ]; then
     exit 0
 fi
 
@@ -21,36 +32,38 @@ if [ ! -t 0 ]; then
     HOOK_INPUT=$(cat)
 fi
 
-# Extract transcript_path from the hook input JSON using jq
-# Claude Code passes: {"session_id": "...", "transcript_path": "...", ...}
-TRANSCRIPT_PATH=""
-if [ -n "${HOOK_INPUT}" ]; then
-    TRANSCRIPT_PATH=$(echo "${HOOK_INPUT}" | jq -r '.transcript_path // empty' 2>/dev/null || echo "")
-fi
+if [ "${USE_V2}" = true ]; then
+    # Use v2 policy system via cross-platform wrapper
+    # The wrapper reads JSON input and handles transcript extraction
+    result=$(echo "${HOOK_INPUT}" | DEEPWORK_HOOK_PLATFORM=claude DEEPWORK_HOOK_EVENT=Stop python -m deepwork.hooks.policy_check 2>/dev/null || echo '{}')
+else
+    # Use v1 policy system - extract conversation context for evaluate_policies
 
-# Extract conversation text from the JSONL transcript
-# The transcript is JSONL format - each line is a JSON object
-# We need to extract the text content from assistant messages
-conversation_context=""
-if [ -n "${TRANSCRIPT_PATH}" ] && [ -f "${TRANSCRIPT_PATH}" ]; then
-    # Extract text content from all assistant messages in the transcript
-    # Each line is a JSON object; we extract .message.content[].text for assistant messages
-    conversation_context=$(cat "${TRANSCRIPT_PATH}" | \
-        grep -E '"role"\s*:\s*"assistant"' | \
-        jq -r '.message.content // [] | map(select(.type == "text")) | map(.text) | join("\n")' 2>/dev/null | \
-        tr -d '\0' || echo "")
-fi
+    # Extract transcript_path from the hook input JSON using jq
+    # Claude Code passes: {"session_id": "...", "transcript_path": "...", ...}
+    TRANSCRIPT_PATH=""
+    if [ -n "${HOOK_INPUT}" ]; then
+        TRANSCRIPT_PATH=$(echo "${HOOK_INPUT}" | jq -r '.transcript_path // empty' 2>/dev/null || echo "")
+    fi
+
+    # Extract conversation text from the JSONL transcript
+    # The transcript is JSONL format - each line is a JSON object
+    # We need to extract the text content from assistant messages
+    conversation_context=""
+    if [ -n "${TRANSCRIPT_PATH}" ] && [ -f "${TRANSCRIPT_PATH}" ]; then
+        # Extract text content from all assistant messages in the transcript
+        # Each line is a JSON object; we extract .message.content[].text for assistant messages
+        conversation_context=$(cat "${TRANSCRIPT_PATH}" | \
+            grep -E '"role"\s*:\s*"assistant"' | \
+            jq -r '.message.content // [] | map(select(.type == "text")) | map(.text) | join("\n")' 2>/dev/null | \
+            tr -d '\0' || echo "")
+    fi
 
-# Call the Python evaluator
-# The Python module handles:
-# - Parsing the policy file
-# - Computing changed files based on each policy's compare_to setting
-# - Matching changed files against triggers/safety patterns
-# - Checking for promise tags in the conversation context
-# - Generating appropriate JSON output
-result=$(echo "${conversation_context}" | python -m deepwork.hooks.evaluate_policies \
-    --policy-file .deepwork.policy.yml \
-    2>/dev/null || echo '{}')
+    # Call the Python v1 evaluator
+    result=$(echo "${conversation_context}" | python -m deepwork.hooks.evaluate_policies \
+        --policy-file "${V1_POLICY_FILE}" \
+        2>/dev/null || echo '{}')
+fi
 
 # Output the result (JSON for Claude Code hooks)
 echo "${result}"

From 9ff59e7e364c2002d8394f66db247e487fb95d31 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 16 Jan 2026 19:29:59 +0000
Subject: [PATCH 08/10] Remove v1 policy format support

Remove all legacy v1 policy format (.deepwork.policy.yml) support:

- Remove evaluate_policies.py hook module
- Remove PolicyV1 class and parse_policy_file from policy_parser.py
- Remove v1 schema (POLICY_SCHEMA_V1) from policy_schema.py
- Remove v1 test fixtures and test_evaluate_policies.py
- Update test fixtures to use v2 frontmatter markdown format
- Update documentation to remove v1 references
- Fix policy_stop_hook.sh to handle exit code 2 (block) correctly

Only v2 frontmatter markdown format (.deepwork/policies/*.md) is now supported.
---
 .claude/commands/deepwork_policy.define.md    | 382 ++++++++++------
 .deepwork/jobs/deepwork_policy/job.yml        |  21 +-
 .../jobs/deepwork_policy/steps/define.md      | 338 +++++++++------
 .gemini/commands/deepwork_policy/define.toml  | 383 ++++++++++------
 CHANGELOG.md                                  |   4 +-
 README.md                                     |   5 +-
 doc/architecture.md                           |  18 +-
 src/deepwork/core/policy_parser.py            | 104 +----
 src/deepwork/hooks/README.md                  |   2 -
 src/deepwork/hooks/evaluate_policies.py       | 410 ------------------
 src/deepwork/schemas/policy_schema.py         |  79 ----
 .../deepwork_policy/hooks/policy_stop_hook.sh |  58 +--
 tests/fixtures/policies/empty_policy.yml      |   1 -
 .../policies/instructions/security_review.md  |   8 -
 .../policies/invalid_missing_instructions.yml |   2 -
 .../policies/invalid_missing_trigger.yml      |   3 -
 tests/fixtures/policies/multiple_policies.yml |  21 -
 .../policy_with_instructions_file.yml         |   3 -
 tests/fixtures/policies/valid_policy.yml      |   6 -
 tests/shell_script_tests/conftest.py          |  20 +-
 .../test_policy_stop_hook.py                  |  52 ++-
 tests/unit/test_evaluate_policies.py          | 101 -----
 tests/unit/test_policy_parser.py              | 343 ++++++---------
 23 files changed, 896 insertions(+), 1468 deletions(-)
 delete mode 100644 src/deepwork/hooks/evaluate_policies.py
 delete mode 100644 tests/fixtures/policies/empty_policy.yml
 delete mode 100644 tests/fixtures/policies/instructions/security_review.md
 delete mode 100644 tests/fixtures/policies/invalid_missing_instructions.yml
 delete mode 100644 tests/fixtures/policies/invalid_missing_trigger.yml
 delete mode 100644 tests/fixtures/policies/multiple_policies.yml
 delete mode 100644 tests/fixtures/policies/policy_with_instructions_file.yml
 delete mode 100644 tests/fixtures/policies/valid_policy.yml
 delete mode 100644 tests/unit/test_evaluate_policies.py

diff --git a/.claude/commands/deepwork_policy.define.md b/.claude/commands/deepwork_policy.define.md
index 9e7d1c2..9a2a551 100644
--- a/.claude/commands/deepwork_policy.define.md
+++ b/.claude/commands/deepwork_policy.define.md
@@ -1,5 +1,5 @@
 ---
-description: Create or update policy entries in .deepwork.policy.yml
+description: Create or update policies in .deepwork/policies/ (v2) or .deepwork.policy.yml (v1)
 ---
 
 # deepwork_policy.define
@@ -14,17 +14,22 @@ Manages policies that automatically trigger when certain files change during an
 Policies help ensure that code changes follow team guidelines, documentation is updated,
 and architectural decisions are respected.
 
-Policies are defined in a `.deepwork.policy.yml` file at the root of your project. Each policy
-specifies:
-- Trigger patterns: Glob patterns for files that, when changed, should trigger the policy
-- Safety patterns: Glob patterns for files that, if also changed, mean the policy doesn't need to fire
-- Instructions: What the agent should do when the policy triggers
+**Policy System v2 (Recommended)**
+Policies are defined as individual markdown files in `.deepwork/policies/` with YAML frontmatter.
+This format supports:
+- Detection modes: trigger/safety (default), set (bidirectional), pair (directional)
+- Action types: prompt (show instructions), command (run idempotent commands)
+- Variable pattern matching for file correspondence (e.g., `src/{path}.py` ↔ `tests/{path}_test.py`)
+
+**Legacy v1 Format**
+Still supported: `.deepwork.policy.yml` at project root with trigger/safety/instructions fields.
 
 Example use cases:
+- Enforce source/test pairing with set patterns
+- Run formatters automatically when files change
 - Update installation docs when configuration files change
 - Require security review when authentication code is modified
 - Ensure API documentation stays in sync with API code
-- Remind developers to update changelogs
 
 
 
@@ -34,200 +39,295 @@ Example use cases:
 
 ## Objective
 
-Create or update policy entries in the `.deepwork.policy.yml` file to enforce team guidelines, documentation requirements, or other constraints when specific files change.
+Create or update policies to enforce team guidelines, documentation requirements, file correspondences, or automated commands when specific files change.
 
 ## Task
 
 Guide the user through defining a new policy by asking structured questions. **Do not create the policy without first understanding what they want to enforce.**
 
-**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices.
+**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user.
 
-### Step 1: Understand the Policy Purpose
+## Policy System Overview
 
-Start by asking structured questions to understand what the user wants to enforce:
+DeepWork supports two policy formats:
 
-1. **What guideline or constraint should this policy enforce?**
-   - What situation triggers the need for action?
-   - What files or directories, when changed, should trigger this policy?
-   - Examples: "When config files change", "When API code changes", "When database schema changes"
+**v2 (Recommended)**: Individual markdown files in `.deepwork/policies/` with YAML frontmatter
+**v1 (Legacy)**: Single `.deepwork.policy.yml` file at project root
 
-2. **What action should be taken?**
-   - What should the agent do when the policy triggers?
-   - Update documentation? Perform a security review? Update tests?
-   - Is there a specific file or process that needs attention?
+**Always prefer v2 format** for new policies. It supports more detection modes and action types.
 
-3. **Are there any "safety" conditions?**
-   - Are there files that, if also changed, mean the policy doesn't need to fire?
-   - For example: If config changes AND install_guide.md changes, assume docs are already updated
-   - This prevents redundant prompts when the user has already done the right thing
+---
 
-### Step 2: Define the Trigger Patterns
+## Step 1: Understand the Policy Purpose
 
-Help the user define glob patterns for files that should trigger the policy:
+Ask structured questions to understand what the user wants to enforce:
 
-**Common patterns:**
-- `src/**/*.py` - All Python files in src directory (recursive)
-- `app/config/**/*` - All files in app/config directory
-- `*.md` - All markdown files in root
-- `src/api/**/*` - All files in the API directory
-- `migrations/**/*.sql` - All SQL migrations
+1. **What should this policy enforce?**
+   - Documentation sync? Security review? File correspondence? Code formatting?
 
-**Pattern syntax:**
-- `*` - Matches any characters within a single path segment
-- `**` - Matches any characters across multiple path segments (recursive)
-- `?` - Matches a single character
+2. **What files trigger this policy?**
+   - Which files/directories, when changed, should trigger action?
 
-### Step 3: Define Safety Patterns (Optional)
+3. **What should happen when the policy fires?**
+   - Show instructions to the agent? Run a command automatically?
 
-If there are files that, when also changed, mean the policy shouldn't fire:
+---
 
-**Examples:**
-- Policy: "Update install guide when config changes"
-  - Trigger: `app/config/**/*`
-  - Safety: `docs/install_guide.md` (if already updated, don't prompt)
+## Step 2: Choose Detection Mode
 
-- Policy: "Security review for auth changes"
-  - Trigger: `src/auth/**/*`
-  - Safety: `SECURITY.md`, `docs/security_review.md`
+Policies support three detection modes:
 
-### Step 3b: Choose the Comparison Mode (Optional)
+### Trigger/Safety (Default)
+Fire when trigger patterns match AND safety patterns don't.
 
-The `compare_to` field controls what baseline is used when detecting "changed files":
+**Use for**: General checks like "source changed, verify README"
 
-**Options:**
-- `base` (default) - Compares to the base of the current branch (merge-base with main/master). This is the most common choice for feature branches, as it shows all changes made on the branch.
-- `default_tip` - Compares to the current tip of the default branch (main/master). Useful when you want to see the difference from what's currently in production.
-- `prompt` - Compares to the state at the start of each prompt. Useful for policies that should only fire based on changes made during a single agent response.
+```yaml
+trigger: "app/config/**/*"
+safety: "docs/install_guide.md"
+```
 
-**When to use each:**
-- **base**: Best for most policies. "Did this branch change config files?" → trigger docs review
-- **default_tip**: For policies about what's different from production/main
-- **prompt**: For policies that should only consider very recent changes within the current session
+### Set (Bidirectional Correspondence)
+Fire when files matching one pattern change but corresponding files don't.
 
-Most policies should use the default (`base`) and don't need to specify `compare_to`.
+**Use for**: Source/test pairing, i18n files, paired documentation
 
-### Step 4: Write the Instructions
+```yaml
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
+```
 
-Create clear, actionable instructions for what the agent should do when the policy fires.
+If `src/utils/helper.py` changes, expects `tests/utils/helper_test.py` to also change.
 
-**Good instructions include:**
-- What to check or review
-- What files might need updating
-- Specific actions to take
-- Quality criteria for completion
+### Pair (Directional Correspondence)
+Fire when trigger files change but expected files don't. Changes to expected files alone don't trigger.
 
-**Example:**
+**Use for**: API code requires docs (but docs changes don't require API changes)
+
+```yaml
+pair:
+  trigger: src/api/{name}.py
+  expects: docs/api/{name}.md
+```
+
+### Variable Pattern Syntax
+
+- `{path}` - Matches multiple path segments (e.g., `foo/bar/baz`)
+- `{name}` - Matches a single segment (e.g., `helper`)
+
+---
+
+## Step 3: Choose Action Type
+
+### Prompt (Default)
+Show instructions to the agent. The markdown body becomes the instructions.
+
+```markdown
+---
+name: Security Review
+trigger: "src/auth/**/*"
+---
+Please review for hardcoded credentials and validate input handling.
 ```
-Configuration files have changed. Please:
-1. Review docs/install_guide.md for accuracy
-2. Update any installation steps that reference changed config
-3. Verify environment variable documentation is current
-4. Test that installation instructions still work
+
+### Command
+Run an idempotent command automatically. No markdown body needed.
+
+```markdown
+---
+name: Format Python
+trigger: "**/*.py"
+action:
+  command: "ruff format {file}"
+  run_for: each_match
+---
 ```
 
-### Step 5: Create the Policy Entry
+**Command variables**:
+- `{file}` - Current file being processed
+- `{files}` - Space-separated list of all matching files
+- `{repo_root}` - Repository root path
 
-Create or update `.deepwork.policy.yml` in the project root.
+**run_for options**:
+- `each_match` - Run command once per matching file
+- `all_matches` - Run command once with all files
 
-**File Location**: `.deepwork.policy.yml` (root of project)
+---
 
-**Format**:
-```yaml
-- name: "[Friendly name for the policy]"
-  trigger: "[glob pattern]"  # or array: ["pattern1", "pattern2"]
-  safety: "[glob pattern]"   # optional, or array
-  compare_to: "base"         # optional: "base" (default), "default_tip", or "prompt"
-  instructions: |
-    [Multi-line instructions for the agent...]
+## Step 4: Define Optional Settings
+
+### compare_to (Optional)
+Controls what baseline is used for detecting changed files:
+
+- `base` (default) - Changes since branch diverged from main/master
+- `default_tip` - Changes compared to current main/master tip
+- `prompt` - Changes since the last prompt submission
+
+Most policies should use the default (`base`).
+
+---
+
+## Step 5: Create the Policy File (v2 Format)
+
+### File Location
+Create: `.deepwork/policies/[policy-name].md`
+
+Use kebab-case for the filename (e.g., `source-test-pairing.md`, `format-python.md`).
+
+### v2 Format Examples
+
+**Trigger/Safety with Prompt:**
+```markdown
+---
+name: Update Install Guide
+trigger: "app/config/**/*"
+safety: "docs/install_guide.md"
+---
+Configuration files have changed. Please review docs/install_guide.md
+and update installation instructions if needed.
 ```
 
-**Alternative with instructions_file**:
-```yaml
-- name: "[Friendly name for the policy]"
-  trigger: "[glob pattern]"
-  safety: "[glob pattern]"
-  compare_to: "base"         # optional
-  instructions_file: "path/to/instructions.md"
+**Set (Bidirectional) with Prompt:**
+```markdown
+---
+name: Source/Test Pairing
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
+---
+When source files change, corresponding test files should also change.
+Please create or update tests for the modified source files.
 ```
 
-### Step 6: Verify the Policy
+**Pair (Directional) with Prompt:**
+```markdown
+---
+name: API Documentation
+pair:
+  trigger: src/api/{name}.py
+  expects: docs/api/{name}.md
+---
+API code has changed. Please update the corresponding documentation.
+```
 
-After creating the policy:
+**Command Action:**
+```markdown
+---
+name: Format Python Files
+trigger: "**/*.py"
+action:
+  command: "ruff format {file}"
+  run_for: each_match
+---
+```
 
-1. **Check the YAML syntax** - Ensure valid YAML formatting
-2. **Test trigger patterns** - Verify patterns match intended files
-3. **Review instructions** - Ensure they're clear and actionable
-4. **Check for conflicts** - Ensure the policy doesn't conflict with existing ones
+**Multiple Trigger Patterns:**
+```markdown
+---
+name: Security Review
+trigger:
+  - "src/auth/**/*"
+  - "src/security/**/*"
+safety:
+  - "SECURITY.md"
+  - "docs/security_audit.md"
+---
+Authentication or security code has been changed. Please review for:
+1. Hardcoded credentials or secrets
+2. Input validation issues
+3. Access control logic
+```
+
+---
+
+## Step 6: Legacy v1 Format (If Needed)
+
+Only use v1 format when adding to an existing `.deepwork.policy.yml` file.
 
-## Example Policies
+**File Location**: `.deepwork.policy.yml` (project root)
 
-### Update Documentation on Config Changes
 ```yaml
 - name: "Update install guide on config changes"
   trigger: "app/config/**/*"
   safety: "docs/install_guide.md"
+  compare_to: "base"
   instructions: |
-    Configuration files have been modified. Please review docs/install_guide.md
-    and update it if any installation instructions need to change based on the
-    new configuration.
+    Configuration files have changed. Please review docs/install_guide.md.
 ```
 
-### Security Review for Auth Code
+**Alternative with instructions_file:**
 ```yaml
-- name: "Security review for authentication changes"
-  trigger:
-    - "src/auth/**/*"
-    - "src/security/**/*"
-  safety:
-    - "SECURITY.md"
-    - "docs/security_audit.md"
-  instructions: |
-    Authentication or security code has been changed. Please:
-    1. Review for hardcoded credentials or secrets
-    2. Check input validation on user inputs
-    3. Verify access control logic is correct
-    4. Update security documentation if needed
+- name: "Security review"
+  trigger: "src/auth/**/*"
+  instructions_file: "path/to/instructions.md"
 ```
 
-### API Documentation Sync
-```yaml
-- name: "API documentation update"
-  trigger: "src/api/**/*.py"
-  safety: "docs/api/**/*.md"
-  instructions: |
-    API code has changed. Please verify that API documentation in docs/api/
-    is up to date with the code changes. Pay special attention to:
-    - New or changed endpoints
-    - Modified request/response schemas
-    - Updated authentication requirements
-```
+---
+
+## Step 7: Verify the Policy
+
+After creating the policy:
+
+1. **Check YAML frontmatter syntax** - Ensure valid YAML
+2. **Verify detection mode is appropriate** - trigger/safety vs set vs pair
+3. **Test patterns match intended files** - Check glob/variable patterns
+4. **Review instructions/command** - Ensure they're actionable
+5. **Check for conflicts** - Ensure no overlap with existing policies
+
+---
+
+## Pattern Reference
+
+### Glob Patterns
+- `*` - Matches any characters within a single path segment
+- `**` - Matches across multiple path segments (recursive)
+- `?` - Matches a single character
+
+### Variable Patterns (v2 only)
+- `{path}` - Captures multiple segments: `src/{path}.py` matches `src/a/b/c.py` → path=`a/b/c`
+- `{name}` - Captures single segment: `src/{name}.py` matches `src/utils.py` → name=`utils`
+
+### Common Examples
+- `src/**/*.py` - All Python files in src (recursive)
+- `app/config/**/*` - All files in app/config
+- `*.md` - Markdown files in root only
+- `**/*.test.ts` - All test files anywhere
+- `src/{path}.ts` ↔ `tests/{path}.test.ts` - Source/test pairs
+
+---
 
 ## Output Format
 
-### .deepwork.policy.yml
-Create or update this file at the project root with the new policy entry.
+Create one of:
+- `.deepwork/policies/[policy-name].md` (v2 format, recommended)
+- Entry in `.deepwork.policy.yml` (v1 format, legacy)
+
+---
 
 ## Quality Criteria
 
-- Asked structured questions to understand user requirements
+- Asked structured questions to understand requirements
+- Chose appropriate detection mode (trigger/safety, set, or pair)
+- Chose appropriate action type (prompt or command)
 - Policy name is clear and descriptive
-- Trigger patterns accurately match the intended files
-- Safety patterns prevent unnecessary triggering
-- Instructions are actionable and specific
-- YAML is valid and properly formatted
+- Patterns accurately match intended files
+- Instructions or command are actionable
+- YAML frontmatter is valid
+
+---
 
 ## Context
 
-Policies are evaluated automatically when you finish working on a task. The system:
-1. Determines which files have changed based on each policy's `compare_to` setting:
-   - `base` (default): Files changed since the branch diverged from main/master
-   - `default_tip`: Files different from the current main/master branch
-   - `prompt`: Files changed since the last prompt submission
-2. Checks if any changes match policy trigger patterns
-3. Skips policies where safety patterns also matched
-4. Prompts you with instructions for any triggered policies
+Policies are evaluated automatically when you finish working. The system:
+
+1. Loads policies from `.deepwork/policies/` (v2) and `.deepwork.policy.yml` (v1)
+2. Detects changed files based on `compare_to` setting
+3. Evaluates each policy based on its detection mode
+4. For **command** actions: Runs the command automatically
+5. For **prompt** actions: Shows instructions if policy fires
 
-You can mark a policy as addressed by including `<promise>✓ Policy Name</promise>` in your response (replace Policy Name with the actual policy name). This tells the system you've already handled that policy's requirements.
+Mark a policy as addressed by including `<promise>✓ Policy Name</promise>` in your response, replacing `Policy Name` with the policy's actual name.
 
 
 ## Inputs
@@ -255,7 +355,7 @@ All work for this job should be done on a dedicated work branch:
 ## Output Requirements
 
 Create the following output(s):
-- `.deepwork.policy.yml`
+- `.deepwork/policies/*.md` or `.deepwork.policy.yml`
 Ensure all outputs are:
 - Well-formatted and complete
 - Ready for review or use by subsequent steps
@@ -268,7 +368,7 @@ After completing this step:
 
 2. **Inform the user**:
    - The define command is complete
-   - Outputs created: .deepwork.policy.yml
+   - Outputs created: .deepwork/policies/*.md, .deepwork.policy.yml
    - This command can be run again anytime to make further changes
 
 ## Command Complete
diff --git a/.deepwork/jobs/deepwork_policy/job.yml b/.deepwork/jobs/deepwork_policy/job.yml
index 777894e..946f238 100644
--- a/.deepwork/jobs/deepwork_policy/job.yml
+++ b/.deepwork/jobs/deepwork_policy/job.yml
@@ -1,37 +1,40 @@
 name: deepwork_policy
-version: "0.2.0"
+version: "0.3.0"
 summary: "Policy enforcement for AI agent sessions"
 description: |
   Manages policies that automatically trigger when certain files change during an AI agent session.
   Policies help ensure that code changes follow team guidelines, documentation is updated,
   and architectural decisions are respected.
 
-  Policies are defined in a `.deepwork.policy.yml` file at the root of your project. Each policy
-  specifies:
-  - Trigger patterns: Glob patterns for files that, when changed, should trigger the policy
-  - Safety patterns: Glob patterns for files that, if also changed, mean the policy doesn't need to fire
-  - Instructions: What the agent should do when the policy triggers
+  Policies are defined as individual markdown files in `.deepwork/policies/` with YAML frontmatter.
+  This format supports:
+  - Detection modes: trigger/safety (default), set (bidirectional), pair (directional)
+  - Action types: prompt (show instructions), command (run idempotent commands)
+  - Variable pattern matching for file correspondence (e.g., `src/{path}.py` ↔ `tests/{path}_test.py`)
 
   Example use cases:
+  - Enforce source/test pairing with set patterns
+  - Run formatters automatically when files change
   - Update installation docs when configuration files change
   - Require security review when authentication code is modified
   - Ensure API documentation stays in sync with API code
-  - Remind developers to update changelogs
 
 changelog:
   - version: "0.1.0"
     changes: "Initial version"
   - version: "0.2.0"
     changes: "Standardized on 'ask structured questions' phrasing for user input"
+  - version: "0.3.0"
+    changes: "Updated for policy system v2 with detection modes, action types, and variable patterns"
 
 steps:
   - id: define
     name: "Define Policy"
-    description: "Create or update policy entries in .deepwork.policy.yml"
+    description: "Create or update policies in .deepwork/policies/"
     instructions_file: steps/define.md
     inputs:
       - name: policy_purpose
         description: "What guideline or constraint should this policy enforce?"
     outputs:
-      - .deepwork.policy.yml
+      - .deepwork/policies/*.md
     dependencies: []
diff --git a/.deepwork/jobs/deepwork_policy/steps/define.md b/.deepwork/jobs/deepwork_policy/steps/define.md
index 302eda7..452194a 100644
--- a/.deepwork/jobs/deepwork_policy/steps/define.md
+++ b/.deepwork/jobs/deepwork_policy/steps/define.md
@@ -2,197 +2,257 @@
 
 ## Objective
 
-Create or update policy entries in the `.deepwork.policy.yml` file to enforce team guidelines, documentation requirements, or other constraints when specific files change.
+Create or update policies to enforce team guidelines, documentation requirements, file correspondences, or automated commands when specific files change.
 
 ## Task
 
 Guide the user through defining a new policy by asking structured questions. **Do not create the policy without first understanding what they want to enforce.**
 
-**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices.
+**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user.
 
-### Step 1: Understand the Policy Purpose
+---
 
-Start by asking structured questions to understand what the user wants to enforce:
+## Step 1: Understand the Policy Purpose
 
-1. **What guideline or constraint should this policy enforce?**
-   - What situation triggers the need for action?
-   - What files or directories, when changed, should trigger this policy?
-   - Examples: "When config files change", "When API code changes", "When database schema changes"
+Ask structured questions to understand what the user wants to enforce:
 
-2. **What action should be taken?**
-   - What should the agent do when the policy triggers?
-   - Update documentation? Perform a security review? Update tests?
-   - Is there a specific file or process that needs attention?
+1. **What should this policy enforce?**
+   - Documentation sync? Security review? File correspondence? Code formatting?
 
-3. **Are there any "safety" conditions?**
-   - Are there files that, if also changed, mean the policy doesn't need to fire?
-   - For example: If config changes AND install_guide.md changes, assume docs are already updated
-   - This prevents redundant prompts when the user has already done the right thing
+2. **What files trigger this policy?**
+   - Which files/directories, when changed, should trigger action?
 
-### Step 2: Define the Trigger Patterns
+3. **What should happen when the policy fires?**
+   - Show instructions to the agent? Run a command automatically?
 
-Help the user define glob patterns for files that should trigger the policy:
+---
 
-**Common patterns:**
-- `src/**/*.py` - All Python files in src directory (recursive)
-- `app/config/**/*` - All files in app/config directory
-- `*.md` - All markdown files in root
-- `src/api/**/*` - All files in the API directory
-- `migrations/**/*.sql` - All SQL migrations
+## Step 2: Choose Detection Mode
 
-**Pattern syntax:**
-- `*` - Matches any characters within a single path segment
-- `**` - Matches any characters across multiple path segments (recursive)
-- `?` - Matches a single character
+Policies support three detection modes:
 
-### Step 3: Define Safety Patterns (Optional)
+### Trigger/Safety (Default)
+Fire when trigger patterns match AND safety patterns don't.
 
-If there are files that, when also changed, mean the policy shouldn't fire:
+**Use for**: General checks like "source changed, verify README"
 
-**Examples:**
-- Policy: "Update install guide when config changes"
-  - Trigger: `app/config/**/*`
-  - Safety: `docs/install_guide.md` (if already updated, don't prompt)
+```yaml
+trigger: "app/config/**/*"
+safety: "docs/install_guide.md"
+```
 
-- Policy: "Security review for auth changes"
-  - Trigger: `src/auth/**/*`
-  - Safety: `SECURITY.md`, `docs/security_review.md`
+### Set (Bidirectional Correspondence)
+Fire when files matching one pattern change but corresponding files don't.
 
-### Step 3b: Choose the Comparison Mode (Optional)
+**Use for**: Source/test pairing, i18n files, paired documentation
 
-The `compare_to` field controls what baseline is used when detecting "changed files":
+```yaml
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
+```
 
-**Options:**
-- `base` (default) - Compares to the base of the current branch (merge-base with main/master). This is the most common choice for feature branches, as it shows all changes made on the branch.
-- `default_tip` - Compares to the current tip of the default branch (main/master). Useful when you want to see the difference from what's currently in production.
-- `prompt` - Compares to the state at the start of each prompt. Useful for policies that should only fire based on changes made during a single agent response.
+If `src/utils/helper.py` changes, the policy expects `tests/utils/helper_test.py` to also change (and vice versa).
+
+### Pair (Directional Correspondence)
+Fire when trigger files change but expected files don't. Changes to expected files alone don't trigger the policy.
+
+**Use for**: API code requires docs (but docs changes don't require API changes)
+
+```yaml
+pair:
+  trigger: src/api/{name}.py
+  expects: docs/api/{name}.md
+```
 
-**When to use each:**
-- **base**: Best for most policies. "Did this branch change config files?" → trigger docs review
-- **default_tip**: For policies about what's different from production/main
-- **prompt**: For policies that should only consider very recent changes within the current session
+### Variable Pattern Syntax
 
-Most policies should use the default (`base`) and don't need to specify `compare_to`.
+- `{path}` - Matches multiple path segments (e.g., `foo/bar/baz`)
+- `{name}` - Matches a single segment (e.g., `helper`)
 
-### Step 4: Write the Instructions
+---
 
-Create clear, actionable instructions for what the agent should do when the policy fires.
+## Step 3: Choose Action Type
 
-**Good instructions include:**
-- What to check or review
-- What files might need updating
-- Specific actions to take
-- Quality criteria for completion
+### Prompt (Default)
+Show instructions to the agent. The markdown body becomes the instructions.
 
-**Example:**
+```markdown
+---
+name: Security Review
+trigger: "src/auth/**/*"
+---
+Please review for hardcoded credentials and validate input handling.
 ```
-Configuration files have changed. Please:
-1. Review docs/install_guide.md for accuracy
-2. Update any installation steps that reference changed config
-3. Verify environment variable documentation is current
-4. Test that installation instructions still work
+
+### Command
+Run an idempotent command automatically. No markdown body needed.
+
+```markdown
+---
+name: Format Python
+trigger: "**/*.py"
+action:
+  command: "ruff format {file}"
+  run_for: each_match
+---
 ```
 
-### Step 5: Create the Policy Entry
+**Command variables**:
+- `{file}` - Current file being processed
+- `{files}` - Space-separated list of all matching files
+- `{repo_root}` - Repository root path
 
-Create or update `.deepwork.policy.yml` in the project root.
+**run_for options**:
+- `each_match` - Run command once per matching file
+- `all_matches` - Run command once with all files
 
-**File Location**: `.deepwork.policy.yml` (root of project)
+---
 
-**Format**:
-```yaml
-- name: "[Friendly name for the policy]"
-  trigger: "[glob pattern]"  # or array: ["pattern1", "pattern2"]
-  safety: "[glob pattern]"   # optional, or array
-  compare_to: "base"         # optional: "base" (default), "default_tip", or "prompt"
-  instructions: |
-    [Multi-line instructions for the agent...]
-```
+## Step 4: Define Optional Settings
 
-**Alternative with instructions_file**:
-```yaml
-- name: "[Friendly name for the policy]"
-  trigger: "[glob pattern]"
-  safety: "[glob pattern]"
-  compare_to: "base"         # optional
-  instructions_file: "path/to/instructions.md"
-```
+### compare_to (Optional)
+Controls what baseline is used for detecting changed files:
 
-### Step 6: Verify the Policy
+- `base` (default) - Changes since branch diverged from main/master
+- `default_tip` - Changes compared to current main/master tip
+- `prompt` - Changes since the last prompt submission
 
-After creating the policy:
+Most policies should use the default (`base`).
 
-1. **Check the YAML syntax** - Ensure valid YAML formatting
-2. **Test trigger patterns** - Verify patterns match intended files
-3. **Review instructions** - Ensure they're clear and actionable
-4. **Check for conflicts** - Ensure the policy doesn't conflict with existing ones
+---
 
-## Example Policies
+## Step 5: Create the Policy File
 
-### Update Documentation on Config Changes
-```yaml
-- name: "Update install guide on config changes"
-  trigger: "app/config/**/*"
-  safety: "docs/install_guide.md"
-  instructions: |
-    Configuration files have been modified. Please review docs/install_guide.md
-    and update it if any installation instructions need to change based on the
-    new configuration.
+### File Location
+Create: `.deepwork/policies/[policy-name].md`
+
+Use kebab-case for filename (e.g., `source-test-pairing.md`, `format-python.md`)
+
+### Examples
+
+**Trigger/Safety with Prompt:**
+```markdown
+---
+name: Update Install Guide
+trigger: "app/config/**/*"
+safety: "docs/install_guide.md"
+---
+Configuration files have changed. Please review docs/install_guide.md
+and update installation instructions if needed.
 ```
 
-### Security Review for Auth Code
-```yaml
-- name: "Security review for authentication changes"
-  trigger:
-    - "src/auth/**/*"
-    - "src/security/**/*"
-  safety:
-    - "SECURITY.md"
-    - "docs/security_audit.md"
-  instructions: |
-    Authentication or security code has been changed. Please:
-    1. Review for hardcoded credentials or secrets
-    2. Check input validation on user inputs
-    3. Verify access control logic is correct
-    4. Update security documentation if needed
+**Set (Bidirectional) with Prompt:**
+```markdown
+---
+name: Source/Test Pairing
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
+---
+When source files change, corresponding test files should also change.
+Please create or update tests for the modified source files.
 ```
 
-### API Documentation Sync
-```yaml
-- name: "API documentation update"
-  trigger: "src/api/**/*.py"
-  safety: "docs/api/**/*.md"
-  instructions: |
-    API code has changed. Please verify that API documentation in docs/api/
-    is up to date with the code changes. Pay special attention to:
-    - New or changed endpoints
-    - Modified request/response schemas
-    - Updated authentication requirements
+**Pair (Directional) with Prompt:**
+```markdown
+---
+name: API Documentation
+pair:
+  trigger: src/api/{name}.py
+  expects: docs/api/{name}.md
+---
+API code has changed. Please update the corresponding documentation.
+```
+
+**Command Action:**
+```markdown
+---
+name: Format Python Files
+trigger: "**/*.py"
+action:
+  command: "ruff format {file}"
+  run_for: each_match
+---
 ```
 
+**Multiple Trigger Patterns:**
+```markdown
+---
+name: Security Review
+trigger:
+  - "src/auth/**/*"
+  - "src/security/**/*"
+safety:
+  - "SECURITY.md"
+  - "docs/security_audit.md"
+---
+Authentication or security code has been changed. Please review for:
+1. Hardcoded credentials or secrets
+2. Input validation issues
+3. Access control logic
+```
+
+---
+
+## Step 6: Verify the Policy
+
+After creating the policy:
+
+1. **Check YAML frontmatter syntax** - Ensure valid YAML
+2. **Verify detection mode is appropriate** - trigger/safety vs set vs pair
+3. **Test patterns match intended files** - Check glob/variable patterns
+4. **Review instructions/command** - Ensure they're actionable
+5. **Check for conflicts** - Ensure no overlap with existing policies
+
+---
+
+## Pattern Reference
+
+### Glob Patterns
+- `*` - Matches any characters within a single path segment
+- `**` - Matches across multiple path segments (recursive)
+- `?` - Matches a single character
+
+### Variable Patterns
+- `{path}` - Captures multiple segments: `src/{path}.py` matches `src/a/b/c.py` → path=`a/b/c`
+- `{name}` - Captures single segment: `src/{name}.py` matches `src/utils.py` → name=`utils`
+
+### Common Examples
+- `src/**/*.py` - All Python files in src (recursive)
+- `app/config/**/*` - All files in app/config
+- `*.md` - Markdown files in root only
+- `**/*.test.ts` - All `.test.ts` files at any depth
+- `src/{path}.ts` ↔ `tests/{path}.test.ts` - Source/test pairs
+
+---
+
 ## Output Format
 
-### .deepwork.policy.yml
-Create or update this file at the project root with the new policy entry.
+Create: `.deepwork/policies/[policy-name].md`
+
+---
 
 ## Quality Criteria
 
-- Asked structured questions to understand user requirements
+- Asked structured questions to understand requirements
+- Chose appropriate detection mode (trigger/safety, set, or pair)
+- Chose appropriate action type (prompt or command)
 - Policy name is clear and descriptive
-- Trigger patterns accurately match the intended files
-- Safety patterns prevent unnecessary triggering
-- Instructions are actionable and specific
-- YAML is valid and properly formatted
+- Patterns accurately match intended files
+- Instructions or command are actionable
+- YAML frontmatter is valid
+
+---
 
 ## Context
 
-Policies are evaluated automatically when you finish working on a task. The system:
-1. Determines which files have changed based on each policy's `compare_to` setting:
-   - `base` (default): Files changed since the branch diverged from main/master
-   - `default_tip`: Files different from the current main/master branch
-   - `prompt`: Files changed since the last prompt submission
-2. Checks if any changes match policy trigger patterns
-3. Skips policies where safety patterns also matched
-4. Prompts you with instructions for any triggered policies
+Policies are evaluated automatically when you finish working. The system:
+
+1. Loads policies from `.deepwork/policies/`
+2. Detects changed files based on `compare_to` setting
+3. Evaluates each policy based on its detection mode
+4. For **command** actions: Runs the command automatically when the policy fires
+5. For **prompt** actions: Shows instructions if policy fires
 
-You can mark a policy as addressed by including `<promise>✓ Policy Name</promise>` in your response (replace Policy Name with the actual policy name). This tells the system you've already handled that policy's requirements.
+Mark a policy as addressed by including `<promise>✓ Policy Name</promise>` in your response (replace `Policy Name` with the policy's actual name).
diff --git a/.gemini/commands/deepwork_policy/define.toml b/.gemini/commands/deepwork_policy/define.toml
index ca45a47..0195ff1 100644
--- a/.gemini/commands/deepwork_policy/define.toml
+++ b/.gemini/commands/deepwork_policy/define.toml
@@ -1,10 +1,10 @@
 # deepwork_policy:define
 #
-# Create or update policy entries in .deepwork.policy.yml
+# Create or update policies in .deepwork/policies/ (v2) or .deepwork.policy.yml (v1)
 #
 # Generated by DeepWork - do not edit manually
 
-description = "Create or update policy entries in .deepwork.policy.yml"
+description = "Create or update policies in .deepwork/policies/ (v2) or .deepwork.policy.yml (v1)"
 
 prompt = """
 # deepwork_policy:define
@@ -19,17 +19,22 @@ Manages policies that automatically trigger when certain files change during an
 Policies help ensure that code changes follow team guidelines, documentation is updated,
 and architectural decisions are respected.
 
-Policies are defined in a `.deepwork.policy.yml` file at the root of your project. Each policy
-specifies:
-- Trigger patterns: Glob patterns for files that, when changed, should trigger the policy
-- Safety patterns: Glob patterns for files that, if also changed, mean the policy doesn't need to fire
-- Instructions: What the agent should do when the policy triggers
+**Policy System v2 (Recommended)**
+Policies are defined as individual markdown files in `.deepwork/policies/` with YAML frontmatter.
+This format supports:
+- Detection modes: trigger/safety (default), set (bidirectional), pair (directional)
+- Action types: prompt (show instructions), command (run idempotent commands)
+- Variable pattern matching for file correspondence (e.g., `src/{path}.py` ↔ `tests/{path}_test.py`)
+
+**Legacy v1 Format**
+Still supported: `.deepwork.policy.yml` at project root with trigger/safety/instructions fields.
 
 Example use cases:
+- Enforce source/test pairing with set patterns
+- Run formatters automatically when files change
 - Update installation docs when configuration files change
 - Require security review when authentication code is modified
 - Ensure API documentation stays in sync with API code
-- Remind developers to update changelogs
 
 
 
@@ -39,200 +44,295 @@ Example use cases:
 
 ## Objective
 
-Create or update policy entries in the `.deepwork.policy.yml` file to enforce team guidelines, documentation requirements, or other constraints when specific files change.
+Create or update policies to enforce team guidelines, documentation requirements, file correspondences, or automated commands when specific files change.
 
 ## Task
 
 Guide the user through defining a new policy by asking structured questions. **Do not create the policy without first understanding what they want to enforce.**
 
-**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices.
+**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user.
 
-### Step 1: Understand the Policy Purpose
+## Policy System Overview
 
-Start by asking structured questions to understand what the user wants to enforce:
+DeepWork supports two policy formats:
 
-1. **What guideline or constraint should this policy enforce?**
-   - What situation triggers the need for action?
-   - What files or directories, when changed, should trigger this policy?
-   - Examples: "When config files change", "When API code changes", "When database schema changes"
+**v2 (Recommended)**: Individual markdown files in `.deepwork/policies/` with YAML frontmatter
+**v1 (Legacy)**: Single `.deepwork.policy.yml` file at project root
 
-2. **What action should be taken?**
-   - What should the agent do when the policy triggers?
-   - Update documentation? Perform a security review? Update tests?
-   - Is there a specific file or process that needs attention?
+**Always prefer v2 format** for new policies. It supports more detection modes and action types.
 
-3. **Are there any "safety" conditions?**
-   - Are there files that, if also changed, mean the policy doesn't need to fire?
-   - For example: If config changes AND install_guide.md changes, assume docs are already updated
-   - This prevents redundant prompts when the user has already done the right thing
+---
 
-### Step 2: Define the Trigger Patterns
+## Step 1: Understand the Policy Purpose
 
-Help the user define glob patterns for files that should trigger the policy:
+Ask structured questions to understand what the user wants to enforce:
 
-**Common patterns:**
-- `src/**/*.py` - All Python files in src directory (recursive)
-- `app/config/**/*` - All files in app/config directory
-- `*.md` - All markdown files in root
-- `src/api/**/*` - All files in the API directory
-- `migrations/**/*.sql` - All SQL migrations
+1. **What should this policy enforce?**
+   - Documentation sync? Security review? File correspondence? Code formatting?
 
-**Pattern syntax:**
-- `*` - Matches any characters within a single path segment
-- `**` - Matches any characters across multiple path segments (recursive)
-- `?` - Matches a single character
+2. **What files trigger this policy?**
+   - Which files/directories, when changed, should trigger action?
 
-### Step 3: Define Safety Patterns (Optional)
+3. **What should happen when the policy fires?**
+   - Show instructions to the agent? Run a command automatically?
 
-If there are files that, when also changed, mean the policy shouldn't fire:
+---
 
-**Examples:**
-- Policy: "Update install guide when config changes"
-  - Trigger: `app/config/**/*`
-  - Safety: `docs/install_guide.md` (if already updated, don't prompt)
+## Step 2: Choose Detection Mode
 
-- Policy: "Security review for auth changes"
-  - Trigger: `src/auth/**/*`
-  - Safety: `SECURITY.md`, `docs/security_review.md`
+Policies support three detection modes:
 
-### Step 3b: Choose the Comparison Mode (Optional)
+### Trigger/Safety (Default)
+Fire when trigger patterns match AND safety patterns don't.
 
-The `compare_to` field controls what baseline is used when detecting "changed files":
+**Use for**: General checks like "source changed, verify README"
 
-**Options:**
-- `base` (default) - Compares to the base of the current branch (merge-base with main/master). This is the most common choice for feature branches, as it shows all changes made on the branch.
-- `default_tip` - Compares to the current tip of the default branch (main/master). Useful when you want to see the difference from what's currently in production.
-- `prompt` - Compares to the state at the start of each prompt. Useful for policies that should only fire based on changes made during a single agent response.
+```yaml
+trigger: "app/config/**/*"
+safety: "docs/install_guide.md"
+```
 
-**When to use each:**
-- **base**: Best for most policies. "Did this branch change config files?" → trigger docs review
-- **default_tip**: For policies about what's different from production/main
-- **prompt**: For policies that should only consider very recent changes within the current session
+### Set (Bidirectional Correspondence)
+Fire when files matching one pattern change but corresponding files don't.
 
-Most policies should use the default (`base`) and don't need to specify `compare_to`.
+**Use for**: Source/test pairing, i18n files, paired documentation
 
-### Step 4: Write the Instructions
+```yaml
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
+```
 
-Create clear, actionable instructions for what the agent should do when the policy fires.
+If `src/utils/helper.py` changes, the policy expects `tests/utils/helper_test.py` to also change (and vice versa).
 
-**Good instructions include:**
-- What to check or review
-- What files might need updating
-- Specific actions to take
-- Quality criteria for completion
+### Pair (Directional Correspondence)
+Fire when trigger files change but expected files don't. Changes to expected files alone don't trigger the policy.
 
-**Example:**
+**Use for**: API code requires docs (but docs changes don't require API changes)
+
+```yaml
+pair:
+  trigger: src/api/{name}.py
+  expects: docs/api/{name}.md
+```
+
+### Variable Pattern Syntax
+
+- `{path}` - Matches multiple path segments (e.g., `foo/bar/baz`)
+- `{name}` - Matches a single segment (e.g., `helper`)
+
+---
+
+## Step 3: Choose Action Type
+
+### Prompt (Default)
+Show instructions to the agent. The markdown body becomes the instructions.
+
+```markdown
+---
+name: Security Review
+trigger: "src/auth/**/*"
+---
+Please review for hardcoded credentials and validate input handling.
 ```
-Configuration files have changed. Please:
-1. Review docs/install_guide.md for accuracy
-2. Update any installation steps that reference changed config
-3. Verify environment variable documentation is current
-4. Test that installation instructions still work
+
+### Command
+Run an idempotent command automatically. No markdown body needed.
+
+```markdown
+---
+name: Format Python
+trigger: "**/*.py"
+action:
+  command: "ruff format {file}"
+  run_for: each_match
+---
 ```
 
-### Step 5: Create the Policy Entry
+**Command variables**:
+- `{file}` - Current file being processed
+- `{files}` - Space-separated list of all matching files
+- `{repo_root}` - Repository root path
 
-Create or update `.deepwork.policy.yml` in the project root.
+**run_for options**:
+- `each_match` - Run command once per matching file
+- `all_matches` - Run command once with all files
 
-**File Location**: `.deepwork.policy.yml` (root of project)
+---
 
-**Format**:
-```yaml
-- name: "[Friendly name for the policy]"
-  trigger: "[glob pattern]"  # or array: ["pattern1", "pattern2"]
-  safety: "[glob pattern]"   # optional, or array
-  compare_to: "base"         # optional: "base" (default), "default_tip", or "prompt"
-  instructions: |
-    [Multi-line instructions for the agent...]
+## Step 4: Define Optional Settings
+
+### compare_to (Optional)
+Controls what baseline is used for detecting changed files:
+
+- `base` (default) - Changes since branch diverged from main/master
+- `default_tip` - Changes compared to current main/master tip
+- `prompt` - Changes since the last prompt submission
+
+Most policies should use the default (`base`).
+
+---
+
+## Step 5: Create the Policy File (v2 Format)
+
+### File Location
+Create: `.deepwork/policies/[policy-name].md`
+
+Use kebab-case for filename (e.g., `source-test-pairing.md`, `format-python.md`)
+
+### v2 Format Examples
+
+**Trigger/Safety with Prompt:**
+```markdown
+---
+name: Update Install Guide
+trigger: "app/config/**/*"
+safety: "docs/install_guide.md"
+---
+Configuration files have changed. Please review docs/install_guide.md
+and update installation instructions if needed.
 ```
 
-**Alternative with instructions_file**:
-```yaml
-- name: "[Friendly name for the policy]"
-  trigger: "[glob pattern]"
-  safety: "[glob pattern]"
-  compare_to: "base"         # optional
-  instructions_file: "path/to/instructions.md"
+**Set (Bidirectional) with Prompt:**
+```markdown
+---
+name: Source/Test Pairing
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
+---
+When source files change, corresponding test files should also change.
+Please create or update tests for the modified source files.
 ```
 
-### Step 6: Verify the Policy
+**Pair (Directional) with Prompt:**
+```markdown
+---
+name: API Documentation
+pair:
+  trigger: src/api/{name}.py
+  expects: docs/api/{name}.md
+---
+API code has changed. Please update the corresponding documentation.
+```
 
-After creating the policy:
+**Command Action:**
+```markdown
+---
+name: Format Python Files
+trigger: "**/*.py"
+action:
+  command: "ruff format {file}"
+  run_for: each_match
+---
+```
 
-1. **Check the YAML syntax** - Ensure valid YAML formatting
-2. **Test trigger patterns** - Verify patterns match intended files
-3. **Review instructions** - Ensure they're clear and actionable
-4. **Check for conflicts** - Ensure the policy doesn't conflict with existing ones
+**Multiple Trigger Patterns:**
+```markdown
+---
+name: Security Review
+trigger:
+  - "src/auth/**/*"
+  - "src/security/**/*"
+safety:
+  - "SECURITY.md"
+  - "docs/security_audit.md"
+---
+Authentication or security code has been changed. Please review for:
+1. Hardcoded credentials or secrets
+2. Input validation issues
+3. Access control logic
+```
+
+---
+
+## Step 6: Legacy v1 Format (If Needed)
+
+Only use v1 format when adding to an existing `.deepwork.policy.yml` file.
 
-## Example Policies
+**File Location**: `.deepwork.policy.yml` (project root)
 
-### Update Documentation on Config Changes
 ```yaml
 - name: "Update install guide on config changes"
   trigger: "app/config/**/*"
   safety: "docs/install_guide.md"
+  compare_to: "base"
   instructions: |
-    Configuration files have been modified. Please review docs/install_guide.md
-    and update it if any installation instructions need to change based on the
-    new configuration.
+    Configuration files have changed. Please review docs/install_guide.md.
 ```
 
-### Security Review for Auth Code
+**Alternative with instructions_file:**
 ```yaml
-- name: "Security review for authentication changes"
-  trigger:
-    - "src/auth/**/*"
-    - "src/security/**/*"
-  safety:
-    - "SECURITY.md"
-    - "docs/security_audit.md"
-  instructions: |
-    Authentication or security code has been changed. Please:
-    1. Review for hardcoded credentials or secrets
-    2. Check input validation on user inputs
-    3. Verify access control logic is correct
-    4. Update security documentation if needed
+- name: "Security review"
+  trigger: "src/auth/**/*"
+  instructions_file: "path/to/instructions.md"
 ```
 
-### API Documentation Sync
-```yaml
-- name: "API documentation update"
-  trigger: "src/api/**/*.py"
-  safety: "docs/api/**/*.md"
-  instructions: |
-    API code has changed. Please verify that API documentation in docs/api/
-    is up to date with the code changes. Pay special attention to:
-    - New or changed endpoints
-    - Modified request/response schemas
-    - Updated authentication requirements
-```
+---
+
+## Step 7: Verify the Policy
+
+After creating the policy:
+
+1. **Check YAML frontmatter syntax** - Ensure valid YAML
+2. **Verify detection mode is appropriate** - trigger/safety vs set vs pair
+3. **Test patterns match intended files** - Check glob/variable patterns
+4. **Review instructions/command** - Ensure they're actionable
+5. **Check for conflicts** - Ensure no overlap with existing policies
+
+---
+
+## Pattern Reference
+
+### Glob Patterns
+- `*` - Matches any characters within a single path segment
+- `**` - Matches across multiple path segments (recursive)
+- `?` - Matches a single character
+
+### Variable Patterns (v2 only)
+- `{path}` - Captures multiple segments: `src/{path}.py` matches `src/a/b/c.py` → path=`a/b/c`
+- `{name}` - Captures single segment: `src/{name}.py` matches `src/utils.py` → name=`utils`
+
+### Common Examples
+- `src/**/*.py` - All Python files in src (recursive)
+- `app/config/**/*` - All files in app/config
+- `*.md` - Markdown files in root only
+- `**/*.test.ts` - All `.test.ts` files at any depth
+- `src/{path}.ts` ↔ `tests/{path}.test.ts` - Source/test pairs
+
+---
 
 ## Output Format
 
-### .deepwork.policy.yml
-Create or update this file at the project root with the new policy entry.
+Create one of:
+- `.deepwork/policies/[policy-name].md` (v2 format, recommended)
+- Entry in `.deepwork.policy.yml` (v1 format, legacy)
+
+---
 
 ## Quality Criteria
 
-- Asked structured questions to understand user requirements
+- Asked structured questions to understand requirements
+- Chose appropriate detection mode (trigger/safety, set, or pair)
+- Chose appropriate action type (prompt or command)
 - Policy name is clear and descriptive
-- Trigger patterns accurately match the intended files
-- Safety patterns prevent unnecessary triggering
-- Instructions are actionable and specific
-- YAML is valid and properly formatted
+- Patterns accurately match intended files
+- Instructions or command are actionable
+- YAML frontmatter is valid
+
+---
 
 ## Context
 
-Policies are evaluated automatically when you finish working on a task. The system:
-1. Determines which files have changed based on each policy's `compare_to` setting:
-   - `base` (default): Files changed since the branch diverged from main/master
-   - `default_tip`: Files different from the current main/master branch
-   - `prompt`: Files changed since the last prompt submission
-2. Checks if any changes match policy trigger patterns
-3. Skips policies where safety patterns also matched
-4. Prompts you with instructions for any triggered policies
+Policies are evaluated automatically when you finish working. The system:
+
+1. Loads policies from `.deepwork/policies/` (v2) and `.deepwork.policy.yml` (v1)
+2. Detects changed files based on `compare_to` setting
+3. Evaluates each policy based on its detection mode
+4. For **command** actions: Runs the command automatically when the policy fires
+5. For **prompt** actions: Shows instructions if policy fires
 
-You can mark a policy as addressed by including `<promise>✓ Policy Name</promise>` in your response (replace Policy Name with the actual policy name). This tells the system you've already handled that policy's requirements.
+Mark a policy as addressed by including `<promise>✓ Policy Name</promise>` in your response (replace `Policy Name` with the policy's actual name).
 
 
 ## Inputs
@@ -260,6 +360,7 @@ All work for this job should be done on a dedicated work branch:
 ## Output Requirements
 
 Create the following output(s):
+- `.deepwork/policies/*.md`
 - `.deepwork.policy.yml`
 
 Ensure all outputs are:
@@ -274,7 +375,7 @@ After completing this step:
 
 2. **Inform the user**:
    - The define command is complete
-   - Outputs created: .deepwork.policy.yml
+   - Outputs created: .deepwork/policies/*.md, .deepwork.policy.yml
    - This command can be run again anytime to make further changes
 
 ## Command Complete
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d099153..08d8e29 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,9 +20,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Updated `policy_check.py` hook to use v2 system with queue-based deduplication
 
 ### Changed
-- Policy parser now supports both v1 (`.deepwork.policy.yml`) and v2 (`.deepwork/policies/*.md`) formats
 - Documentation updated with v2 policy examples and configuration
 
+### Removed
+- v1 policy format (`.deepwork.policy.yml`); only the v2 frontmatter markdown format (`.deepwork/policies/*.md`) is now supported
+
 ## [0.3.0] - 2026-01-16
 
 ### Added
diff --git a/README.md b/README.md
index 218a30f..6005b14 100644
--- a/README.md
+++ b/README.md
@@ -62,7 +62,6 @@ This will:
 - Generate core DeepWork jobs
 - Install DeepWork jobs for your AI assistant
 - Configure hooks for your AI assistant to enable policies
-- Create a `.deepwork.policy.yml` template file with example policies
 
 ## Quick Start
 
@@ -213,13 +212,13 @@ deepwork/
 │   │   ├── parser.py     # Job definition parsing
 │   │   ├── detector.py   # Platform detection
 │   │   ├── generator.py  # Skill file generation
-│   │   ├── policy_parser.py    # Policy parsing (v1 and v2)
+│   │   ├── policy_parser.py    # Policy parsing
 │   │   ├── pattern_matcher.py  # Variable pattern matching
 │   │   ├── policy_queue.py     # Policy state queue
 │   │   └── command_executor.py # Command action execution
 │   ├── hooks/            # Cross-platform hook wrappers
 │   │   ├── wrapper.py    # Input/output normalization
-│   │   ├── policy_check.py   # Policy evaluation hook (v2)
+│   │   ├── policy_check.py   # Policy evaluation hook
 │   │   ├── claude_hook.sh    # Claude Code adapter
 │   │   └── gemini_hook.sh    # Gemini CLI adapter
 │   ├── templates/        # Jinja2 templates
diff --git a/doc/architecture.md b/doc/architecture.md
index a17a1c5..806084d 100644
--- a/doc/architecture.md
+++ b/doc/architecture.md
@@ -46,7 +46,7 @@ deepwork/                       # DeepWork tool repository
 │       │   ├── detector.py     # AI platform detection
 │       │   ├── generator.py    # Command file generation
 │       │   ├── parser.py       # Job definition parsing
-│       │   ├── policy_parser.py    # Policy definition parsing (v1 and v2)
+│       │   ├── policy_parser.py    # Policy definition parsing
 │       │   ├── pattern_matcher.py  # Variable pattern matching for policies
 │       │   ├── policy_queue.py     # Policy state queue system
 │       │   ├── command_executor.py # Command action execution
@@ -56,8 +56,7 @@ deepwork/                       # DeepWork tool repository
 │       │   ├── wrapper.py           # Cross-platform input/output normalization
 │       │   ├── claude_hook.sh       # Shell wrapper for Claude Code
 │       │   ├── gemini_hook.sh       # Shell wrapper for Gemini CLI
-│       │   ├── policy_check.py      # Cross-platform policy evaluation hook
-│       │   └── evaluate_policies.py # Legacy policy evaluation CLI
+│       │   └── policy_check.py      # Cross-platform policy evaluation hook
 │       ├── templates/          # Command templates for each platform
 │       │   ├── claude/
 │       │   │   └── command-job-step.md.jinja
@@ -314,7 +313,6 @@ my-project/                     # User's project (target)
 │       │   └── steps/
 │       └── ad_campaign/
 │           └── ...
-├── .deepwork.policy.yml        # Legacy policy definitions (v1 format)
 ├── (rest of user's project files)
 └── README.md
 ```
@@ -1142,18 +1140,6 @@ The hooks are installed to `.claude/settings.json` during `deepwork sync`:
 }
 ```
 
-### Legacy v1 Format
-
-The v1 format (`.deepwork.policy.yml`) is still supported for backward compatibility:
-
-```yaml
-- name: "Update install guide"
-  trigger: "app/config/**/*"
-  safety: "docs/install_guide.md"
-  instructions: |
-    Configuration files have been modified. Please review docs/install_guide.md.
-```
-
 ### Cross-Platform Hook Wrapper System
 
 The `hooks/` module provides a wrapper system that allows writing hooks once in Python and running them on multiple platforms. This normalizes the differences between Claude Code and Gemini CLI hook systems.
diff --git a/src/deepwork/core/policy_parser.py b/src/deepwork/core/policy_parser.py
index f1c5a28..0637209 100644
--- a/src/deepwork/core/policy_parser.py
+++ b/src/deepwork/core/policy_parser.py
@@ -13,7 +13,7 @@
     matches_any_pattern,
     resolve_pattern,
 )
-from deepwork.schemas.policy_schema import POLICY_FRONTMATTER_SCHEMA, POLICY_SCHEMA
+from deepwork.schemas.policy_schema import POLICY_FRONTMATTER_SCHEMA
 from deepwork.utils.validation import ValidationError, validate_against_schema
 
 
@@ -523,105 +523,3 @@ def evaluate_policies(
             results.append(result)
 
     return results
-
-
-# =============================================================================
-# Legacy v1 Support (for migration)
-# =============================================================================
-
-
-@dataclass
-class PolicyV1:
-    """Legacy v1 policy format (from .deepwork.policy.yml)."""
-
-    name: str
-    triggers: list[str]
-    safety: list[str] = field(default_factory=list)
-    instructions: str = ""
-    compare_to: str = DEFAULT_COMPARE_TO
-
-    @classmethod
-    def from_dict(cls, data: dict[str, Any], base_dir: Path | None = None) -> "PolicyV1":
-        """Create PolicyV1 from dictionary (legacy format)."""
-        trigger = data["trigger"]
-        triggers = [trigger] if isinstance(trigger, str) else list(trigger)
-
-        safety_data = data.get("safety", [])
-        safety = [safety_data] if isinstance(safety_data, str) else list(safety_data)
-
-        if "instructions" in data:
-            instructions = data["instructions"]
-        elif "instructions_file" in data:
-            if base_dir is None:
-                raise PolicyParseError(
-                    f"Policy '{data['name']}' uses instructions_file but no base_dir provided"
-                )
-            instructions_path = base_dir / data["instructions_file"]
-            if not instructions_path.exists():
-                raise PolicyParseError(
-                    f"Policy '{data['name']}' instructions file not found: {instructions_path}"
-                )
-            instructions = instructions_path.read_text()
-        else:
-            raise PolicyParseError(
-                f"Policy '{data['name']}' must have 'instructions' or 'instructions_file'"
-            )
-
-        return cls(
-            name=data["name"],
-            triggers=triggers,
-            safety=safety,
-            instructions=instructions,
-            compare_to=data.get("compare_to", DEFAULT_COMPARE_TO),
-        )
-
-
-def parse_policy_file(policy_path: Path | str, base_dir: Path | None = None) -> list[PolicyV1]:
-    """
-    Parse policy definitions from a YAML file (legacy v1 format).
-
-    Args:
-        policy_path: Path to .deepwork.policy.yml file
-        base_dir: Base directory for resolving instructions_file paths
-
-    Returns:
-        List of parsed PolicyV1 objects
-    """
-    policy_path = Path(policy_path)
-
-    if not policy_path.exists():
-        raise PolicyParseError(f"Policy file does not exist: {policy_path}")
-
-    if not policy_path.is_file():
-        raise PolicyParseError(f"Policy path is not a file: {policy_path}")
-
-    if base_dir is None:
-        base_dir = policy_path.parent
-
-    try:
-        with open(policy_path, encoding="utf-8") as f:
-            policy_data = yaml.safe_load(f)
-    except yaml.YAMLError as e:
-        raise PolicyParseError(f"Failed to parse policy YAML: {e}") from e
-    except OSError as e:
-        raise PolicyParseError(f"Failed to read policy file: {e}") from e
-
-    if policy_data is None:
-        return []
-
-    if not isinstance(policy_data, list):
-        raise PolicyParseError(
-            f"Policy file must contain a list of policies, got {type(policy_data).__name__}"
-        )
-
-    try:
-        validate_against_schema(policy_data, POLICY_SCHEMA)
-    except ValidationError as e:
-        raise PolicyParseError(f"Policy definition validation failed: {e}") from e
-
-    policies = []
-    for policy_item in policy_data:
-        policy = PolicyV1.from_dict(policy_item, base_dir)
-        policies.append(policy)
-
-    return policies
diff --git a/src/deepwork/hooks/README.md b/src/deepwork/hooks/README.md
index 7cf5155..84914a1 100644
--- a/src/deepwork/hooks/README.md
+++ b/src/deepwork/hooks/README.md
@@ -17,7 +17,6 @@ The hook system provides:
 
 3. **Hook implementations**:
    - `policy_check.py` - Evaluates DeepWork policies on `after_agent` events
-   - `evaluate_policies.py` - Legacy Claude-specific policy evaluation
 
 ## Usage
 
@@ -180,4 +179,3 @@ pytest tests/shell_script_tests/test_hook_wrappers.py -v
 | `claude_hook.sh` | Shell wrapper for Claude Code |
 | `gemini_hook.sh` | Shell wrapper for Gemini CLI |
 | `policy_check.py` | Cross-platform policy evaluation hook |
-| `evaluate_policies.py` | Legacy Claude-specific policy evaluation |
diff --git a/src/deepwork/hooks/evaluate_policies.py b/src/deepwork/hooks/evaluate_policies.py
deleted file mode 100644
index 3a2b05d..0000000
--- a/src/deepwork/hooks/evaluate_policies.py
+++ /dev/null
@@ -1,410 +0,0 @@
-"""
-Policy evaluation module for DeepWork hooks.
-
-This module is called by the policy_stop_hook.sh script to evaluate which policies
-should fire based on changed files and conversation context.
-
-Usage:
-    python -m deepwork.hooks.evaluate_policies \
-        --policy-file .deepwork.policy.yml
-
-The conversation context is read from stdin and checked for <promise> tags
-that indicate policies have already been addressed.
-
-Changed files are computed based on each policy's compare_to setting:
-- base: Compare to merge-base with default branch (default)
-- default_tip: Two-dot diff against default branch tip
-- prompt: Compare to state captured at prompt submission
-
-Output is JSON suitable for Claude Code Stop hooks:
-    {"decision": "block", "reason": "..."}  # Block stop, policies need attention
-    {}  # No policies fired, allow stop
-"""
-
-import argparse
-import json
-import re
-import subprocess
-import sys
-from pathlib import Path
-
-from deepwork.core.pattern_matcher import matches_any_pattern
-from deepwork.core.policy_parser import (
-    PolicyParseError,
-    PolicyV1,
-    parse_policy_file,
-)
-
-
-def evaluate_policy_v1(policy: PolicyV1, changed_files: list[str]) -> bool:
-    """
-    Evaluate whether a v1 policy should fire based on changed files.
-
-    A policy fires when:
-    - At least one changed file matches a trigger pattern
-    - AND no changed file matches a safety pattern
-
-    Args:
-        policy: PolicyV1 to evaluate
-        changed_files: List of changed file paths
-
-    Returns:
-        True if policy should fire, False otherwise
-    """
-    # Check if any trigger matches
-    trigger_matched = False
-    for file_path in changed_files:
-        if matches_any_pattern(file_path, policy.triggers):
-            trigger_matched = True
-            break
-
-    if not trigger_matched:
-        return False
-
-    # Check if any safety pattern matches
-    if policy.safety:
-        for file_path in changed_files:
-            if matches_any_pattern(file_path, policy.safety):
-                return False
-
-    return True
-
-
-def get_default_branch() -> str:
-    """
-    Get the default branch name (main or master).
-
-    Returns:
-        Default branch name, or "main" if cannot be determined.
-    """
-    # Try to get the default branch from remote HEAD
-    try:
-        result = subprocess.run(
-            ["git", "symbolic-ref", "refs/remotes/origin/HEAD"],
-            capture_output=True,
-            text=True,
-            check=True,
-        )
-        # Output is like "refs/remotes/origin/main"
-        return result.stdout.strip().split("/")[-1]
-    except subprocess.CalledProcessError:
-        pass
-
-    # Try common default branch names
-    for branch in ["main", "master"]:
-        try:
-            subprocess.run(
-                ["git", "rev-parse", "--verify", f"origin/{branch}"],
-                capture_output=True,
-                check=True,
-            )
-            return branch
-        except subprocess.CalledProcessError:
-            continue
-
-    # Fall back to main
-    return "main"
-
-
-def get_changed_files_base() -> list[str]:
-    """
-    Get files changed relative to the base of the current branch.
-
-    This finds the merge-base between the current branch and the default branch,
-    then returns all files changed since that point.
-
-    Returns:
-        List of changed file paths.
-    """
-    default_branch = get_default_branch()
-
-    try:
-        # Get the merge-base (where current branch diverged from default)
-        result = subprocess.run(
-            ["git", "merge-base", "HEAD", f"origin/{default_branch}"],
-            capture_output=True,
-            text=True,
-            check=True,
-        )
-        merge_base = result.stdout.strip()
-
-        # Stage all changes so they appear in diff
-        subprocess.run(["git", "add", "-A"], capture_output=True, check=False)
-
-        # Get files changed since merge-base (including staged)
-        result = subprocess.run(
-            ["git", "diff", "--name-only", merge_base, "HEAD"],
-            capture_output=True,
-            text=True,
-            check=True,
-        )
-        committed_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        # Also get staged changes not yet committed
-        result = subprocess.run(
-            ["git", "diff", "--name-only", "--cached"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        staged_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        # Get untracked files
-        result = subprocess.run(
-            ["git", "ls-files", "--others", "--exclude-standard"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        all_files = committed_files | staged_files | untracked_files
-        return sorted([f for f in all_files if f])
-
-    except subprocess.CalledProcessError:
-        return []
-
-
-def get_changed_files_default_tip() -> list[str]:
-    """
-    Get files changed compared to the tip of the default branch.
-
-    This does a two-dot diff: what's different between HEAD and origin/default.
-
-    Returns:
-        List of changed file paths.
-    """
-    default_branch = get_default_branch()
-
-    try:
-        # Stage all changes so they appear in diff
-        subprocess.run(["git", "add", "-A"], capture_output=True, check=False)
-
-        # Two-dot diff against default branch tip
-        result = subprocess.run(
-            ["git", "diff", "--name-only", f"origin/{default_branch}..HEAD"],
-            capture_output=True,
-            text=True,
-            check=True,
-        )
-        committed_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        # Also get staged changes not yet committed
-        result = subprocess.run(
-            ["git", "diff", "--name-only", "--cached"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        staged_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        # Get untracked files
-        result = subprocess.run(
-            ["git", "ls-files", "--others", "--exclude-standard"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        all_files = committed_files | staged_files | untracked_files
-        return sorted([f for f in all_files if f])
-
-    except subprocess.CalledProcessError:
-        return []
-
-
-def get_changed_files_prompt() -> list[str]:
-    """
-    Get files changed since the prompt was submitted.
-
-    This compares against the baseline captured by capture_prompt_work_tree.sh.
-
-    Returns:
-        List of changed file paths.
-    """
-    baseline_path = Path(".deepwork/.last_work_tree")
-
-    try:
-        # Stage all changes so we can see them with --cached
-        subprocess.run(["git", "add", "-A"], capture_output=True, check=False)
-
-        # Get all staged files (includes what was just staged)
-        result = subprocess.run(
-            ["git", "diff", "--name-only", "--cached"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        current_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-        current_files = {f for f in current_files if f}
-
-        if baseline_path.exists():
-            # Read baseline and find new files
-            baseline_files = set(baseline_path.read_text().strip().split("\n"))
-            baseline_files = {f for f in baseline_files if f}
-            # Return files that are in current but not in baseline
-            new_files = current_files - baseline_files
-            return sorted(new_files)
-        else:
-            # No baseline, return all current changes
-            return sorted(current_files)
-
-    except (subprocess.CalledProcessError, OSError):
-        return []
-
-
-def get_changed_files_for_mode(mode: str) -> list[str]:
-    """
-    Get changed files for a specific compare_to mode.
-
-    Args:
-        mode: One of 'base', 'default_tip', or 'prompt'
-
-    Returns:
-        List of changed file paths.
-    """
-    if mode == "base":
-        return get_changed_files_base()
-    elif mode == "default_tip":
-        return get_changed_files_default_tip()
-    elif mode == "prompt":
-        return get_changed_files_prompt()
-    else:
-        # Unknown mode, fall back to base
-        return get_changed_files_base()
-
-
-def extract_promise_tags(text: str) -> set[str]:
-    """
-    Extract policy names from <promise> tags in text.
-
-    Supported format:
-    - <promise>✓ Policy Name</promise>
-
-    Args:
-        text: Text to search for promise tags
-
-    Returns:
-        Set of policy names that have been promised/addressed
-    """
-    # Match <promise>✓ Policy Name</promise> and extract the policy name
-    pattern = r"<promise>✓\s*([^<]+)</promise>"
-    matches = re.findall(pattern, text, re.IGNORECASE | re.DOTALL)
-    return {m.strip() for m in matches}
-
-
-def format_policy_message(policies: list) -> str:
-    """
-    Format triggered policies into a message for the agent.
-
-    Args:
-        policies: List of Policy objects that fired
-
-    Returns:
-        Formatted message with all policy instructions
-    """
-    lines = ["## DeepWork Policies Triggered", ""]
-    lines.append(
-        "Comply with the following policies. "
-        "To mark a policy as addressed, include `<promise>✓ Policy Name</promise>` "
-        "in your response (replace Policy Name with the actual policy name)."
-    )
-    lines.append("")
-
-    for policy in policies:
-        lines.append(f"### Policy: {policy.name}")
-        lines.append("")
-        lines.append(policy.instructions.strip())
-        lines.append("")
-
-    return "\n".join(lines)
-
-
-def main() -> None:
-    """Main entry point for policy evaluation CLI."""
-    parser = argparse.ArgumentParser(
-        description="Evaluate DeepWork policies based on changed files"
-    )
-    parser.add_argument(
-        "--policy-file",
-        type=str,
-        required=True,
-        help="Path to .deepwork.policy.yml file",
-    )
-
-    args = parser.parse_args()
-
-    # Check if policy file exists
-    policy_path = Path(args.policy_file)
-    if not policy_path.exists():
-        # No policy file, nothing to evaluate
-        print("{}")
-        return
-
-    # Read conversation context from stdin (if available)
-    conversation_context = ""
-    if not sys.stdin.isatty():
-        try:
-            conversation_context = sys.stdin.read()
-        except Exception:
-            pass
-
-    # Extract promise tags from conversation
-    promised_policies = extract_promise_tags(conversation_context)
-
-    # Parse policies
-    try:
-        policies = parse_policy_file(policy_path)
-    except PolicyParseError as e:
-        # Log error to stderr, return empty result
-        print(f"Error parsing policy file: {e}", file=sys.stderr)
-        print("{}")
-        return
-
-    if not policies:
-        # No policies defined
-        print("{}")
-        return
-
-    # Group policies by compare_to mode to minimize git calls
-    policies_by_mode: dict[str, list[PolicyV1]] = {}
-    for policy in policies:
-        mode = policy.compare_to
-        if mode not in policies_by_mode:
-            policies_by_mode[mode] = []
-        policies_by_mode[mode].append(policy)
-
-    # Get changed files for each mode and evaluate policies
-    fired_policies: list[PolicyV1] = []
-    for mode, mode_policies in policies_by_mode.items():
-        changed_files = get_changed_files_for_mode(mode)
-        if not changed_files:
-            continue
-
-        for policy in mode_policies:
-            # Skip if already promised
-            if policy.name in promised_policies:
-                continue
-            # Evaluate this policy
-            if evaluate_policy_v1(policy, changed_files):
-                fired_policies.append(policy)
-
-    if not fired_policies:
-        # No policies fired
-        print("{}")
-        return
-
-    # Format output for Claude Code Stop hooks
-    # Use "decision": "block" to prevent Claude from stopping
-    message = format_policy_message(fired_policies)
-    result = {
-        "decision": "block",
-        "reason": message,
-    }
-
-    print(json.dumps(result))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/src/deepwork/schemas/policy_schema.py b/src/deepwork/schemas/policy_schema.py
index 690cb64..51e3581 100644
--- a/src/deepwork/schemas/policy_schema.py
+++ b/src/deepwork/schemas/policy_schema.py
@@ -101,82 +101,3 @@
         },
     ],
 }
-
-
-# Legacy schema for .deepwork.policy.yml (v1 format)
-# Kept for reference but not used in v2
-POLICY_SCHEMA_V1: dict[str, Any] = {
-    "$schema": "http://json-schema.org/draft-07/schema#",
-    "type": "array",
-    "description": "List of policies that trigger based on file changes",
-    "items": {
-        "type": "object",
-        "required": ["name", "trigger"],
-        "properties": {
-            "name": {
-                "type": "string",
-                "minLength": 1,
-                "description": "Friendly name for the policy",
-            },
-            "trigger": {
-                "oneOf": [
-                    {
-                        "type": "string",
-                        "minLength": 1,
-                        "description": "Glob pattern for files that trigger this policy",
-                    },
-                    {
-                        "type": "array",
-                        "items": {"type": "string", "minLength": 1},
-                        "minItems": 1,
-                        "description": "List of glob patterns for files that trigger this policy",
-                    },
-                ],
-                "description": "Glob pattern(s) for files that, if changed, should trigger this policy",
-            },
-            "safety": {
-                "oneOf": [
-                    {
-                        "type": "string",
-                        "minLength": 1,
-                        "description": "Glob pattern for safety files",
-                    },
-                    {
-                        "type": "array",
-                        "items": {"type": "string", "minLength": 1},
-                        "description": "List of glob patterns for safety files",
-                    },
-                ],
-                "description": "Glob pattern(s) for files that, if also changed, mean the policy doesn't need to trigger",
-            },
-            "instructions": {
-                "type": "string",
-                "minLength": 1,
-                "description": "Instructions to give the agent when this policy triggers",
-            },
-            "instructions_file": {
-                "type": "string",
-                "minLength": 1,
-                "description": "Path to a file containing instructions (alternative to inline instructions)",
-            },
-            "compare_to": {
-                "type": "string",
-                "enum": ["base", "default_tip", "prompt"],
-                "description": (
-                    "What to compare against when detecting changed files. "
-                    "'base' (default) compares to the base of the current branch. "
-                    "'default_tip' compares to the tip of the default branch. "
-                    "'prompt' compares to the state at the start of the prompt."
-                ),
-            },
-        },
-        "oneOf": [
-            {"required": ["instructions"]},
-            {"required": ["instructions_file"]},
-        ],
-        "additionalProperties": False,
-    },
-}
-
-# Alias for backwards compatibility
-POLICY_SCHEMA = POLICY_SCHEMA_V1
diff --git a/src/deepwork/standard_jobs/deepwork_policy/hooks/policy_stop_hook.sh b/src/deepwork/standard_jobs/deepwork_policy/hooks/policy_stop_hook.sh
index 6a84bdd..4ad1b53 100755
--- a/src/deepwork/standard_jobs/deepwork_policy/hooks/policy_stop_hook.sh
+++ b/src/deepwork/standard_jobs/deepwork_policy/hooks/policy_stop_hook.sh
@@ -2,27 +2,24 @@
 # policy_stop_hook.sh - Evaluates policies when the agent stops
 #
 # This script is called as a Claude Code Stop hook. It:
-# 1. Evaluates policies from .deepwork/policies/ (v2) or .deepwork.policy.yml (v1)
+# 1. Evaluates policies from .deepwork/policies/
 # 2. Computes changed files based on each policy's compare_to setting
 # 3. Checks for <promise> tags in the conversation transcript
 # 4. Returns JSON to block stop if policies need attention
 
 set -e
 
-# Determine which policy system to use
-USE_V2=false
-V1_POLICY_FILE=".deepwork.policy.yml"
-V2_POLICY_DIR=".deepwork/policies"
+# Check if policies directory exists with .md files
+POLICY_DIR=".deepwork/policies"
 
-if [ -d "${V2_POLICY_DIR}" ]; then
-    # Check if there are any .md files in the v2 directory
-    if ls "${V2_POLICY_DIR}"/*.md 1>/dev/null 2>&1; then
-        USE_V2=true
-    fi
+if [ ! -d "${POLICY_DIR}" ]; then
+    # No policies directory, nothing to do
+    exit 0
 fi
 
-# If no v2 policies and no v1 policy file, nothing to do
-if [ "${USE_V2}" = false ] && [ ! -f "${V1_POLICY_FILE}" ]; then
+# Check if there are any .md files
+if ! ls "${POLICY_DIR}"/*.md 1>/dev/null 2>&1; then
+    # No policy files, nothing to do
     exit 0
 fi
 
@@ -32,37 +29,14 @@ if [ ! -t 0 ]; then
     HOOK_INPUT=$(cat)
 fi
 
-if [ "${USE_V2}" = true ]; then
-    # Use v2 policy system via cross-platform wrapper
-    # The wrapper reads JSON input and handles transcript extraction
-    result=$(echo "${HOOK_INPUT}" | DEEPWORK_HOOK_PLATFORM=claude DEEPWORK_HOOK_EVENT=Stop python -m deepwork.hooks.policy_check 2>/dev/null || echo '{}')
-else
-    # Use v1 policy system - extract conversation context for evaluate_policies
-
-    # Extract transcript_path from the hook input JSON using jq
-    # Claude Code passes: {"session_id": "...", "transcript_path": "...", ...}
-    TRANSCRIPT_PATH=""
-    if [ -n "${HOOK_INPUT}" ]; then
-        TRANSCRIPT_PATH=$(echo "${HOOK_INPUT}" | jq -r '.transcript_path // empty' 2>/dev/null || echo "")
-    fi
-
-    # Extract conversation text from the JSONL transcript
-    # The transcript is JSONL format - each line is a JSON object
-    # We need to extract the text content from assistant messages
-    conversation_context=""
-    if [ -n "${TRANSCRIPT_PATH}" ] && [ -f "${TRANSCRIPT_PATH}" ]; then
-        # Extract text content from all assistant messages in the transcript
-        # Each line is a JSON object; we extract .message.content[].text for assistant messages
-        conversation_context=$(cat "${TRANSCRIPT_PATH}" | \
-            grep -E '"role"\s*:\s*"assistant"' | \
-            jq -r '.message.content // [] | map(select(.type == "text")) | map(.text) | join("\n")' 2>/dev/null | \
-            tr -d '\0' || echo "")
-    fi
+# Call the Python policy evaluator via the cross-platform wrapper
+# The wrapper reads JSON input and handles transcript extraction
+# Note: exit code 2 means "block", which is a valid result (not an error); "|| true" keeps "set -e" from aborting so the output is still captured
+result=$(echo "${HOOK_INPUT}" | DEEPWORK_HOOK_PLATFORM=claude DEEPWORK_HOOK_EVENT=Stop python -m deepwork.hooks.policy_check 2>/dev/null) || true
 
-    # Call the Python v1 evaluator
-    result=$(echo "${conversation_context}" | python -m deepwork.hooks.evaluate_policies \
-        --policy-file "${V1_POLICY_FILE}" \
-        2>/dev/null || echo '{}')
+# If no output (error case), provide empty JSON as fallback
+if [ -z "${result}" ]; then
+    result='{}'
 fi
 
 # Output the result (JSON for Claude Code hooks)
diff --git a/tests/fixtures/policies/empty_policy.yml b/tests/fixtures/policies/empty_policy.yml
deleted file mode 100644
index c8faa07..0000000
--- a/tests/fixtures/policies/empty_policy.yml
+++ /dev/null
@@ -1 +0,0 @@
-# Empty policy file
diff --git a/tests/fixtures/policies/instructions/security_review.md b/tests/fixtures/policies/instructions/security_review.md
deleted file mode 100644
index b64978b..0000000
--- a/tests/fixtures/policies/instructions/security_review.md
+++ /dev/null
@@ -1,8 +0,0 @@
-## Security Review Required
-
-Authentication code has been modified. Please:
-
-1. Check for hardcoded credentials
-2. Verify input validation
-3. Review access control logic
-4. Update security documentation
diff --git a/tests/fixtures/policies/invalid_missing_instructions.yml b/tests/fixtures/policies/invalid_missing_instructions.yml
deleted file mode 100644
index 6c47934..0000000
--- a/tests/fixtures/policies/invalid_missing_instructions.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-- name: "Invalid policy"
-  trigger: "src/**/*"
diff --git a/tests/fixtures/policies/invalid_missing_trigger.yml b/tests/fixtures/policies/invalid_missing_trigger.yml
deleted file mode 100644
index a5c8949..0000000
--- a/tests/fixtures/policies/invalid_missing_trigger.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-- name: "Invalid policy"
-  safety: "some/file.md"
-  instructions: "This policy is missing a trigger"
diff --git a/tests/fixtures/policies/multiple_policies.yml b/tests/fixtures/policies/multiple_policies.yml
deleted file mode 100644
index da29231..0000000
--- a/tests/fixtures/policies/multiple_policies.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-- name: "Update install guide on config changes"
-  trigger: "app/config/**/*"
-  safety: "docs/install_guide.md"
-  instructions: "Update docs/install_guide.md if needed."
-
-- name: "Security review for auth changes"
-  trigger:
-    - "src/auth/**/*"
-    - "src/security/**/*"
-  safety:
-    - "SECURITY.md"
-    - "docs/security_review.md"
-  instructions: |
-    Authentication or security code has changed.
-    Please ensure:
-    1. No secrets are exposed
-    2. Security review documentation is updated
-
-- name: "API documentation update"
-  trigger: "src/api/**/*.py"
-  instructions: "API code changed. Update API documentation."
diff --git a/tests/fixtures/policies/policy_with_instructions_file.yml b/tests/fixtures/policies/policy_with_instructions_file.yml
deleted file mode 100644
index 267bfc6..0000000
--- a/tests/fixtures/policies/policy_with_instructions_file.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-- name: "Security review"
-  trigger: "src/auth/**/*"
-  instructions_file: "instructions/security_review.md"
diff --git a/tests/fixtures/policies/valid_policy.yml b/tests/fixtures/policies/valid_policy.yml
deleted file mode 100644
index a2b0b6b..0000000
--- a/tests/fixtures/policies/valid_policy.yml
+++ /dev/null
@@ -1,6 +0,0 @@
-- name: "Update install guide on config changes"
-  trigger: "app/config/**/*"
-  safety: "docs/install_guide.md"
-  instructions: |
-    Configuration files have changed. Please review docs/install_guide.md
-    and update it if the installation instructions need to change.
diff --git a/tests/shell_script_tests/conftest.py b/tests/shell_script_tests/conftest.py
index 085cf2f..e9b9768 100644
--- a/tests/shell_script_tests/conftest.py
+++ b/tests/shell_script_tests/conftest.py
@@ -32,20 +32,24 @@ def git_repo_with_policy(tmp_path: Path) -> Path:
     repo.index.add(["README.md"])
     repo.index.commit("Initial commit")
 
-    # Policy that triggers on any Python file
-    policy_file = tmp_path / ".deepwork.policy.yml"
+    # Create v2 policy directory and file
+    policies_dir = tmp_path / ".deepwork" / "policies"
+    policies_dir.mkdir(parents=True, exist_ok=True)
+
+    # Policy that triggers on any Python file (v2 format)
+    policy_file = policies_dir / "python-file-policy.md"
     policy_file.write_text(
-        """- name: "Python File Policy"
-  trigger: "**/*.py"
-  compare_to: prompt
-  instructions: |
-    Review Python files for quality.
+        """---
+name: Python File Policy
+trigger: "**/*.py"
+compare_to: prompt
+---
+Review Python files for quality.
 """
     )
 
     # Empty baseline so new files trigger
     deepwork_dir = tmp_path / ".deepwork"
-    deepwork_dir.mkdir(exist_ok=True)
     (deepwork_dir / ".last_work_tree").write_text("")
 
     return tmp_path
diff --git a/tests/shell_script_tests/test_policy_stop_hook.py b/tests/shell_script_tests/test_policy_stop_hook.py
index 07a2d22..bfe9c04 100644
--- a/tests/shell_script_tests/test_policy_stop_hook.py
+++ b/tests/shell_script_tests/test_policy_stop_hook.py
@@ -17,7 +17,7 @@
 
 @pytest.fixture
 def git_repo_with_src_policy(tmp_path: Path) -> Path:
-    """Create a git repo with a policy file that triggers on src/** changes."""
+    """Create a git repo with a v2 policy file that triggers on src/** changes."""
     repo = Repo.init(tmp_path)
 
     readme = tmp_path / "README.md"
@@ -25,21 +25,25 @@ def git_repo_with_src_policy(tmp_path: Path) -> Path:
     repo.index.add(["README.md"])
     repo.index.commit("Initial commit")
 
+    # Create v2 policy directory and file
+    policies_dir = tmp_path / ".deepwork" / "policies"
+    policies_dir.mkdir(parents=True, exist_ok=True)
+
     # Use compare_to: prompt since test repos don't have origin remote
-    policy_file = tmp_path / ".deepwork.policy.yml"
+    policy_file = policies_dir / "test-policy.md"
     policy_file.write_text(
-        """- name: "Test Policy"
-  trigger: "src/**/*"
-  compare_to: prompt
-  instructions: |
-    This is a test policy that fires when src/ files change.
-    Please address this policy.
+        """---
+name: Test Policy
+trigger: "src/**/*"
+compare_to: prompt
+---
+This is a test policy that fires when src/ files change.
+Please address this policy.
 """
     )
 
     # Empty baseline means all current files are "new"
     deepwork_dir = tmp_path / ".deepwork"
-    deepwork_dir.mkdir(exist_ok=True)
     (deepwork_dir / ".last_work_tree").write_text("")
 
     return tmp_path
@@ -112,14 +116,14 @@ def test_outputs_empty_json_when_no_policy_fires(
         # Should be empty JSON (no blocking)
         assert result == {}, f"Expected empty JSON when no policies fire, got: {result}"
 
-    def test_exits_early_when_no_policy_file(self, policy_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that the hook exits cleanly when no policy file exists."""
+    def test_exits_early_when_no_policy_dir(self, policy_hooks_dir: Path, git_repo: Path) -> None:
+        """Test that the hook exits cleanly when no policy directory exists."""
         script_path = policy_hooks_dir / "policy_stop_hook.sh"
         stdout, stderr, code = run_stop_hook(script_path, git_repo)
 
         # Should exit with code 0 and produce no output (or empty)
         assert code == 0, f"Expected exit code 0, got {code}. stderr: {stderr}"
-        # No output is fine when there's no policy file
+        # No output is fine when there's no policy directory
         output = stdout.strip()
         if output:
             # If there is output, it should be valid JSON
@@ -167,7 +171,7 @@ def test_respects_promise_tags(
         try:
             # Run the stop hook with transcript path
             script_path = policy_hooks_dir / "policy_stop_hook.sh"
-            hook_input = {"transcript_path": transcript_path}
+            hook_input = {"transcript_path": transcript_path, "hook_event_name": "Stop"}
             stdout, stderr, code = run_stop_hook(script_path, git_repo_with_src_policy, hook_input)
 
             # Parse the output
@@ -191,22 +195,24 @@ def test_safety_pattern_prevents_firing(self, policy_hooks_dir: Path, tmp_path:
         repo.index.add(["README.md"])
         repo.index.commit("Initial commit")
 
-        # Create a policy with a safety pattern
-        # Use compare_to: prompt since test repos don't have origin remote
-        policy_file = tmp_path / ".deepwork.policy.yml"
+        # v2 policy with a safety pattern; compare_to: prompt as test repos have no origin remote
+        policies_dir = tmp_path / ".deepwork" / "policies"
+        policies_dir.mkdir(parents=True, exist_ok=True)
+
+        policy_file = policies_dir / "documentation-policy.md"
         policy_file.write_text(
-            """- name: "Documentation Policy"
-  trigger: "src/**/*"
-  safety: "docs/**/*"
-  compare_to: prompt
-  instructions: |
-    Update documentation when changing source files.
+            """---
+name: Documentation Policy
+trigger: "src/**/*"
+safety: "docs/**/*"
+compare_to: prompt
+---
+Update documentation when changing source files.
 """
         )
 
         # Create .deepwork directory with empty baseline
         deepwork_dir = tmp_path / ".deepwork"
-        deepwork_dir.mkdir(exist_ok=True)
         (deepwork_dir / ".last_work_tree").write_text("")
 
         # Create both trigger and safety files
diff --git a/tests/unit/test_evaluate_policies.py b/tests/unit/test_evaluate_policies.py
deleted file mode 100644
index c0abdce..0000000
--- a/tests/unit/test_evaluate_policies.py
+++ /dev/null
@@ -1,101 +0,0 @@
-"""Tests for the hooks evaluate_policies module."""
-
-from deepwork.core.policy_parser import PolicyV1
-from deepwork.hooks.evaluate_policies import extract_promise_tags, format_policy_message
-
-
-class TestExtractPromiseTags:
-    """Tests for extract_promise_tags function."""
-
-    def test_extracts_policy_name_from_promise(self) -> None:
-        """Test extracting policy name from promise tag body."""
-        text = "<promise>✓ Update Docs</promise>"
-        result = extract_promise_tags(text)
-        assert result == {"Update Docs"}
-
-    def test_extracts_multiple_promises(self) -> None:
-        """Test extracting multiple promise tags."""
-        text = """
-        I've addressed the policies.
-        <promise>✓ Update Docs</promise>
-        <promise>✓ Security Review</promise>
-        """
-        result = extract_promise_tags(text)
-        assert result == {"Update Docs", "Security Review"}
-
-    def test_case_insensitive(self) -> None:
-        """Test that promise tag matching is case insensitive."""
-        text = "<PROMISE>✓ Test Policy</PROMISE>"
-        result = extract_promise_tags(text)
-        assert result == {"Test Policy"}
-
-    def test_returns_empty_set_for_no_promises(self) -> None:
-        """Test that empty set is returned when no promises found."""
-        text = "This is just some regular text without any promise tags."
-        result = extract_promise_tags(text)
-        assert result == set()
-
-    def test_strips_whitespace_from_policy_name(self) -> None:
-        """Test that whitespace is stripped from extracted policy names."""
-        text = "<promise>✓   Policy With Spaces   </promise>"
-        result = extract_promise_tags(text)
-        assert result == {"Policy With Spaces"}
-
-
-class TestFormatPolicyMessage:
-    """Tests for format_policy_message function."""
-
-    def test_formats_single_policy(self) -> None:
-        """Test formatting a single policy."""
-        policies = [
-            PolicyV1(
-                name="Test Policy",
-                triggers=["src/*"],
-                safety=[],
-                instructions="Please update the documentation.",
-            )
-        ]
-        result = format_policy_message(policies)
-
-        assert "## DeepWork Policies Triggered" in result
-        assert "### Policy: Test Policy" in result
-        assert "Please update the documentation." in result
-        assert "<promise>✓ Policy Name</promise>" in result
-
-    def test_formats_multiple_policies(self) -> None:
-        """Test formatting multiple policies."""
-        policies = [
-            PolicyV1(
-                name="Policy 1",
-                triggers=["src/*"],
-                safety=[],
-                instructions="Do thing 1.",
-            ),
-            PolicyV1(
-                name="Policy 2",
-                triggers=["test/*"],
-                safety=[],
-                instructions="Do thing 2.",
-            ),
-        ]
-        result = format_policy_message(policies)
-
-        assert "### Policy: Policy 1" in result
-        assert "### Policy: Policy 2" in result
-        assert "Do thing 1." in result
-        assert "Do thing 2." in result
-
-    def test_strips_instruction_whitespace(self) -> None:
-        """Test that instruction whitespace is stripped."""
-        policies = [
-            PolicyV1(
-                name="Test",
-                triggers=["*"],
-                safety=[],
-                instructions="  \n  Instructions here  \n  ",
-            )
-        ]
-        result = format_policy_message(policies)
-
-        # Should be stripped but present
-        assert "Instructions here" in result
diff --git a/tests/unit/test_policy_parser.py b/tests/unit/test_policy_parser.py
index 24e537c..62c73cb 100644
--- a/tests/unit/test_policy_parser.py
+++ b/tests/unit/test_policy_parser.py
@@ -10,162 +10,12 @@
     DetectionMode,
     Policy,
     PolicyParseError,
-    PolicyV1,
     evaluate_policies,
     evaluate_policy,
-    parse_policy_file,
+    load_policies_from_directory,
 )
 
 
-class TestPolicyV1:
-    """Tests for PolicyV1 dataclass (legacy format)."""
-
-    def test_from_dict_with_inline_instructions(self) -> None:
-        """Test creating policy from dict with inline instructions."""
-        data = {
-            "name": "Test Policy",
-            "trigger": "src/**/*",
-            "safety": "docs/readme.md",
-            "instructions": "Do something",
-        }
-        policy = PolicyV1.from_dict(data)
-
-        assert policy.name == "Test Policy"
-        assert policy.triggers == ["src/**/*"]
-        assert policy.safety == ["docs/readme.md"]
-        assert policy.instructions == "Do something"
-
-    def test_from_dict_normalizes_trigger_string_to_list(self) -> None:
-        """Test that trigger string is normalized to list."""
-        data = {
-            "name": "Test",
-            "trigger": "*.py",
-            "instructions": "Check it",
-        }
-        policy = PolicyV1.from_dict(data)
-
-        assert policy.triggers == ["*.py"]
-
-    def test_from_dict_preserves_trigger_list(self) -> None:
-        """Test that trigger list is preserved."""
-        data = {
-            "name": "Test",
-            "trigger": ["*.py", "*.js"],
-            "instructions": "Check it",
-        }
-        policy = PolicyV1.from_dict(data)
-
-        assert policy.triggers == ["*.py", "*.js"]
-
-    def test_from_dict_normalizes_safety_string_to_list(self) -> None:
-        """Test that safety string is normalized to list."""
-        data = {
-            "name": "Test",
-            "trigger": "src/*",
-            "safety": "docs/README.md",
-            "instructions": "Check it",
-        }
-        policy = PolicyV1.from_dict(data)
-
-        assert policy.safety == ["docs/README.md"]
-
-    def test_from_dict_safety_defaults_to_empty_list(self) -> None:
-        """Test that missing safety defaults to empty list."""
-        data = {
-            "name": "Test",
-            "trigger": "src/*",
-            "instructions": "Check it",
-        }
-        policy = PolicyV1.from_dict(data)
-
-        assert policy.safety == []
-
-    def test_from_dict_with_instructions_file(self, temp_dir: Path) -> None:
-        """Test creating policy from dict with instructions_file."""
-        # Create instructions file
-        instructions_file = temp_dir / "instructions.md"
-        instructions_file.write_text("# Instructions\nDo this and that.")
-
-        data = {
-            "name": "Test Policy",
-            "trigger": "src/*",
-            "instructions_file": "instructions.md",
-        }
-        policy = PolicyV1.from_dict(data, base_dir=temp_dir)
-
-        assert policy.instructions == "# Instructions\nDo this and that."
-
-    def test_from_dict_instructions_file_not_found(self, temp_dir: Path) -> None:
-        """Test error when instructions_file doesn't exist."""
-        data = {
-            "name": "Test Policy",
-            "trigger": "src/*",
-            "instructions_file": "nonexistent.md",
-        }
-
-        with pytest.raises(PolicyParseError, match="instructions file not found"):
-            PolicyV1.from_dict(data, base_dir=temp_dir)
-
-    def test_from_dict_instructions_file_without_base_dir(self) -> None:
-        """Test error when instructions_file used without base_dir."""
-        data = {
-            "name": "Test Policy",
-            "trigger": "src/*",
-            "instructions_file": "instructions.md",
-        }
-
-        with pytest.raises(PolicyParseError, match="no base_dir provided"):
-            PolicyV1.from_dict(data, base_dir=None)
-
-    def test_from_dict_compare_to_defaults_to_base(self) -> None:
-        """Test that compare_to defaults to 'base'."""
-        data = {
-            "name": "Test",
-            "trigger": "src/*",
-            "instructions": "Check it",
-        }
-        policy = PolicyV1.from_dict(data)
-
-        assert policy.compare_to == DEFAULT_COMPARE_TO
-        assert policy.compare_to == "base"
-
-    def test_from_dict_compare_to_explicit_base(self) -> None:
-        """Test explicit compare_to: base."""
-        data = {
-            "name": "Test",
-            "trigger": "src/*",
-            "instructions": "Check it",
-            "compare_to": "base",
-        }
-        policy = PolicyV1.from_dict(data)
-
-        assert policy.compare_to == "base"
-
-    def test_from_dict_compare_to_default_tip(self) -> None:
-        """Test compare_to: default_tip."""
-        data = {
-            "name": "Test",
-            "trigger": "src/*",
-            "instructions": "Check it",
-            "compare_to": "default_tip",
-        }
-        policy = PolicyV1.from_dict(data)
-
-        assert policy.compare_to == "default_tip"
-
-    def test_from_dict_compare_to_prompt(self) -> None:
-        """Test compare_to: prompt."""
-        data = {
-            "name": "Test",
-            "trigger": "src/*",
-            "instructions": "Check it",
-            "compare_to": "prompt",
-        }
-        policy = PolicyV1.from_dict(data)
-
-        assert policy.compare_to == "prompt"
-
-
 class TestMatchesPattern:
     """Tests for matches_pattern function."""
 
@@ -362,72 +212,153 @@ def test_returns_empty_when_no_policies_fire(self) -> None:
         assert len(fired) == 0
 
 
-class TestParsePolicyFile:
-    """Tests for parse_policy_file function."""
+class TestLoadPoliciesFromDirectory:
+    """Tests for load_policies_from_directory function."""
 
-    def test_parses_valid_policy_file(self, fixtures_dir: Path) -> None:
-        """Test parsing a valid policy file."""
-        policy_file = fixtures_dir / "policies" / "valid_policy.yml"
-        policies = parse_policy_file(policy_file)
+    def test_loads_policies_from_directory(self, temp_dir: Path) -> None:
+        """Test loading policies from a directory."""
+        policies_dir = temp_dir / "policies"
+        policies_dir.mkdir()
 
-        assert len(policies) == 1
-        assert policies[0].name == "Update install guide on config changes"
-        assert policies[0].triggers == ["app/config/**/*"]
-        assert policies[0].safety == ["docs/install_guide.md"]
-        assert "Configuration files have changed" in policies[0].instructions
-
-    def test_parses_multiple_policies(self, fixtures_dir: Path) -> None:
-        """Test parsing a file with multiple policies."""
-        policy_file = fixtures_dir / "policies" / "multiple_policies.yml"
-        policies = parse_policy_file(policy_file)
-
-        assert len(policies) == 3
-        assert policies[0].name == "Update install guide on config changes"
-        assert policies[1].name == "Security review for auth changes"
-        assert policies[2].name == "API documentation update"
-
-        # Check that arrays are parsed correctly
-        assert policies[1].triggers == ["src/auth/**/*", "src/security/**/*"]
-        assert policies[1].safety == ["SECURITY.md", "docs/security_review.md"]
-
-    def test_parses_policy_with_instructions_file(self, fixtures_dir: Path) -> None:
-        """Test parsing a policy with instructions_file."""
-        policy_file = fixtures_dir / "policies" / "policy_with_instructions_file.yml"
-        policies = parse_policy_file(policy_file)
+        # Create a policy file
+        policy_file = policies_dir / "test-policy.md"
+        policy_file.write_text(
+            """---
+name: Test Policy
+trigger: "src/**/*"
+---
+Please check the source files.
+"""
+        )
+
+        policies = load_policies_from_directory(policies_dir)
 
         assert len(policies) == 1
-        assert "Security Review Required" in policies[0].instructions
-        assert "hardcoded credentials" in policies[0].instructions
+        assert policies[0].name == "Test Policy"
+        assert policies[0].triggers == ["src/**/*"]
+        assert policies[0].detection_mode == DetectionMode.TRIGGER_SAFETY
+        assert "check the source files" in policies[0].instructions
+
+    def test_loads_multiple_policies(self, temp_dir: Path) -> None:
+        """Test loading multiple policies."""
+        policies_dir = temp_dir / "policies"
+        policies_dir.mkdir()
+
+        # Create policy files
+        (policies_dir / "policy1.md").write_text(
+            """---
+name: Policy 1
+trigger: "src/**/*"
+---
+Instructions for policy 1.
+"""
+        )
+        (policies_dir / "policy2.md").write_text(
+            """---
+name: Policy 2
+trigger: "test/**/*"
+---
+Instructions for policy 2.
+"""
+        )
+
+        policies = load_policies_from_directory(policies_dir)
+
+        assert len(policies) == 2
+        names = {p.name for p in policies}
+        assert names == {"Policy 1", "Policy 2"}
 
-    def test_empty_policy_file_returns_empty_list(self, fixtures_dir: Path) -> None:
-        """Test that empty policy file returns empty list."""
-        policy_file = fixtures_dir / "policies" / "empty_policy.yml"
-        policies = parse_policy_file(policy_file)
+    def test_returns_empty_for_empty_directory(self, temp_dir: Path) -> None:
+        """Test that empty directory returns empty list."""
+        policies_dir = temp_dir / "policies"
+        policies_dir.mkdir()
+
+        policies = load_policies_from_directory(policies_dir)
 
         assert policies == []
 
-    def test_raises_for_missing_trigger(self, fixtures_dir: Path) -> None:
-        """Test error when policy is missing trigger."""
-        policy_file = fixtures_dir / "policies" / "invalid_missing_trigger.yml"
+    def test_returns_empty_for_nonexistent_directory(self, temp_dir: Path) -> None:
+        """Test that nonexistent directory returns empty list."""
+        policies_dir = temp_dir / "nonexistent"
 
-        with pytest.raises(PolicyParseError, match="validation failed"):
-            parse_policy_file(policy_file)
+        policies = load_policies_from_directory(policies_dir)
 
-    def test_raises_for_missing_instructions(self, fixtures_dir: Path) -> None:
-        """Test error when policy is missing both instructions and instructions_file."""
-        policy_file = fixtures_dir / "policies" / "invalid_missing_instructions.yml"
+        assert policies == []
 
-        with pytest.raises(PolicyParseError, match="validation failed"):
-            parse_policy_file(policy_file)
+    def test_loads_policy_with_set_detection_mode(self, temp_dir: Path) -> None:
+        """Test loading a policy with set detection mode."""
+        policies_dir = temp_dir / "policies"
+        policies_dir.mkdir()
+
+        policy_file = policies_dir / "source-test-pairing.md"
+        policy_file.write_text(
+            """---
+name: Source/Test Pairing
+set:
+  - src/{path}.py
+  - tests/{path}_test.py
+---
+Source and test files should change together.
+"""
+        )
+
+        policies = load_policies_from_directory(policies_dir)
+
+        assert len(policies) == 1
+        assert policies[0].name == "Source/Test Pairing"
+        assert policies[0].detection_mode == DetectionMode.SET
+        assert policies[0].set_patterns == ["src/{path}.py", "tests/{path}_test.py"]
+
+    def test_loads_policy_with_pair_detection_mode(self, temp_dir: Path) -> None:
+        """Test loading a policy with pair detection mode."""
+        policies_dir = temp_dir / "policies"
+        policies_dir.mkdir()
+
+        policy_file = policies_dir / "api-docs.md"
+        policy_file.write_text(
+            """---
+name: API Documentation
+pair:
+  trigger: src/api/{name}.py
+  expects: docs/api/{name}.md
+---
+API code requires documentation.
+"""
+        )
 
-    def test_raises_for_nonexistent_file(self, temp_dir: Path) -> None:
-        """Test error when policy file doesn't exist."""
-        policy_file = temp_dir / "nonexistent.yml"
+        policies = load_policies_from_directory(policies_dir)
 
-        with pytest.raises(PolicyParseError, match="does not exist"):
-            parse_policy_file(policy_file)
+        assert len(policies) == 1
+        assert policies[0].name == "API Documentation"
+        assert policies[0].detection_mode == DetectionMode.PAIR
+        assert policies[0].pair_config is not None
+        assert policies[0].pair_config.trigger == "src/api/{name}.py"
+        assert policies[0].pair_config.expects == ["docs/api/{name}.md"]
+
+    def test_loads_policy_with_command_action(self, temp_dir: Path) -> None:
+        """Test loading a policy with command action."""
+        policies_dir = temp_dir / "policies"
+        policies_dir.mkdir()
+
+        policy_file = policies_dir / "format-python.md"
+        policy_file.write_text(
+            """---
+name: Format Python
+trigger: "**/*.py"
+action:
+  command: "ruff format {file}"
+  run_for: each_match
+---
+"""
+        )
+
+        policies = load_policies_from_directory(policies_dir)
+
+        assert len(policies) == 1
+        assert policies[0].name == "Format Python"
+        from deepwork.core.policy_parser import ActionType
 
-    def test_raises_for_directory_path(self, temp_dir: Path) -> None:
-        """Test error when path is a directory."""
-        with pytest.raises(PolicyParseError, match="is not a file"):
-            parse_policy_file(temp_dir)
+        assert policies[0].action_type == ActionType.COMMAND
+        assert policies[0].command_action is not None
+        assert policies[0].command_action.command == "ruff format {file}"
+        assert policies[0].command_action.run_for == "each_match"

From 52b0a75213efcba4cda741bdc9faeca5cf0f3790 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 16 Jan 2026 21:42:33 +0000
Subject: [PATCH 09/10] Format policy_parser.py with ruff

---
 src/deepwork/core/policy_parser.py | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/src/deepwork/core/policy_parser.py b/src/deepwork/core/policy_parser.py
index 0637209..2172607 100644
--- a/src/deepwork/core/policy_parser.py
+++ b/src/deepwork/core/policy_parser.py
@@ -115,13 +115,9 @@ def from_frontmatter(
 
         mode_count = sum([has_trigger, has_set, has_pair])
         if mode_count == 0:
-            raise PolicyParseError(
-                f"Policy '{name}' must have 'trigger', 'set', or 'pair'"
-            )
+            raise PolicyParseError(f"Policy '{name}' must have 'trigger', 'set', or 'pair'")
         if mode_count > 1:
-            raise PolicyParseError(
-                f"Policy '{name}' has multiple detection modes - use only one"
-            )
+            raise PolicyParseError(f"Policy '{name}' has multiple detection modes - use only one")
 
         # Parse based on detection mode
         detection_mode: DetectionMode
@@ -141,9 +137,7 @@ def from_frontmatter(
             detection_mode = DetectionMode.SET
             set_patterns = list(frontmatter["set"])
             if len(set_patterns) < 2:
-                raise PolicyParseError(
-                    f"Policy '{name}' set requires at least 2 patterns"
-                )
+                raise PolicyParseError(f"Policy '{name}' set requires at least 2 patterns")
 
         elif has_pair:
             detection_mode = DetectionMode.PAIR
@@ -170,9 +164,7 @@ def from_frontmatter(
             action_type = ActionType.PROMPT
             # Markdown body is the instructions
             if not markdown_body.strip():
-                raise PolicyParseError(
-                    f"Policy '{name}' with prompt action requires markdown body"
-                )
+                raise PolicyParseError(f"Policy '{name}' with prompt action requires markdown body")
 
         # Get compare_to
         compare_to = frontmatter.get("compare_to", DEFAULT_COMPARE_TO)
@@ -230,9 +222,7 @@ def parse_frontmatter_file(filepath: Path) -> tuple[dict[str, Any], str]:
     try:
         frontmatter = yaml.safe_load(frontmatter_str)
     except yaml.YAMLError as e:
-        raise PolicyParseError(
-            f"Invalid YAML frontmatter in '{filepath.name}': {e}"
-        ) from e
+        raise PolicyParseError(f"Invalid YAML frontmatter in '{filepath.name}': {e}") from e
 
     if frontmatter is None:
         frontmatter = {}
@@ -270,9 +260,7 @@ def parse_policy_file_v2(filepath: Path) -> Policy:
     try:
         validate_against_schema(frontmatter, POLICY_FRONTMATTER_SCHEMA)
     except ValidationError as e:
-        raise PolicyParseError(
-            f"Policy '{filepath.name}' validation failed: {e}"
-        ) from e
+        raise PolicyParseError(f"Policy '{filepath.name}' validation failed: {e}") from e
 
     # Create Policy object
     filename = filepath.stem  # filename without .md extension

From a6cd779564713643d9915dcefcaabbc6e94c17a8 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 16 Jan 2026 21:43:09 +0000
Subject: [PATCH 10/10] Update uv.lock

---
 uv.lock | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/uv.lock b/uv.lock
index c4091ca..cd4110a 100644
--- a/uv.lock
+++ b/uv.lock
@@ -126,7 +126,7 @@ toml = [
 
 [[package]]
 name = "deepwork"
-version = "0.3.0"
+version = "0.4.0"
 source = { editable = "." }
 dependencies = [
     { name = "click" },