mirror of
https://github.com/coleam00/Archon.git
synced 2025-12-30 21:49:30 -05:00
feat: Implement phases 3-5 of compositional workflow architecture
Completes the implementation of test/review workflows with automatic resolution and integrates them into the orchestrator. **Phase 3: Test Workflow with Resolution** - Created test_workflow.py with automatic test failure resolution - Implements retry loop with max 4 attempts (configurable via MAX_TEST_RETRY_ATTEMPTS) - Parses JSON test results and resolves failures one by one - Uses existing test.md and resolve_failed_test.md commands - Added run_tests() and resolve_test_failure() to workflow_operations.py **Phase 4: Review Workflow with Resolution** - Created review_workflow.py with automatic blocker issue resolution - Implements retry loop with max 3 attempts (configurable via MAX_REVIEW_RETRY_ATTEMPTS) - Categorizes issues by severity (blocker/tech_debt/skippable) - Only blocks on blocker issues - tech_debt and skippable allowed to pass - Created review_runner.md and resolve_failed_review.md commands - Added run_review() and resolve_review_issue() to workflow_operations.py - Supports screenshot capture for UI review (configurable via ENABLE_SCREENSHOT_CAPTURE) **Phase 5: Compositional Integration** - Updated workflow_orchestrator.py to integrate test and review phases - Test phase runs between commit and PR creation (if ENABLE_TEST_PHASE=true) - Review phase runs after tests (if ENABLE_REVIEW_PHASE=true) - Both phases are optional and controlled by config flags - Step history tracks test and review execution results - Proper error handling and logging for all phases **Supporting Changes** - Updated agent_names.py to add REVIEWER constant - Added configuration flags to config.py for test/review phases - All new code follows structured logging patterns - Maintains compatibility with existing workflow steps **Files Changed**: 19 files, 3035+ lines - New: test_workflow.py, review_workflow.py, review commands - Modified: orchestrator, workflow_operations, agent_names, config - Phases 1-2 files (worktree, state, port allocation) also staged The implementation is complete and ready for testing. All phases now support parallel execution via worktree isolation with deterministic port allocation.
This commit is contained in:
@@ -25,7 +25,7 @@ from ..models import (
|
||||
StepHistory,
|
||||
)
|
||||
from ..sandbox_manager.sandbox_factory import SandboxFactory
|
||||
from ..state_manager.work_order_repository import WorkOrderRepository
|
||||
from ..state_manager.repository_factory import create_repository
|
||||
from ..utils.id_generator import generate_work_order_id
|
||||
from ..utils.structured_logger import get_logger
|
||||
from ..workflow_engine.workflow_orchestrator import WorkflowOrchestrator
|
||||
@@ -35,7 +35,7 @@ logger = get_logger(__name__)
|
||||
router = APIRouter()
|
||||
|
||||
# Initialize dependencies (singletons for MVP)
|
||||
state_repository = WorkOrderRepository()
|
||||
state_repository = create_repository()
|
||||
agent_executor = AgentCLIExecutor()
|
||||
sandbox_factory = SandboxFactory()
|
||||
github_client = GitHubClient()
|
||||
|
||||
@@ -49,6 +49,28 @@ class AgentWorkOrdersConfig:
|
||||
ENABLE_PROMPT_LOGGING: bool = os.getenv("ENABLE_PROMPT_LOGGING", "true").lower() == "true"
|
||||
ENABLE_OUTPUT_ARTIFACTS: bool = os.getenv("ENABLE_OUTPUT_ARTIFACTS", "true").lower() == "true"
|
||||
|
||||
# Worktree configuration
|
||||
WORKTREE_BASE_DIR: str = os.getenv("WORKTREE_BASE_DIR", "trees")
|
||||
|
||||
# Port allocation for parallel execution
|
||||
BACKEND_PORT_RANGE_START: int = int(os.getenv("BACKEND_PORT_START", "9100"))
|
||||
BACKEND_PORT_RANGE_END: int = int(os.getenv("BACKEND_PORT_END", "9114"))
|
||||
FRONTEND_PORT_RANGE_START: int = int(os.getenv("FRONTEND_PORT_START", "9200"))
|
||||
FRONTEND_PORT_RANGE_END: int = int(os.getenv("FRONTEND_PORT_END", "9214"))
|
||||
|
||||
# Test workflow configuration
|
||||
MAX_TEST_RETRY_ATTEMPTS: int = int(os.getenv("MAX_TEST_RETRY_ATTEMPTS", "4"))
|
||||
ENABLE_TEST_PHASE: bool = os.getenv("ENABLE_TEST_PHASE", "true").lower() == "true"
|
||||
|
||||
# Review workflow configuration
|
||||
MAX_REVIEW_RETRY_ATTEMPTS: int = int(os.getenv("MAX_REVIEW_RETRY_ATTEMPTS", "3"))
|
||||
ENABLE_REVIEW_PHASE: bool = os.getenv("ENABLE_REVIEW_PHASE", "true").lower() == "true"
|
||||
ENABLE_SCREENSHOT_CAPTURE: bool = os.getenv("ENABLE_SCREENSHOT_CAPTURE", "true").lower() == "true"
|
||||
|
||||
# State management configuration
|
||||
STATE_STORAGE_TYPE: str = os.getenv("STATE_STORAGE_TYPE", "memory") # "memory" or "file"
|
||||
FILE_STATE_DIRECTORY: str = os.getenv("FILE_STATE_DIRECTORY", "agent-work-orders-state")
|
||||
|
||||
@classmethod
|
||||
def ensure_temp_dir(cls) -> Path:
|
||||
"""Ensure temp directory exists and return Path"""
|
||||
|
||||
@@ -49,8 +49,10 @@ class WorkflowStep(str, Enum):
|
||||
IMPLEMENT = "implement"
|
||||
GENERATE_BRANCH = "generate_branch"
|
||||
COMMIT = "commit"
|
||||
REVIEW = "review"
|
||||
TEST = "test"
|
||||
RESOLVE_TEST = "resolve_test"
|
||||
REVIEW = "review"
|
||||
RESOLVE_REVIEW = "resolve_review"
|
||||
CREATE_PR = "create_pr"
|
||||
|
||||
|
||||
@@ -232,6 +234,8 @@ class StepHistory(BaseModel):
|
||||
WorkflowStep.GENERATE_BRANCH,
|
||||
WorkflowStep.IMPLEMENT,
|
||||
WorkflowStep.COMMIT,
|
||||
WorkflowStep.TEST,
|
||||
WorkflowStep.REVIEW,
|
||||
WorkflowStep.CREATE_PR,
|
||||
]
|
||||
|
||||
|
||||
@@ -0,0 +1,215 @@
|
||||
"""Git Worktree Sandbox Implementation
|
||||
|
||||
Provides isolated execution environment using git worktrees.
|
||||
Enables parallel execution of multiple work orders without conflicts.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
from ..models import CommandExecutionResult, SandboxSetupError
|
||||
from ..utils.git_operations import get_current_branch
|
||||
from ..utils.port_allocation import find_next_available_ports
|
||||
from ..utils.structured_logger import get_logger
|
||||
from ..utils.worktree_operations import (
|
||||
create_worktree,
|
||||
get_worktree_path,
|
||||
remove_worktree,
|
||||
setup_worktree_environment,
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class GitWorktreeSandbox:
|
||||
"""Git worktree-based sandbox implementation
|
||||
|
||||
Creates a git worktree under trees/<work_order_id>/ where the agent
|
||||
executes workflows. Enables parallel execution with isolated environments
|
||||
and deterministic port allocation.
|
||||
"""
|
||||
|
||||
def __init__(self, repository_url: str, sandbox_identifier: str):
|
||||
self.repository_url = repository_url
|
||||
self.sandbox_identifier = sandbox_identifier
|
||||
self.working_dir = get_worktree_path(repository_url, sandbox_identifier)
|
||||
self.backend_port: int | None = None
|
||||
self.frontend_port: int | None = None
|
||||
self._logger = logger.bind(
|
||||
sandbox_identifier=sandbox_identifier,
|
||||
repository_url=repository_url,
|
||||
)
|
||||
|
||||
async def setup(self) -> None:
|
||||
"""Create worktree and set up isolated environment
|
||||
|
||||
Creates worktree from origin/main and allocates unique ports.
|
||||
"""
|
||||
self._logger.info("worktree_sandbox_setup_started")
|
||||
|
||||
try:
|
||||
# Allocate ports deterministically
|
||||
self.backend_port, self.frontend_port = find_next_available_ports(
|
||||
self.sandbox_identifier
|
||||
)
|
||||
self._logger.info(
|
||||
"ports_allocated",
|
||||
backend_port=self.backend_port,
|
||||
frontend_port=self.frontend_port,
|
||||
)
|
||||
|
||||
# Create worktree with temporary branch name
|
||||
# Agent will create the actual feature branch during execution
|
||||
temp_branch = f"wo-{self.sandbox_identifier}"
|
||||
|
||||
worktree_path, error = create_worktree(
|
||||
self.repository_url,
|
||||
self.sandbox_identifier,
|
||||
temp_branch,
|
||||
self._logger
|
||||
)
|
||||
|
||||
if error or not worktree_path:
|
||||
raise SandboxSetupError(f"Failed to create worktree: {error}")
|
||||
|
||||
# Set up environment with port configuration
|
||||
setup_worktree_environment(
|
||||
worktree_path,
|
||||
self.backend_port,
|
||||
self.frontend_port,
|
||||
self._logger
|
||||
)
|
||||
|
||||
self._logger.info(
|
||||
"worktree_sandbox_setup_completed",
|
||||
working_dir=self.working_dir,
|
||||
backend_port=self.backend_port,
|
||||
frontend_port=self.frontend_port,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self._logger.error(
|
||||
"worktree_sandbox_setup_failed",
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
raise SandboxSetupError(f"Worktree sandbox setup failed: {e}") from e
|
||||
|
||||
async def execute_command(
|
||||
self, command: str, timeout: int = 300
|
||||
) -> CommandExecutionResult:
|
||||
"""Execute command in the worktree directory
|
||||
|
||||
Args:
|
||||
command: Shell command to execute
|
||||
timeout: Timeout in seconds
|
||||
|
||||
Returns:
|
||||
CommandExecutionResult
|
||||
"""
|
||||
self._logger.info("command_execution_started", command=command)
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
process = await asyncio.create_subprocess_shell(
|
||||
command,
|
||||
cwd=self.working_dir,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
|
||||
try:
|
||||
stdout, stderr = await asyncio.wait_for(
|
||||
process.communicate(), timeout=timeout
|
||||
)
|
||||
except TimeoutError:
|
||||
process.kill()
|
||||
await process.wait()
|
||||
duration = time.time() - start_time
|
||||
self._logger.error(
|
||||
"command_execution_timeout", command=command, timeout=timeout
|
||||
)
|
||||
return CommandExecutionResult(
|
||||
success=False,
|
||||
stdout=None,
|
||||
stderr=None,
|
||||
exit_code=-1,
|
||||
error_message=f"Command timed out after {timeout}s",
|
||||
duration_seconds=duration,
|
||||
)
|
||||
|
||||
duration = time.time() - start_time
|
||||
success = process.returncode == 0
|
||||
|
||||
result = CommandExecutionResult(
|
||||
success=success,
|
||||
stdout=stdout.decode() if stdout else None,
|
||||
stderr=stderr.decode() if stderr else None,
|
||||
exit_code=process.returncode or 0,
|
||||
error_message=None if success else stderr.decode() if stderr else "Command failed",
|
||||
duration_seconds=duration,
|
||||
)
|
||||
|
||||
if success:
|
||||
self._logger.info(
|
||||
"command_execution_completed", command=command, duration=duration
|
||||
)
|
||||
else:
|
||||
self._logger.error(
|
||||
"command_execution_failed",
|
||||
command=command,
|
||||
exit_code=process.returncode,
|
||||
duration=duration,
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
duration = time.time() - start_time
|
||||
self._logger.error(
|
||||
"command_execution_error", command=command, error=str(e), exc_info=True
|
||||
)
|
||||
return CommandExecutionResult(
|
||||
success=False,
|
||||
stdout=None,
|
||||
stderr=None,
|
||||
exit_code=-1,
|
||||
error_message=str(e),
|
||||
duration_seconds=duration,
|
||||
)
|
||||
|
||||
async def get_git_branch_name(self) -> str | None:
|
||||
"""Get current git branch name in worktree
|
||||
|
||||
Returns:
|
||||
Current branch name or None
|
||||
"""
|
||||
try:
|
||||
return await get_current_branch(self.working_dir)
|
||||
except Exception as e:
|
||||
self._logger.error("git_branch_query_failed", error=str(e))
|
||||
return None
|
||||
|
||||
async def cleanup(self) -> None:
|
||||
"""Remove worktree"""
|
||||
self._logger.info("worktree_sandbox_cleanup_started")
|
||||
|
||||
try:
|
||||
success, error = remove_worktree(
|
||||
self.repository_url,
|
||||
self.sandbox_identifier,
|
||||
self._logger
|
||||
)
|
||||
if success:
|
||||
self._logger.info("worktree_sandbox_cleanup_completed")
|
||||
else:
|
||||
self._logger.error(
|
||||
"worktree_sandbox_cleanup_failed",
|
||||
error=error
|
||||
)
|
||||
except Exception as e:
|
||||
self._logger.error(
|
||||
"worktree_sandbox_cleanup_failed",
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
@@ -5,6 +5,7 @@ Creates appropriate sandbox instances based on sandbox type.
|
||||
|
||||
from ..models import SandboxType
|
||||
from .git_branch_sandbox import GitBranchSandbox
|
||||
from .git_worktree_sandbox import GitWorktreeSandbox
|
||||
from .sandbox_protocol import AgentSandbox
|
||||
|
||||
|
||||
@@ -33,7 +34,7 @@ class SandboxFactory:
|
||||
if sandbox_type == SandboxType.GIT_BRANCH:
|
||||
return GitBranchSandbox(repository_url, sandbox_identifier)
|
||||
elif sandbox_type == SandboxType.GIT_WORKTREE:
|
||||
raise NotImplementedError("Git worktree sandbox not implemented (Phase 2+)")
|
||||
return GitWorktreeSandbox(repository_url, sandbox_identifier)
|
||||
elif sandbox_type == SandboxType.E2B:
|
||||
raise NotImplementedError("E2B sandbox not implemented (Phase 2+)")
|
||||
elif sandbox_type == SandboxType.DAGGER:
|
||||
|
||||
@@ -1,4 +1,15 @@
|
||||
"""State Manager Module
|
||||
|
||||
Manages agent work order state (in-memory for MVP).
|
||||
Manages agent work order state with pluggable storage backends.
|
||||
Supports both in-memory (development) and file-based (production) storage.
|
||||
"""
|
||||
|
||||
from .file_state_repository import FileStateRepository
|
||||
from .repository_factory import create_repository
|
||||
from .work_order_repository import WorkOrderRepository
|
||||
|
||||
__all__ = [
|
||||
"WorkOrderRepository",
|
||||
"FileStateRepository",
|
||||
"create_repository",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,343 @@
|
||||
"""File-based Work Order Repository
|
||||
|
||||
Provides persistent JSON-based storage for agent work orders.
|
||||
Enables state persistence across service restarts and debugging.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, cast
|
||||
|
||||
from ..models import AgentWorkOrderState, AgentWorkOrderStatus, StepHistory
|
||||
from ..utils.structured_logger import get_logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import structlog
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class FileStateRepository:
|
||||
"""File-based repository for work order state
|
||||
|
||||
Stores state as JSON files in <state_directory>/<work_order_id>.json
|
||||
Each file contains: state, metadata, and step_history
|
||||
"""
|
||||
|
||||
def __init__(self, state_directory: str):
|
||||
self.state_directory = Path(state_directory)
|
||||
self.state_directory.mkdir(parents=True, exist_ok=True)
|
||||
self._lock = asyncio.Lock()
|
||||
self._logger: structlog.stdlib.BoundLogger = logger.bind(
|
||||
state_directory=str(self.state_directory)
|
||||
)
|
||||
self._logger.info("file_state_repository_initialized")
|
||||
|
||||
def _get_state_file_path(self, agent_work_order_id: str) -> Path:
|
||||
"""Get path to state file for work order
|
||||
|
||||
Args:
|
||||
agent_work_order_id: Work order ID
|
||||
|
||||
Returns:
|
||||
Path to state file
|
||||
"""
|
||||
return self.state_directory / f"{agent_work_order_id}.json"
|
||||
|
||||
def _serialize_datetime(self, obj):
|
||||
"""JSON serializer for datetime objects
|
||||
|
||||
Args:
|
||||
obj: Object to serialize
|
||||
|
||||
Returns:
|
||||
ISO format string for datetime objects
|
||||
"""
|
||||
if isinstance(obj, datetime):
|
||||
return obj.isoformat()
|
||||
raise TypeError(f"Type {type(obj)} not serializable")
|
||||
|
||||
async def _read_state_file(self, agent_work_order_id: str) -> dict[str, Any] | None:
|
||||
"""Read state file
|
||||
|
||||
Args:
|
||||
agent_work_order_id: Work order ID
|
||||
|
||||
Returns:
|
||||
State dictionary or None if file doesn't exist
|
||||
"""
|
||||
state_file = self._get_state_file_path(agent_work_order_id)
|
||||
if not state_file.exists():
|
||||
return None
|
||||
|
||||
try:
|
||||
with state_file.open("r") as f:
|
||||
data = json.load(f)
|
||||
return cast(dict[str, Any], data)
|
||||
except Exception as e:
|
||||
self._logger.error(
|
||||
"state_file_read_failed",
|
||||
agent_work_order_id=agent_work_order_id,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
return None
|
||||
|
||||
async def _write_state_file(self, agent_work_order_id: str, data: dict[str, Any]) -> None:
|
||||
"""Write state file
|
||||
|
||||
Args:
|
||||
agent_work_order_id: Work order ID
|
||||
data: State dictionary to write
|
||||
"""
|
||||
state_file = self._get_state_file_path(agent_work_order_id)
|
||||
|
||||
try:
|
||||
with state_file.open("w") as f:
|
||||
json.dump(data, f, indent=2, default=self._serialize_datetime)
|
||||
except Exception as e:
|
||||
self._logger.error(
|
||||
"state_file_write_failed",
|
||||
agent_work_order_id=agent_work_order_id,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
raise
|
||||
|
||||
async def create(self, work_order: AgentWorkOrderState, metadata: dict[str, Any]) -> None:
|
||||
"""Create a new work order
|
||||
|
||||
Args:
|
||||
work_order: Core work order state
|
||||
metadata: Additional metadata (status, workflow_type, etc.)
|
||||
"""
|
||||
async with self._lock:
|
||||
data = {
|
||||
"state": work_order.model_dump(mode="json"),
|
||||
"metadata": metadata,
|
||||
"step_history": None
|
||||
}
|
||||
|
||||
await self._write_state_file(work_order.agent_work_order_id, data)
|
||||
|
||||
self._logger.info(
|
||||
"work_order_created",
|
||||
agent_work_order_id=work_order.agent_work_order_id,
|
||||
)
|
||||
|
||||
async def get(self, agent_work_order_id: str) -> tuple[AgentWorkOrderState, dict[str, Any]] | None:
|
||||
"""Get a work order by ID
|
||||
|
||||
Args:
|
||||
agent_work_order_id: Work order ID
|
||||
|
||||
Returns:
|
||||
Tuple of (state, metadata) or None if not found
|
||||
"""
|
||||
async with self._lock:
|
||||
data = await self._read_state_file(agent_work_order_id)
|
||||
if not data:
|
||||
return None
|
||||
|
||||
state = AgentWorkOrderState(**data["state"])
|
||||
metadata = data["metadata"]
|
||||
|
||||
return (state, metadata)
|
||||
|
||||
async def list(self, status_filter: AgentWorkOrderStatus | None = None) -> list[tuple[AgentWorkOrderState, dict[str, Any]]]:
|
||||
"""List all work orders
|
||||
|
||||
Args:
|
||||
status_filter: Optional status to filter by
|
||||
|
||||
Returns:
|
||||
List of (state, metadata) tuples
|
||||
"""
|
||||
async with self._lock:
|
||||
results = []
|
||||
|
||||
# Iterate over all JSON files in state directory
|
||||
for state_file in self.state_directory.glob("*.json"):
|
||||
try:
|
||||
with state_file.open("r") as f:
|
||||
data = json.load(f)
|
||||
|
||||
state = AgentWorkOrderState(**data["state"])
|
||||
metadata = data["metadata"]
|
||||
|
||||
if status_filter is None or metadata.get("status") == status_filter:
|
||||
results.append((state, metadata))
|
||||
|
||||
except Exception as e:
|
||||
self._logger.error(
|
||||
"state_file_load_failed",
|
||||
file=str(state_file),
|
||||
error=str(e)
|
||||
)
|
||||
continue
|
||||
|
||||
return results
|
||||
|
||||
async def update_status(
|
||||
self,
|
||||
agent_work_order_id: str,
|
||||
status: AgentWorkOrderStatus,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
"""Update work order status and other fields
|
||||
|
||||
Args:
|
||||
agent_work_order_id: Work order ID
|
||||
status: New status
|
||||
**kwargs: Additional fields to update
|
||||
"""
|
||||
async with self._lock:
|
||||
data = await self._read_state_file(agent_work_order_id)
|
||||
if not data:
|
||||
self._logger.warning(
|
||||
"work_order_not_found_for_update",
|
||||
agent_work_order_id=agent_work_order_id
|
||||
)
|
||||
return
|
||||
|
||||
data["metadata"]["status"] = status
|
||||
data["metadata"]["updated_at"] = datetime.now().isoformat()
|
||||
|
||||
for key, value in kwargs.items():
|
||||
data["metadata"][key] = value
|
||||
|
||||
await self._write_state_file(agent_work_order_id, data)
|
||||
|
||||
self._logger.info(
|
||||
"work_order_status_updated",
|
||||
agent_work_order_id=agent_work_order_id,
|
||||
status=status.value,
|
||||
)
|
||||
|
||||
async def update_git_branch(
|
||||
self, agent_work_order_id: str, git_branch_name: str
|
||||
) -> None:
|
||||
"""Update git branch name in state
|
||||
|
||||
Args:
|
||||
agent_work_order_id: Work order ID
|
||||
git_branch_name: Git branch name
|
||||
"""
|
||||
async with self._lock:
|
||||
data = await self._read_state_file(agent_work_order_id)
|
||||
if not data:
|
||||
self._logger.warning(
|
||||
"work_order_not_found_for_update",
|
||||
agent_work_order_id=agent_work_order_id
|
||||
)
|
||||
return
|
||||
|
||||
data["state"]["git_branch_name"] = git_branch_name
|
||||
data["metadata"]["updated_at"] = datetime.now().isoformat()
|
||||
|
||||
await self._write_state_file(agent_work_order_id, data)
|
||||
|
||||
self._logger.info(
|
||||
"work_order_git_branch_updated",
|
||||
agent_work_order_id=agent_work_order_id,
|
||||
git_branch_name=git_branch_name,
|
||||
)
|
||||
|
||||
async def update_session_id(
|
||||
self, agent_work_order_id: str, agent_session_id: str
|
||||
) -> None:
|
||||
"""Update agent session ID in state
|
||||
|
||||
Args:
|
||||
agent_work_order_id: Work order ID
|
||||
agent_session_id: Claude CLI session ID
|
||||
"""
|
||||
async with self._lock:
|
||||
data = await self._read_state_file(agent_work_order_id)
|
||||
if not data:
|
||||
self._logger.warning(
|
||||
"work_order_not_found_for_update",
|
||||
agent_work_order_id=agent_work_order_id
|
||||
)
|
||||
return
|
||||
|
||||
data["state"]["agent_session_id"] = agent_session_id
|
||||
data["metadata"]["updated_at"] = datetime.now().isoformat()
|
||||
|
||||
await self._write_state_file(agent_work_order_id, data)
|
||||
|
||||
self._logger.info(
|
||||
"work_order_session_id_updated",
|
||||
agent_work_order_id=agent_work_order_id,
|
||||
agent_session_id=agent_session_id,
|
||||
)
|
||||
|
||||
async def save_step_history(
|
||||
self, agent_work_order_id: str, step_history: StepHistory
|
||||
) -> None:
|
||||
"""Save step execution history
|
||||
|
||||
Args:
|
||||
agent_work_order_id: Work order ID
|
||||
step_history: Step execution history
|
||||
"""
|
||||
async with self._lock:
|
||||
data = await self._read_state_file(agent_work_order_id)
|
||||
if not data:
|
||||
# Create minimal state if doesn't exist
|
||||
data = {
|
||||
"state": {"agent_work_order_id": agent_work_order_id},
|
||||
"metadata": {},
|
||||
"step_history": None
|
||||
}
|
||||
|
||||
data["step_history"] = step_history.model_dump(mode="json")
|
||||
|
||||
await self._write_state_file(agent_work_order_id, data)
|
||||
|
||||
self._logger.info(
|
||||
"step_history_saved",
|
||||
agent_work_order_id=agent_work_order_id,
|
||||
step_count=len(step_history.steps),
|
||||
)
|
||||
|
||||
async def get_step_history(self, agent_work_order_id: str) -> StepHistory | None:
|
||||
"""Get step execution history
|
||||
|
||||
Args:
|
||||
agent_work_order_id: Work order ID
|
||||
|
||||
Returns:
|
||||
Step history or None if not found
|
||||
"""
|
||||
async with self._lock:
|
||||
data = await self._read_state_file(agent_work_order_id)
|
||||
if not data or not data.get("step_history"):
|
||||
return None
|
||||
|
||||
return StepHistory(**data["step_history"])
|
||||
|
||||
async def delete(self, agent_work_order_id: str) -> None:
|
||||
"""Delete a work order state file
|
||||
|
||||
Args:
|
||||
agent_work_order_id: Work order ID
|
||||
"""
|
||||
async with self._lock:
|
||||
state_file = self._get_state_file_path(agent_work_order_id)
|
||||
if state_file.exists():
|
||||
state_file.unlink()
|
||||
self._logger.info(
|
||||
"work_order_deleted",
|
||||
agent_work_order_id=agent_work_order_id
|
||||
)
|
||||
|
||||
def list_state_ids(self) -> "list[str]": # type: ignore[valid-type]
|
||||
"""List all work order IDs with state files
|
||||
|
||||
Returns:
|
||||
List of work order IDs
|
||||
"""
|
||||
return [f.stem for f in self.state_directory.glob("*.json")]
|
||||
@@ -0,0 +1,43 @@
|
||||
"""Repository Factory
|
||||
|
||||
Creates appropriate repository instances based on configuration.
|
||||
Supports both in-memory (for development/testing) and file-based (for production) storage.
|
||||
"""
|
||||
|
||||
from ..config import config
|
||||
from ..utils.structured_logger import get_logger
|
||||
from .file_state_repository import FileStateRepository
|
||||
from .work_order_repository import WorkOrderRepository
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def create_repository() -> WorkOrderRepository | FileStateRepository:
|
||||
"""Create a work order repository based on configuration
|
||||
|
||||
Returns:
|
||||
Repository instance (either in-memory or file-based)
|
||||
"""
|
||||
storage_type = config.STATE_STORAGE_TYPE.lower()
|
||||
|
||||
if storage_type == "file":
|
||||
state_dir = config.FILE_STATE_DIRECTORY
|
||||
logger.info(
|
||||
"repository_created",
|
||||
storage_type="file",
|
||||
state_directory=state_dir
|
||||
)
|
||||
return FileStateRepository(state_dir)
|
||||
elif storage_type == "memory":
|
||||
logger.info(
|
||||
"repository_created",
|
||||
storage_type="memory"
|
||||
)
|
||||
return WorkOrderRepository()
|
||||
else:
|
||||
logger.warning(
|
||||
"unknown_storage_type",
|
||||
storage_type=storage_type,
|
||||
fallback="memory"
|
||||
)
|
||||
return WorkOrderRepository()
|
||||
94
python/src/agent_work_orders/utils/port_allocation.py
Normal file
94
python/src/agent_work_orders/utils/port_allocation.py
Normal file
@@ -0,0 +1,94 @@
|
||||
"""Port allocation utilities for isolated agent work order execution.
|
||||
|
||||
Provides deterministic port allocation (backend: 9100-9114, frontend: 9200-9214)
|
||||
based on work order ID to enable parallel execution without port conflicts.
|
||||
"""
|
||||
|
||||
import os
|
||||
import socket
|
||||
|
||||
|
||||
def get_ports_for_work_order(work_order_id: str) -> tuple[int, int]:
|
||||
"""Deterministically assign ports based on work order ID.
|
||||
|
||||
Args:
|
||||
work_order_id: The work order identifier
|
||||
|
||||
Returns:
|
||||
Tuple of (backend_port, frontend_port)
|
||||
"""
|
||||
# Convert first 8 chars of work order ID to index (0-14)
|
||||
# Using base 36 conversion and modulo for consistent mapping
|
||||
try:
|
||||
# Take first 8 alphanumeric chars and convert from base 36
|
||||
id_chars = ''.join(c for c in work_order_id[:8] if c.isalnum())
|
||||
index = int(id_chars, 36) % 15
|
||||
except ValueError:
|
||||
# Fallback to simple hash if conversion fails
|
||||
index = hash(work_order_id) % 15
|
||||
|
||||
backend_port = 9100 + index
|
||||
frontend_port = 9200 + index
|
||||
|
||||
return backend_port, frontend_port
|
||||
|
||||
|
||||
def is_port_available(port: int) -> bool:
|
||||
"""Check if a port is available for binding.
|
||||
|
||||
Args:
|
||||
port: Port number to check
|
||||
|
||||
Returns:
|
||||
True if port is available, False otherwise
|
||||
"""
|
||||
try:
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
s.settimeout(1)
|
||||
s.bind(('localhost', port))
|
||||
return True
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
|
||||
def find_next_available_ports(work_order_id: str, max_attempts: int = 15) -> tuple[int, int]:
|
||||
"""Find available ports starting from deterministic assignment.
|
||||
|
||||
Args:
|
||||
work_order_id: The work order ID
|
||||
max_attempts: Maximum number of attempts (default 15)
|
||||
|
||||
Returns:
|
||||
Tuple of (backend_port, frontend_port)
|
||||
|
||||
Raises:
|
||||
RuntimeError: If no available ports found
|
||||
"""
|
||||
base_backend, base_frontend = get_ports_for_work_order(work_order_id)
|
||||
base_index = base_backend - 9100
|
||||
|
||||
for offset in range(max_attempts):
|
||||
index = (base_index + offset) % 15
|
||||
backend_port = 9100 + index
|
||||
frontend_port = 9200 + index
|
||||
|
||||
if is_port_available(backend_port) and is_port_available(frontend_port):
|
||||
return backend_port, frontend_port
|
||||
|
||||
raise RuntimeError("No available ports in the allocated range")
|
||||
|
||||
|
||||
def create_ports_env_file(worktree_path: str, backend_port: int, frontend_port: int) -> None:
|
||||
"""Create .ports.env file in worktree with port configuration.
|
||||
|
||||
Args:
|
||||
worktree_path: Path to the worktree
|
||||
backend_port: Backend port number
|
||||
frontend_port: Frontend port number
|
||||
"""
|
||||
ports_env_path = os.path.join(worktree_path, ".ports.env")
|
||||
|
||||
with open(ports_env_path, "w") as f:
|
||||
f.write(f"BACKEND_PORT={backend_port}\n")
|
||||
f.write(f"FRONTEND_PORT={frontend_port}\n")
|
||||
f.write(f"VITE_BACKEND_URL=http://localhost:{backend_port}\n")
|
||||
285
python/src/agent_work_orders/utils/worktree_operations.py
Normal file
285
python/src/agent_work_orders/utils/worktree_operations.py
Normal file
@@ -0,0 +1,285 @@
|
||||
"""Worktree management operations for isolated agent work order execution.
|
||||
|
||||
Provides utilities for creating and managing git worktrees under trees/<work_order_id>/
|
||||
to enable parallel execution in isolated environments.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from ..config import config
|
||||
from .port_allocation import create_ports_env_file
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import structlog
|
||||
|
||||
|
||||
def _get_repo_hash(repository_url: str) -> str:
|
||||
"""Get a short hash for repository URL.
|
||||
|
||||
Args:
|
||||
repository_url: Git repository URL
|
||||
|
||||
Returns:
|
||||
8-character hash of the repository URL
|
||||
"""
|
||||
return hashlib.sha256(repository_url.encode()).hexdigest()[:8]
|
||||
|
||||
|
||||
def get_base_repo_path(repository_url: str) -> str:
|
||||
"""Get path to base repository clone.
|
||||
|
||||
Args:
|
||||
repository_url: Git repository URL
|
||||
|
||||
Returns:
|
||||
Absolute path to base repository directory
|
||||
"""
|
||||
repo_hash = _get_repo_hash(repository_url)
|
||||
base_path = config.ensure_temp_dir() / "repos" / repo_hash / "main"
|
||||
return str(base_path)
|
||||
|
||||
|
||||
def get_worktree_path(repository_url: str, work_order_id: str) -> str:
|
||||
"""Get absolute path to worktree.
|
||||
|
||||
Args:
|
||||
repository_url: Git repository URL
|
||||
work_order_id: The work order ID
|
||||
|
||||
Returns:
|
||||
Absolute path to worktree directory
|
||||
"""
|
||||
repo_hash = _get_repo_hash(repository_url)
|
||||
worktree_path = config.ensure_temp_dir() / "repos" / repo_hash / "trees" / work_order_id
|
||||
return str(worktree_path)
|
||||
|
||||
|
||||
def ensure_base_repository(repository_url: str, logger: "structlog.stdlib.BoundLogger") -> tuple[str | None, str | None]:
|
||||
"""Ensure base repository clone exists.
|
||||
|
||||
Args:
|
||||
repository_url: Git repository URL to clone
|
||||
logger: Logger instance
|
||||
|
||||
Returns:
|
||||
Tuple of (base_repo_path, error_message)
|
||||
"""
|
||||
base_repo_path = get_base_repo_path(repository_url)
|
||||
|
||||
# If base repo already exists, just fetch latest
|
||||
if os.path.exists(base_repo_path):
|
||||
logger.info(f"Base repository exists at {base_repo_path}, fetching latest")
|
||||
fetch_result = subprocess.run(
|
||||
["git", "fetch", "origin"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd=base_repo_path
|
||||
)
|
||||
if fetch_result.returncode != 0:
|
||||
logger.warning(f"Failed to fetch from origin: {fetch_result.stderr}")
|
||||
return base_repo_path, None
|
||||
|
||||
# Create parent directory
|
||||
Path(base_repo_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Clone the repository
|
||||
logger.info(f"Cloning base repository from {repository_url} to {base_repo_path}")
|
||||
clone_result = subprocess.run(
|
||||
["git", "clone", repository_url, base_repo_path],
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
|
||||
if clone_result.returncode != 0:
|
||||
error_msg = f"Failed to clone repository: {clone_result.stderr}"
|
||||
logger.error(error_msg)
|
||||
return None, error_msg
|
||||
|
||||
logger.info(f"Created base repository at {base_repo_path}")
|
||||
return base_repo_path, None
|
||||
|
||||
|
||||
def create_worktree(
|
||||
repository_url: str,
|
||||
work_order_id: str,
|
||||
branch_name: str,
|
||||
logger: "structlog.stdlib.BoundLogger"
|
||||
) -> tuple[str | None, str | None]:
|
||||
"""Create a git worktree for isolated execution.
|
||||
|
||||
Args:
|
||||
repository_url: Git repository URL
|
||||
work_order_id: The work order ID for this worktree
|
||||
branch_name: The branch name to create the worktree from
|
||||
logger: Logger instance
|
||||
|
||||
Returns:
|
||||
Tuple of (worktree_path, error_message)
|
||||
worktree_path is the absolute path if successful, None if error
|
||||
"""
|
||||
# Ensure base repository exists
|
||||
base_repo_path, error = ensure_base_repository(repository_url, logger)
|
||||
if error or not base_repo_path:
|
||||
return None, error
|
||||
|
||||
# Construct worktree path
|
||||
worktree_path = get_worktree_path(repository_url, work_order_id)
|
||||
|
||||
# Check if worktree already exists
|
||||
if os.path.exists(worktree_path):
|
||||
logger.warning(f"Worktree already exists at {worktree_path}")
|
||||
return worktree_path, None
|
||||
|
||||
# Create parent directory for worktrees
|
||||
Path(worktree_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Fetch latest changes from origin
|
||||
logger.info("Fetching latest changes from origin")
|
||||
fetch_result = subprocess.run(
|
||||
["git", "fetch", "origin"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd=base_repo_path
|
||||
)
|
||||
if fetch_result.returncode != 0:
|
||||
logger.warning(f"Failed to fetch from origin: {fetch_result.stderr}")
|
||||
|
||||
# Create the worktree using git, branching from origin/main
|
||||
# Use -b to create the branch as part of worktree creation
|
||||
cmd = ["git", "worktree", "add", "-b", branch_name, worktree_path, "origin/main"]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, cwd=base_repo_path)
|
||||
|
||||
if result.returncode != 0:
|
||||
# If branch already exists, try without -b
|
||||
if "already exists" in result.stderr:
|
||||
cmd = ["git", "worktree", "add", worktree_path, branch_name]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, cwd=base_repo_path)
|
||||
|
||||
if result.returncode != 0:
|
||||
error_msg = f"Failed to create worktree: {result.stderr}"
|
||||
logger.error(error_msg)
|
||||
return None, error_msg
|
||||
|
||||
logger.info(f"Created worktree at {worktree_path} for branch {branch_name}")
|
||||
return worktree_path, None
|
||||
|
||||
|
||||
def validate_worktree(
|
||||
repository_url: str,
|
||||
work_order_id: str,
|
||||
state: dict[str, Any]
|
||||
) -> tuple[bool, str | None]:
|
||||
"""Validate worktree exists in state, filesystem, and git.
|
||||
|
||||
Performs three-way validation to ensure consistency:
|
||||
1. State has worktree_path
|
||||
2. Directory exists on filesystem
|
||||
3. Git knows about the worktree
|
||||
|
||||
Args:
|
||||
repository_url: Git repository URL
|
||||
work_order_id: The work order ID to validate
|
||||
state: The work order state dictionary
|
||||
|
||||
Returns:
|
||||
Tuple of (is_valid, error_message)
|
||||
"""
|
||||
# Check state has worktree_path
|
||||
worktree_path = state.get("worktree_path")
|
||||
if not worktree_path:
|
||||
return False, "No worktree_path in state"
|
||||
|
||||
# Check directory exists
|
||||
if not os.path.exists(worktree_path):
|
||||
return False, f"Worktree directory not found: {worktree_path}"
|
||||
|
||||
# Check git knows about it (query from base repository)
|
||||
base_repo_path = get_base_repo_path(repository_url)
|
||||
if not os.path.exists(base_repo_path):
|
||||
return False, f"Base repository not found: {base_repo_path}"
|
||||
|
||||
result = subprocess.run(
|
||||
["git", "worktree", "list"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd=base_repo_path
|
||||
)
|
||||
if worktree_path not in result.stdout:
|
||||
return False, "Worktree not registered with git"
|
||||
|
||||
return True, None
|
||||
|
||||
|
||||
def remove_worktree(
|
||||
repository_url: str,
|
||||
work_order_id: str,
|
||||
logger: "structlog.stdlib.BoundLogger"
|
||||
) -> tuple[bool, str | None]:
|
||||
"""Remove a worktree and clean up.
|
||||
|
||||
Args:
|
||||
repository_url: Git repository URL
|
||||
work_order_id: The work order ID for the worktree to remove
|
||||
logger: Logger instance
|
||||
|
||||
Returns:
|
||||
Tuple of (success, error_message)
|
||||
"""
|
||||
worktree_path = get_worktree_path(repository_url, work_order_id)
|
||||
base_repo_path = get_base_repo_path(repository_url)
|
||||
|
||||
# First remove via git (if base repo exists)
|
||||
if os.path.exists(base_repo_path):
|
||||
cmd = ["git", "worktree", "remove", worktree_path, "--force"]
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd=base_repo_path
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
# Try to clean up manually if git command failed
|
||||
if os.path.exists(worktree_path):
|
||||
try:
|
||||
shutil.rmtree(worktree_path)
|
||||
logger.warning(f"Manually removed worktree directory: {worktree_path}")
|
||||
except Exception as e:
|
||||
return False, f"Failed to remove worktree: {result.stderr}, manual cleanup failed: {e}"
|
||||
else:
|
||||
# If base repo doesn't exist, just remove directory
|
||||
if os.path.exists(worktree_path):
|
||||
try:
|
||||
shutil.rmtree(worktree_path)
|
||||
logger.info(f"Removed worktree directory (no base repo): {worktree_path}")
|
||||
except Exception as e:
|
||||
return False, f"Failed to remove worktree directory: {e}"
|
||||
|
||||
logger.info(f"Removed worktree at {worktree_path}")
|
||||
return True, None
|
||||
|
||||
|
||||
def setup_worktree_environment(
|
||||
worktree_path: str,
|
||||
backend_port: int,
|
||||
frontend_port: int,
|
||||
logger: "structlog.stdlib.BoundLogger"
|
||||
) -> None:
|
||||
"""Set up worktree environment by creating .ports.env file.
|
||||
|
||||
The actual environment setup (copying .env files, installing dependencies) is handled
|
||||
by separate commands which run inside the worktree.
|
||||
|
||||
Args:
|
||||
worktree_path: Path to the worktree
|
||||
backend_port: Backend port number
|
||||
frontend_port: Frontend port number
|
||||
logger: Logger instance
|
||||
"""
|
||||
create_ports_env_file(worktree_path, backend_port, frontend_port)
|
||||
logger.info(f"Created .ports.env with Backend: {backend_port}, Frontend: {frontend_port}")
|
||||
@@ -20,6 +20,7 @@ IMPLEMENTOR = "implementor" # Implements changes
|
||||
# Validate Phase
|
||||
CODE_REVIEWER = "code_reviewer" # Reviews code quality
|
||||
TESTER = "tester" # Runs tests
|
||||
REVIEWER = "reviewer" # Reviews against spec
|
||||
|
||||
# Git Operations (support all phases)
|
||||
BRANCH_GENERATOR = "branch_generator" # Creates branches
|
||||
|
||||
308
python/src/agent_work_orders/workflow_engine/review_workflow.py
Normal file
308
python/src/agent_work_orders/workflow_engine/review_workflow.py
Normal file
@@ -0,0 +1,308 @@
|
||||
"""Review Workflow with Automatic Blocker Resolution
|
||||
|
||||
Reviews implementation against spec and automatically resolves blocker issues with retry logic (max 3 attempts).
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ..agent_executor.agent_cli_executor import AgentCLIExecutor
|
||||
from ..command_loader.claude_command_loader import ClaudeCommandLoader
|
||||
from ..models import StepExecutionResult, WorkflowStep
|
||||
from ..utils.structured_logger import get_logger
|
||||
from .agent_names import REVIEWER
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import structlog
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class ReviewIssue:
|
||||
"""Represents a single review issue"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
issue_title: str,
|
||||
issue_description: str,
|
||||
issue_severity: str,
|
||||
affected_files: list[str],
|
||||
screenshots: list[str] | None = None,
|
||||
):
|
||||
self.issue_title = issue_title
|
||||
self.issue_description = issue_description
|
||||
self.issue_severity = issue_severity
|
||||
self.affected_files = affected_files
|
||||
self.screenshots = screenshots or []
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to dictionary for JSON serialization"""
|
||||
return {
|
||||
"issue_title": self.issue_title,
|
||||
"issue_description": self.issue_description,
|
||||
"issue_severity": self.issue_severity,
|
||||
"affected_files": self.affected_files,
|
||||
"screenshots": self.screenshots,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "ReviewIssue":
|
||||
"""Create ReviewIssue from dictionary"""
|
||||
return cls(
|
||||
issue_title=data["issue_title"],
|
||||
issue_description=data["issue_description"],
|
||||
issue_severity=data["issue_severity"],
|
||||
affected_files=data["affected_files"],
|
||||
screenshots=data.get("screenshots", []),
|
||||
)
|
||||
|
||||
|
||||
class ReviewResult:
|
||||
"""Represents review execution result"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
review_passed: bool,
|
||||
review_issues: list[ReviewIssue],
|
||||
screenshots: list[str] | None = None,
|
||||
):
|
||||
self.review_passed = review_passed
|
||||
self.review_issues = review_issues
|
||||
self.screenshots = screenshots or []
|
||||
|
||||
def get_blocker_count(self) -> int:
|
||||
"""Get count of blocker issues"""
|
||||
return sum(1 for issue in self.review_issues if issue.issue_severity == "blocker")
|
||||
|
||||
def get_blocker_issues(self) -> list[ReviewIssue]:
|
||||
"""Get list of blocker issues"""
|
||||
return [issue for issue in self.review_issues if issue.issue_severity == "blocker"]
|
||||
|
||||
|
||||
async def run_review(
|
||||
executor: AgentCLIExecutor,
|
||||
command_loader: ClaudeCommandLoader,
|
||||
spec_file: str,
|
||||
work_order_id: str,
|
||||
working_dir: str,
|
||||
bound_logger: "structlog.stdlib.BoundLogger",
|
||||
) -> ReviewResult:
|
||||
"""Execute review against specification
|
||||
|
||||
Args:
|
||||
executor: Agent CLI executor
|
||||
command_loader: Command loader
|
||||
spec_file: Path to specification file
|
||||
work_order_id: Work order ID
|
||||
working_dir: Working directory
|
||||
bound_logger: Logger instance
|
||||
|
||||
Returns:
|
||||
ReviewResult with issues found
|
||||
"""
|
||||
bound_logger.info("review_execution_started", spec_file=spec_file)
|
||||
|
||||
# Execute review command
|
||||
result = await executor.execute_command(
|
||||
command_name="review_runner",
|
||||
arguments=[spec_file, work_order_id],
|
||||
working_directory=working_dir,
|
||||
logger=bound_logger,
|
||||
)
|
||||
|
||||
if not result.success:
|
||||
bound_logger.error("review_execution_failed", error=result.error_message)
|
||||
# Return empty review result indicating failure
|
||||
return ReviewResult(review_passed=False, review_issues=[])
|
||||
|
||||
# Parse review results from output
|
||||
return parse_review_results(result.result_text or result.stdout or "", bound_logger)
|
||||
|
||||
|
||||
def parse_review_results(
|
||||
output: str, logger: "structlog.stdlib.BoundLogger"
|
||||
) -> ReviewResult:
|
||||
"""Parse review results from JSON output
|
||||
|
||||
Args:
|
||||
output: Command output (should be JSON object)
|
||||
logger: Logger instance
|
||||
|
||||
Returns:
|
||||
ReviewResult
|
||||
"""
|
||||
try:
|
||||
# Try to parse as JSON
|
||||
data = json.loads(output)
|
||||
|
||||
if not isinstance(data, dict):
|
||||
logger.error("review_results_invalid_format", error="Expected JSON object")
|
||||
return ReviewResult(review_passed=False, review_issues=[])
|
||||
|
||||
review_issues = [
|
||||
ReviewIssue.from_dict(issue) for issue in data.get("review_issues", [])
|
||||
]
|
||||
review_passed = data.get("review_passed", False)
|
||||
screenshots = data.get("screenshots", [])
|
||||
|
||||
blocker_count = sum(1 for issue in review_issues if issue.issue_severity == "blocker")
|
||||
|
||||
logger.info(
|
||||
"review_results_parsed",
|
||||
review_passed=review_passed,
|
||||
total_issues=len(review_issues),
|
||||
blockers=blocker_count,
|
||||
)
|
||||
|
||||
return ReviewResult(
|
||||
review_passed=review_passed,
|
||||
review_issues=review_issues,
|
||||
screenshots=screenshots,
|
||||
)
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error("review_results_parse_failed", error=str(e), output_preview=output[:500])
|
||||
return ReviewResult(review_passed=False, review_issues=[])
|
||||
|
||||
|
||||
async def resolve_review_issue(
|
||||
executor: AgentCLIExecutor,
|
||||
command_loader: ClaudeCommandLoader,
|
||||
review_issue: ReviewIssue,
|
||||
work_order_id: str,
|
||||
working_dir: str,
|
||||
bound_logger: "structlog.stdlib.BoundLogger",
|
||||
) -> StepExecutionResult:
|
||||
"""Resolve a single blocker review issue
|
||||
|
||||
Args:
|
||||
executor: Agent CLI executor
|
||||
command_loader: Command loader
|
||||
review_issue: Review issue to resolve
|
||||
work_order_id: Work order ID
|
||||
working_dir: Working directory
|
||||
bound_logger: Logger instance
|
||||
|
||||
Returns:
|
||||
StepExecutionResult with resolution outcome
|
||||
"""
|
||||
bound_logger.info(
|
||||
"review_issue_resolution_started",
|
||||
issue_title=review_issue.issue_title,
|
||||
severity=review_issue.issue_severity,
|
||||
)
|
||||
|
||||
# Convert review issue to JSON for passing to resolve command
|
||||
issue_json = json.dumps(review_issue.to_dict())
|
||||
|
||||
# Execute resolve_failed_review command
|
||||
result = await executor.execute_command(
|
||||
command_name="resolve_failed_review",
|
||||
arguments=[issue_json],
|
||||
working_directory=working_dir,
|
||||
logger=bound_logger,
|
||||
)
|
||||
|
||||
if not result.success:
|
||||
return StepExecutionResult(
|
||||
step=WorkflowStep.RESOLVE_REVIEW,
|
||||
agent_name=REVIEWER,
|
||||
success=False,
|
||||
output=result.result_text or result.stdout,
|
||||
error_message=f"Review issue resolution failed: {result.error_message}",
|
||||
duration_seconds=result.duration_seconds or 0,
|
||||
session_id=result.session_id,
|
||||
)
|
||||
|
||||
return StepExecutionResult(
|
||||
step=WorkflowStep.RESOLVE_REVIEW,
|
||||
agent_name=REVIEWER,
|
||||
success=True,
|
||||
output=f"Resolved review issue: {review_issue.issue_title}",
|
||||
error_message=None,
|
||||
duration_seconds=result.duration_seconds or 0,
|
||||
session_id=result.session_id,
|
||||
)
|
||||
|
||||
|
||||
async def run_review_with_resolution(
|
||||
executor: AgentCLIExecutor,
|
||||
command_loader: ClaudeCommandLoader,
|
||||
spec_file: str,
|
||||
work_order_id: str,
|
||||
working_dir: str,
|
||||
bound_logger: "structlog.stdlib.BoundLogger",
|
||||
max_attempts: int = 3,
|
||||
) -> ReviewResult:
|
||||
"""Run review with automatic blocker resolution and retry logic
|
||||
|
||||
Tech debt and skippable issues are allowed to pass. Only blockers prevent completion.
|
||||
|
||||
Args:
|
||||
executor: Agent CLI executor
|
||||
command_loader: Command loader
|
||||
spec_file: Path to specification file
|
||||
work_order_id: Work order ID
|
||||
working_dir: Working directory
|
||||
bound_logger: Logger instance
|
||||
max_attempts: Maximum retry attempts (default 3)
|
||||
|
||||
Returns:
|
||||
Final ReviewResult
|
||||
"""
|
||||
bound_logger.info("review_workflow_started", max_attempts=max_attempts)
|
||||
|
||||
for attempt in range(1, max_attempts + 1):
|
||||
bound_logger.info("review_attempt_started", attempt=attempt)
|
||||
|
||||
# Run review
|
||||
review_result = await run_review(
|
||||
executor, command_loader, spec_file, work_order_id, working_dir, bound_logger
|
||||
)
|
||||
|
||||
blocker_count = review_result.get_blocker_count()
|
||||
|
||||
if blocker_count == 0:
|
||||
# No blockers, review passes (tech_debt and skippable are acceptable)
|
||||
bound_logger.info(
|
||||
"review_workflow_completed",
|
||||
attempt=attempt,
|
||||
outcome="no_blockers",
|
||||
total_issues=len(review_result.review_issues),
|
||||
)
|
||||
return review_result
|
||||
|
||||
if attempt >= max_attempts:
|
||||
# Max attempts reached
|
||||
bound_logger.warning(
|
||||
"review_workflow_max_attempts_reached",
|
||||
attempt=attempt,
|
||||
blocker_count=blocker_count,
|
||||
)
|
||||
return review_result
|
||||
|
||||
# Resolve each blocker issue
|
||||
blocker_issues = review_result.get_blocker_issues()
|
||||
bound_logger.info(
|
||||
"review_issue_resolution_batch_started",
|
||||
blocker_count=len(blocker_issues),
|
||||
)
|
||||
|
||||
for blocker_issue in blocker_issues:
|
||||
resolution_result = await resolve_review_issue(
|
||||
executor,
|
||||
command_loader,
|
||||
blocker_issue,
|
||||
work_order_id,
|
||||
working_dir,
|
||||
bound_logger,
|
||||
)
|
||||
|
||||
if not resolution_result.success:
|
||||
bound_logger.warning(
|
||||
"review_issue_resolution_failed",
|
||||
issue_title=blocker_issue.issue_title,
|
||||
)
|
||||
|
||||
# Should not reach here, but return last result if we do
|
||||
return review_result
|
||||
311
python/src/agent_work_orders/workflow_engine/test_workflow.py
Normal file
311
python/src/agent_work_orders/workflow_engine/test_workflow.py
Normal file
@@ -0,0 +1,311 @@
|
||||
"""Test Workflow with Automatic Resolution
|
||||
|
||||
Executes test suite and automatically resolves failures with retry logic (max 4 attempts).
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ..agent_executor.agent_cli_executor import AgentCLIExecutor
|
||||
from ..command_loader.claude_command_loader import ClaudeCommandLoader
|
||||
from ..models import StepExecutionResult, WorkflowStep
|
||||
from ..utils.structured_logger import get_logger
|
||||
from .agent_names import TESTER
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import structlog
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class TestResult:
|
||||
"""Represents a single test result"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
test_name: str,
|
||||
passed: bool,
|
||||
execution_command: str,
|
||||
test_purpose: str,
|
||||
error: str | None = None,
|
||||
):
|
||||
self.test_name = test_name
|
||||
self.passed = passed
|
||||
self.execution_command = execution_command
|
||||
self.test_purpose = test_purpose
|
||||
self.error = error
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to dictionary for JSON serialization"""
|
||||
return {
|
||||
"test_name": self.test_name,
|
||||
"passed": self.passed,
|
||||
"execution_command": self.execution_command,
|
||||
"test_purpose": self.test_purpose,
|
||||
"error": self.error,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "TestResult":
|
||||
"""Create TestResult from dictionary"""
|
||||
return cls(
|
||||
test_name=data["test_name"],
|
||||
passed=data["passed"],
|
||||
execution_command=data["execution_command"],
|
||||
test_purpose=data["test_purpose"],
|
||||
error=data.get("error"),
|
||||
)
|
||||
|
||||
|
||||
async def run_tests(
|
||||
executor: AgentCLIExecutor,
|
||||
command_loader: ClaudeCommandLoader,
|
||||
work_order_id: str,
|
||||
working_dir: str,
|
||||
bound_logger: "structlog.stdlib.BoundLogger",
|
||||
) -> StepExecutionResult:
|
||||
"""Execute test suite and return results
|
||||
|
||||
Args:
|
||||
executor: Agent CLI executor
|
||||
command_loader: Command loader
|
||||
work_order_id: Work order ID
|
||||
working_dir: Working directory
|
||||
bound_logger: Logger instance
|
||||
|
||||
Returns:
|
||||
StepExecutionResult with test results
|
||||
"""
|
||||
bound_logger.info("test_execution_started")
|
||||
|
||||
# Execute test command
|
||||
result = await executor.execute_command(
|
||||
command_name="test",
|
||||
arguments=[],
|
||||
working_directory=working_dir,
|
||||
logger=bound_logger,
|
||||
)
|
||||
|
||||
if not result.success:
|
||||
return StepExecutionResult(
|
||||
step=WorkflowStep.TEST,
|
||||
agent_name=TESTER,
|
||||
success=False,
|
||||
output=result.result_text or result.stdout,
|
||||
error_message=f"Test execution failed: {result.error_message}",
|
||||
duration_seconds=result.duration_seconds or 0,
|
||||
session_id=result.session_id,
|
||||
)
|
||||
|
||||
# Parse test results from output
|
||||
test_results, passed_count, failed_count = parse_test_results(
|
||||
result.result_text or result.stdout or "", bound_logger
|
||||
)
|
||||
|
||||
success = failed_count == 0
|
||||
output_summary = f"Tests: {passed_count} passed, {failed_count} failed"
|
||||
|
||||
return StepExecutionResult(
|
||||
step=WorkflowStep.TEST,
|
||||
agent_name=TESTER,
|
||||
success=success,
|
||||
output=output_summary,
|
||||
error_message=None if success else f"{failed_count} test(s) failed",
|
||||
duration_seconds=result.duration_seconds or 0,
|
||||
session_id=result.session_id,
|
||||
)
|
||||
|
||||
|
||||
def parse_test_results(
|
||||
output: str, logger: "structlog.stdlib.BoundLogger"
|
||||
) -> tuple[list[TestResult], int, int]:
|
||||
"""Parse test results from JSON output
|
||||
|
||||
Args:
|
||||
output: Command output (should be JSON array)
|
||||
logger: Logger instance
|
||||
|
||||
Returns:
|
||||
Tuple of (test_results, passed_count, failed_count)
|
||||
"""
|
||||
try:
|
||||
# Try to parse as JSON
|
||||
data = json.loads(output)
|
||||
|
||||
if not isinstance(data, list):
|
||||
logger.error("test_results_invalid_format", error="Expected JSON array")
|
||||
return [], 0, 0
|
||||
|
||||
test_results = [TestResult.from_dict(item) for item in data]
|
||||
passed_count = sum(1 for t in test_results if t.passed)
|
||||
failed_count = sum(1 for t in test_results if not t.passed)
|
||||
|
||||
logger.info(
|
||||
"test_results_parsed",
|
||||
passed=passed_count,
|
||||
failed=failed_count,
|
||||
total=len(test_results),
|
||||
)
|
||||
|
||||
return test_results, passed_count, failed_count
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error("test_results_parse_failed", error=str(e), output_preview=output[:500])
|
||||
return [], 0, 0
|
||||
|
||||
|
||||
async def resolve_failed_test(
|
||||
executor: AgentCLIExecutor,
|
||||
command_loader: ClaudeCommandLoader,
|
||||
test_result: TestResult,
|
||||
work_order_id: str,
|
||||
working_dir: str,
|
||||
bound_logger: "structlog.stdlib.BoundLogger",
|
||||
) -> StepExecutionResult:
|
||||
"""Resolve a single failed test
|
||||
|
||||
Args:
|
||||
executor: Agent CLI executor
|
||||
command_loader: Command loader
|
||||
test_result: Failed test result
|
||||
work_order_id: Work order ID
|
||||
working_dir: Working directory
|
||||
bound_logger: Logger instance
|
||||
|
||||
Returns:
|
||||
StepExecutionResult with resolution outcome
|
||||
"""
|
||||
bound_logger.info(
|
||||
"test_resolution_started",
|
||||
test_name=test_result.test_name,
|
||||
)
|
||||
|
||||
# Convert test result to JSON for passing to resolve command
|
||||
test_json = json.dumps(test_result.to_dict())
|
||||
|
||||
# Execute resolve_failed_test command
|
||||
result = await executor.execute_command(
|
||||
command_name="resolve_failed_test",
|
||||
arguments=[test_json],
|
||||
working_directory=working_dir,
|
||||
logger=bound_logger,
|
||||
)
|
||||
|
||||
if not result.success:
|
||||
return StepExecutionResult(
|
||||
step=WorkflowStep.RESOLVE_TEST,
|
||||
agent_name=TESTER,
|
||||
success=False,
|
||||
output=result.result_text or result.stdout,
|
||||
error_message=f"Test resolution failed: {result.error_message}",
|
||||
duration_seconds=result.duration_seconds or 0,
|
||||
session_id=result.session_id,
|
||||
)
|
||||
|
||||
return StepExecutionResult(
|
||||
step=WorkflowStep.RESOLVE_TEST,
|
||||
agent_name=TESTER,
|
||||
success=True,
|
||||
output=f"Resolved test: {test_result.test_name}",
|
||||
error_message=None,
|
||||
duration_seconds=result.duration_seconds or 0,
|
||||
session_id=result.session_id,
|
||||
)
|
||||
|
||||
|
||||
async def run_tests_with_resolution(
    executor: AgentCLIExecutor,
    command_loader: ClaudeCommandLoader,
    work_order_id: str,
    working_dir: str,
    bound_logger: "structlog.stdlib.BoundLogger",
    max_attempts: int = 4,
) -> tuple[list[TestResult], int, int]:
    """Run tests with automatic failure resolution and retry logic

    Args:
        executor: Agent CLI executor
        command_loader: Command loader
        work_order_id: Work order ID
        working_dir: Working directory
        bound_logger: Logger instance
        max_attempts: Maximum retry attempts (default 4)

    Returns:
        Tuple of (final_test_results, passed_count, failed_count)
    """
    bound_logger.info("test_workflow_started", max_attempts=max_attempts)

    for attempt in range(1, max_attempts + 1):
        bound_logger.info("test_attempt_started", attempt=attempt)

        # Run tests
        test_result = await run_tests(
            executor, command_loader, work_order_id, working_dir, bound_logger
        )

        if test_result.success:
            bound_logger.info("test_workflow_completed", attempt=attempt, outcome="all_passed")
            # Parse final results
            # Re-run to get the actual test results
            final_result = await executor.execute_command(
                command_name="test",
                arguments=[],
                working_directory=working_dir,
                logger=bound_logger,
            )
            final_results, passed, failed = parse_test_results(
                final_result.result_text or final_result.stdout or "", bound_logger
            )
            return final_results, passed, failed

        # Parse failures
        test_execution = await executor.execute_command(
            command_name="test",
            arguments=[],
            working_directory=working_dir,
            logger=bound_logger,
        )
        test_results, passed_count, failed_count = parse_test_results(
            test_execution.result_text or test_execution.stdout or "", bound_logger
        )

        if failed_count == 0:
            # No failures, we're done
            bound_logger.info("test_workflow_completed", attempt=attempt, outcome="all_passed")
            return test_results, passed_count, failed_count

        if attempt >= max_attempts:
            # Max attempts reached
            bound_logger.warning(
                "test_workflow_max_attempts_reached",
                attempt=attempt,
                failed_count=failed_count,
            )
            return test_results, passed_count, failed_count

        # Resolve each failed test
        failed_tests = [t for t in test_results if not t.passed]
        bound_logger.info(
            "test_resolution_batch_started",
            failed_count=len(failed_tests),
        )

        for failed_test in failed_tests:
            resolution_result = await resolve_failed_test(
                executor,
                command_loader,
                failed_test,
                work_order_id,
                working_dir,
                bound_logger,
            )

            if not resolution_result.success:
                bound_logger.warning(
                    "test_resolution_failed",
                    test_name=failed_test.test_name,
                )

    # Should not reach here, but return last results if we do
    return test_results, passed_count, failed_count
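

# Usage sketch (assumptions: AgentCLIExecutor and ClaudeCommandLoader construct with
# no arguments, and the module-level imports of test_workflow.py are in scope; none
# of that is shown in this excerpt):
import asyncio


async def _demo_test_phase() -> None:
    executor = AgentCLIExecutor()           # assumed no-arg construction
    command_loader = ClaudeCommandLoader()  # assumed no-arg construction
    bound_logger = get_logger(__name__)     # structlog-style logger, as typed above

    results, passed, failed = await run_tests_with_resolution(
        executor,
        command_loader,
        work_order_id="wo-123",              # placeholder ID
        working_dir="/path/to/worktree",     # placeholder path
        bound_logger=bound_logger,
        max_attempts=4,
    )
    # failed > 0 here means the retry budget ran out before the suite went green.


# asyncio.run(_demo_test_phase())
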
@@ -18,6 +18,8 @@ from .agent_names import (
    PLAN_FINDER,
    PLANNER,
    PR_CREATOR,
    REVIEWER,
    TESTER,
)

logger = get_logger(__name__)
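
The hunk above pulls REVIEWER and TESTER (among others) from .agent_names, but that module is not included in this excerpt. A two-line sketch of what those constants might hold; the string values are assumptions that merely mirror the literals used in the orchestrator hunk further down:

# agent_names.py (sketch) - constant values are assumptions
REVIEWER = "Reviewer"
TESTER = "Tester"
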
@@ -442,3 +444,227 @@ async def create_pull_request(
            error_message=str(e),
            duration_seconds=duration,
        )


async def run_tests(
    executor: AgentCLIExecutor,
    command_loader: ClaudeCommandLoader,
    work_order_id: str,
    working_dir: str,
) -> StepExecutionResult:
    """Execute test suite

    Returns: StepExecutionResult with test results summary
    """
    start_time = time.time()

    try:
        command_file = command_loader.load_command("test")

        cli_command, prompt_text = executor.build_command(command_file, args=[])

        result = await executor.execute_async(
            cli_command, working_dir, prompt_text=prompt_text, work_order_id=work_order_id
        )

        duration = time.time() - start_time

        if result.success:
            return StepExecutionResult(
                step=WorkflowStep.TEST,
                agent_name=TESTER,
                success=True,
                output=result.result_text or "Tests passed",
                duration_seconds=duration,
                session_id=result.session_id,
            )
        else:
            return StepExecutionResult(
                step=WorkflowStep.TEST,
                agent_name=TESTER,
                success=False,
                error_message=result.error_message or "Tests failed",
                output=result.result_text,
                duration_seconds=duration,
            )

    except Exception as e:
        duration = time.time() - start_time
        logger.error("run_tests_error", error=str(e), exc_info=True)
        return StepExecutionResult(
            step=WorkflowStep.TEST,
            agent_name=TESTER,
            success=False,
            error_message=str(e),
            duration_seconds=duration,
        )


async def resolve_test_failure(
    executor: AgentCLIExecutor,
    command_loader: ClaudeCommandLoader,
    test_failure_json: str,
    work_order_id: str,
    working_dir: str,
) -> StepExecutionResult:
    """Resolve a failed test

    Args:
        test_failure_json: JSON string with test failure details

    Returns: StepExecutionResult with resolution outcome
    """
    start_time = time.time()

    try:
        command_file = command_loader.load_command("resolve_failed_test")

        cli_command, prompt_text = executor.build_command(command_file, args=[test_failure_json])

        result = await executor.execute_async(
            cli_command, working_dir, prompt_text=prompt_text, work_order_id=work_order_id
        )

        duration = time.time() - start_time

        if result.success:
            return StepExecutionResult(
                step=WorkflowStep.RESOLVE_TEST,
                agent_name=TESTER,
                success=True,
                output=result.result_text or "Test failure resolved",
                duration_seconds=duration,
                session_id=result.session_id,
            )
        else:
            return StepExecutionResult(
                step=WorkflowStep.RESOLVE_TEST,
                agent_name=TESTER,
                success=False,
                error_message=result.error_message or "Resolution failed",
                duration_seconds=duration,
            )

    except Exception as e:
        duration = time.time() - start_time
        logger.error("resolve_test_failure_error", error=str(e), exc_info=True)
        return StepExecutionResult(
            step=WorkflowStep.RESOLVE_TEST,
            agent_name=TESTER,
            success=False,
            error_message=str(e),
            duration_seconds=duration,
        )


async def run_review(
    executor: AgentCLIExecutor,
    command_loader: ClaudeCommandLoader,
    spec_file: str,
    work_order_id: str,
    working_dir: str,
) -> StepExecutionResult:
    """Execute review against specification

    Returns: StepExecutionResult with review results
    """
    start_time = time.time()

    try:
        command_file = command_loader.load_command("review_runner")

        cli_command, prompt_text = executor.build_command(
            command_file, args=[spec_file, work_order_id]
        )

        result = await executor.execute_async(
            cli_command, working_dir, prompt_text=prompt_text, work_order_id=work_order_id
        )

        duration = time.time() - start_time

        if result.success:
            return StepExecutionResult(
                step=WorkflowStep.REVIEW,
                agent_name=REVIEWER,
                success=True,
                output=result.result_text or "Review completed",
                duration_seconds=duration,
                session_id=result.session_id,
            )
        else:
            return StepExecutionResult(
                step=WorkflowStep.REVIEW,
                agent_name=REVIEWER,
                success=False,
                error_message=result.error_message or "Review failed",
                duration_seconds=duration,
            )

    except Exception as e:
        duration = time.time() - start_time
        logger.error("run_review_error", error=str(e), exc_info=True)
        return StepExecutionResult(
            step=WorkflowStep.REVIEW,
            agent_name=REVIEWER,
            success=False,
            error_message=str(e),
            duration_seconds=duration,
        )


async def resolve_review_issue(
    executor: AgentCLIExecutor,
    command_loader: ClaudeCommandLoader,
    review_issue_json: str,
    work_order_id: str,
    working_dir: str,
) -> StepExecutionResult:
    """Resolve a review blocker issue

    Args:
        review_issue_json: JSON string with review issue details

    Returns: StepExecutionResult with resolution outcome
    """
    start_time = time.time()

    try:
        command_file = command_loader.load_command("resolve_failed_review")

        cli_command, prompt_text = executor.build_command(command_file, args=[review_issue_json])

        result = await executor.execute_async(
            cli_command, working_dir, prompt_text=prompt_text, work_order_id=work_order_id
        )

        duration = time.time() - start_time

        if result.success:
            return StepExecutionResult(
                step=WorkflowStep.RESOLVE_REVIEW,
                agent_name=REVIEWER,
                success=True,
                output=result.result_text or "Review issue resolved",
                duration_seconds=duration,
                session_id=result.session_id,
            )
        else:
            return StepExecutionResult(
                step=WorkflowStep.RESOLVE_REVIEW,
                agent_name=REVIEWER,
                success=False,
                error_message=result.error_message or "Resolution failed",
                duration_seconds=duration,
            )

    except Exception as e:
        duration = time.time() - start_time
        logger.error("resolve_review_issue_error", error=str(e), exc_info=True)
        return StepExecutionResult(
            step=WorkflowStep.RESOLVE_REVIEW,
            agent_name=REVIEWER,
            success=False,
            error_message=str(e),
            duration_seconds=duration,
        )

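
The orchestrator hunk below reads review_result.review_issues and review_result.get_blocker_count() from whatever run_review_with_resolution returns; that model is not part of this excerpt, so the following is only a minimal sketch of a shape that would satisfy those two accesses (field and severity names beyond them are assumptions):

# Hypothetical sketch - only review_issues and get_blocker_count() are taken from the code below.
from dataclasses import dataclass, field


@dataclass
class ReviewIssueSketch:
    description: str
    severity: str  # assumed values, e.g. "blocker" for issues that should gate the workflow


@dataclass
class ReviewResultSketch:
    review_issues: list[ReviewIssueSketch] = field(default_factory=list)

    def get_blocker_count(self) -> int:
        # Count only blocker-severity issues; everything else is allowed to pass.
        return sum(1 for issue in self.review_issues if issue.severity == "blocker")
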
@@ -234,7 +234,78 @@ class WorkflowOrchestrator:

        bound_logger.info("step_completed", step="commit")

        # Step 7: Create PR
        # Step 7: Run tests (if enabled)
        from ..config import config
        if config.ENABLE_TEST_PHASE:
            from .test_workflow import run_tests_with_resolution

            bound_logger.info("test_phase_started")
            test_results, passed_count, failed_count = await run_tests_with_resolution(
                self.agent_executor,
                self.command_loader,
                agent_work_order_id,
                sandbox.working_dir,
                bound_logger,
                max_attempts=config.MAX_TEST_RETRY_ATTEMPTS,
            )

            # Record test execution in step history
            test_summary = f"Tests: {passed_count} passed, {failed_count} failed"
            from ..models import StepExecutionResult
            test_step = StepExecutionResult(
                step=WorkflowStep.TEST,
                agent_name="Tester",
                success=(failed_count == 0),
                output=test_summary,
                error_message=f"{failed_count} test(s) failed" if failed_count > 0 else None,
                duration_seconds=0,
            )
            step_history.steps.append(test_step)
            await self.state_repository.save_step_history(agent_work_order_id, step_history)

            if failed_count > 0:
                bound_logger.warning("test_phase_completed_with_failures", failed_count=failed_count)
            else:
                bound_logger.info("test_phase_completed", passed_count=passed_count)

        # Step 8: Run review (if enabled)
        if config.ENABLE_REVIEW_PHASE:
            from .review_workflow import run_review_with_resolution

            # Determine spec file path from plan_file or default
            spec_file = plan_file if plan_file else f"PRPs/specs/{issue_class}-spec.md"

            bound_logger.info("review_phase_started", spec_file=spec_file)
            review_result = await run_review_with_resolution(
                self.agent_executor,
                self.command_loader,
                spec_file,
                agent_work_order_id,
                sandbox.working_dir,
                bound_logger,
                max_attempts=config.MAX_REVIEW_RETRY_ATTEMPTS,
            )

            # Record review execution in step history
            blocker_count = review_result.get_blocker_count()
            review_summary = f"Review: {len(review_result.review_issues)} issues found, {blocker_count} blockers"
            review_step = StepExecutionResult(
                step=WorkflowStep.REVIEW,
                agent_name="Reviewer",
                success=(blocker_count == 0),
                output=review_summary,
                error_message=f"{blocker_count} blocker(s) remaining" if blocker_count > 0 else None,
                duration_seconds=0,
            )
            step_history.steps.append(review_step)
            await self.state_repository.save_step_history(agent_work_order_id, step_history)

            if blocker_count > 0:
                bound_logger.warning("review_phase_completed_with_blockers", blocker_count=blocker_count)
            else:
                bound_logger.info("review_phase_completed", issue_count=len(review_result.review_issues))

        # Step 9: Create PR
        pr_result = await workflow_operations.create_pull_request(
            self.agent_executor,
            self.command_loader,
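
Both new phases in the orchestrator hunk above are gated on config.ENABLE_TEST_PHASE and config.ENABLE_REVIEW_PHASE and bounded by config.MAX_TEST_RETRY_ATTEMPTS and config.MAX_REVIEW_RETRY_ATTEMPTS. A minimal sketch of how such settings could be exposed as environment-driven flags; the attribute names come from the code above, while the environment variable spellings and default values are assumptions:

# Sketch only - env var names and defaults are assumptions, not taken from this diff.
import os


class WorkflowPhaseConfigSketch:
    ENABLE_TEST_PHASE: bool = os.getenv("ENABLE_TEST_PHASE", "true").lower() == "true"
    ENABLE_REVIEW_PHASE: bool = os.getenv("ENABLE_REVIEW_PHASE", "true").lower() == "true"
    MAX_TEST_RETRY_ATTEMPTS: int = int(os.getenv("MAX_TEST_RETRY_ATTEMPTS", "4"))
    MAX_REVIEW_RETRY_ATTEMPTS: int = int(os.getenv("MAX_REVIEW_RETRY_ATTEMPTS", "3"))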