From 6fe9c110e2d2d02968de418bc84635d78964667b Mon Sep 17 00:00:00 2001
From: Rasmus Widing <rasmus.widing@gmail.com>
Date: Thu, 16 Oct 2025 19:33:45 +0300
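Subject: [PATCH] test: update agent work order tests for new workflow
 architecture

- API and state manager tests: step fixtures now use
  WorkflowStep.CREATE_BRANCH / WorkflowStep.PLANNING (agent names
  "BranchCreator" / "Planner") instead of CLASSIFY / PLAN.
- Sandbox factory test: drop the NotImplementedError expectation for
  SandboxType.GIT_WORKTREE; the E2B expectation is kept.
- Orchestrator tests: execute_workflow is no longer expected to raise
  WorkflowExecutionError. The unknown-command test now checks that
  update_status was called with AgentWorkOrderStatus.FAILED and an
  error_message containing "Unknown command".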

---
 python/tests/agent_work_orders/test_api.py    | 18 +++---
 .../agent_work_orders/test_sandbox_manager.py |  7 ---
 .../agent_work_orders/test_state_manager.py   | 24 ++++----
 .../test_workflow_orchestrator.py             | 59 ++++++++++---------
 4 files changed, 53 insertions(+), 55 deletions(-)

diff --git a/python/tests/agent_work_orders/test_api.py b/python/tests/agent_work_orders/test_api.py
index 3a863496..9fa4abf0 100644
--- a/python/tests/agent_work_orders/test_api.py
+++ b/python/tests/agent_work_orders/test_api.py
@@ -309,15 +309,15 @@ def test_get_agent_work_order_steps():
         agent_work_order_id="wo-test123",
         steps=[
             StepExecutionResult(
-                step=WorkflowStep.CLASSIFY,
-                agent_name="classifier",
+                step=WorkflowStep.CREATE_BRANCH,
+                agent_name="BranchCreator",
                 success=True,
-                output="/feature",
+                output="feat/test-feature",
                 duration_seconds=1.0,
             ),
             StepExecutionResult(
-                step=WorkflowStep.PLAN,
-                agent_name="planner",
+                step=WorkflowStep.PLANNING,
+                agent_name="Planner",
                 success=True,
                 output="Plan created",
                 duration_seconds=5.0,
@@ -334,11 +334,11 @@ def test_get_agent_work_order_steps():
         data = response.json()
         assert data["agent_work_order_id"] == "wo-test123"
         assert len(data["steps"]) == 2
-        assert data["steps"][0]["step"] == "classify"
-        assert data["steps"][0]["agent_name"] == "classifier"
+        assert data["steps"][0]["step"] == "create-branch"
+        assert data["steps"][0]["agent_name"] == "BranchCreator"
         assert data["steps"][0]["success"] is True
-        assert data["steps"][1]["step"] == "plan"
-        assert data["steps"][1]["agent_name"] == "planner"
+        assert data["steps"][1]["step"] == "planning"
+        assert data["steps"][1]["agent_name"] == "Planner"
 
 
 def test_get_agent_work_order_steps_not_found():
diff --git a/python/tests/agent_work_orders/test_sandbox_manager.py b/python/tests/agent_work_orders/test_sandbox_manager.py
index 01ef9007..87ba8c33 100644
--- a/python/tests/agent_work_orders/test_sandbox_manager.py
+++ b/python/tests/agent_work_orders/test_sandbox_manager.py
@@ -183,13 +183,6 @@ def test_sandbox_factory_not_implemented():
     """Test creating unsupported sandbox types"""
     factory = SandboxFactory()
 
-    with pytest.raises(NotImplementedError):
-        factory.create_sandbox(
-            sandbox_type=SandboxType.GIT_WORKTREE,
-            repository_url="https://github.com/owner/repo",
-            sandbox_identifier="sandbox-test",
-        )
-
     with pytest.raises(NotImplementedError):
         factory.create_sandbox(
             sandbox_type=SandboxType.E2B,
diff --git a/python/tests/agent_work_orders/test_state_manager.py b/python/tests/agent_work_orders/test_state_manager.py
index 3e01e9af..69da5a82 100644
--- a/python/tests/agent_work_orders/test_state_manager.py
+++ b/python/tests/agent_work_orders/test_state_manager.py
@@ -243,16 +243,16 @@ async def test_save_and_get_step_history():
     repo = WorkOrderRepository()
 
     step1 = StepExecutionResult(
-        step=WorkflowStep.CLASSIFY,
-        agent_name="classifier",
+        step=WorkflowStep.CREATE_BRANCH,
+        agent_name="BranchCreator",
         success=True,
-        output="/feature",
+        output="feat/test-feature",
         duration_seconds=1.0,
     )
 
     step2 = StepExecutionResult(
-        step=WorkflowStep.PLAN,
-        agent_name="planner",
+        step=WorkflowStep.PLANNING,
+        agent_name="Planner",
         success=True,
         output="Plan created",
         duration_seconds=5.0,
@@ -266,8 +266,8 @@ async def test_save_and_get_step_history():
     assert retrieved is not None
     assert retrieved.agent_work_order_id == "wo-test123"
     assert len(retrieved.steps) == 2
-    assert retrieved.steps[0].step == WorkflowStep.CLASSIFY
-    assert retrieved.steps[1].step == WorkflowStep.PLAN
+    assert retrieved.steps[0].step == WorkflowStep.CREATE_BRANCH
+    assert retrieved.steps[1].step == WorkflowStep.PLANNING
 
 
 @pytest.mark.asyncio
@@ -286,10 +286,10 @@ async def test_update_step_history():
 
     # Initial history
     step1 = StepExecutionResult(
-        step=WorkflowStep.CLASSIFY,
-        agent_name="classifier",
+        step=WorkflowStep.CREATE_BRANCH,
+        agent_name="BranchCreator",
         success=True,
-        output="/feature",
+        output="feat/test-feature",
         duration_seconds=1.0,
     )
 
@@ -298,8 +298,8 @@ async def test_update_step_history():
 
     # Add more steps
     step2 = StepExecutionResult(
-        step=WorkflowStep.PLAN,
-        agent_name="planner",
+        step=WorkflowStep.PLANNING,
+        agent_name="Planner",
         success=True,
         output="Plan created",
         duration_seconds=5.0,
diff --git a/python/tests/agent_work_orders/test_workflow_orchestrator.py b/python/tests/agent_work_orders/test_workflow_orchestrator.py
index 9fb05bff..832492c7 100644
--- a/python/tests/agent_work_orders/test_workflow_orchestrator.py
+++ b/python/tests/agent_work_orders/test_workflow_orchestrator.py
@@ -191,15 +191,14 @@ async def test_execute_workflow_stop_on_failure(mock_dependencies):
             duration_seconds=5.0,
         )
 
-        # Execute workflow - should stop at planning
-        with pytest.raises(WorkflowExecutionError, match="Planning failed"):
-            await orchestrator.execute_workflow(
-                agent_work_order_id="wo-test",
-                repository_url="https://github.com/owner/repo",
-                sandbox_type=SandboxType.GIT_BRANCH,
-                user_request="Test feature",
-                selected_commands=["create-branch", "planning", "execute"],
-            )
+        # Execute workflow - should stop at planning and save error to state
+        await orchestrator.execute_workflow(
+            agent_work_order_id="wo-test",
+            repository_url="https://github.com/owner/repo",
+            sandbox_type=SandboxType.GIT_BRANCH,
+            user_request="Test feature",
+            selected_commands=["create-branch", "planning", "execute"],
+        )
 
         # Verify only first 2 commands executed, not the third
         assert mock_branch.called
@@ -334,17 +333,24 @@ async def test_execute_workflow_updates_pr_url(mock_dependencies):
 
 @pytest.mark.asyncio
 async def test_execute_workflow_unknown_command(mock_dependencies):
-    """Test that unknown commands raise error"""
+    """Test that unknown commands save error to state"""
     orchestrator, mocks = mock_dependencies
 
-    with pytest.raises(WorkflowExecutionError, match="Unknown command"):
-        await orchestrator.execute_workflow(
-            agent_work_order_id="wo-test",
-            repository_url="https://github.com/owner/repo",
-            sandbox_type=SandboxType.GIT_BRANCH,
-            user_request="Test feature",
-            selected_commands=["invalid-command"],
-        )
+    await orchestrator.execute_workflow(
+        agent_work_order_id="wo-test",
+        repository_url="https://github.com/owner/repo",
+        sandbox_type=SandboxType.GIT_BRANCH,
+        user_request="Test feature",
+        selected_commands=["invalid-command"],
+    )
+
+    # Verify error was saved to state
+    status_calls = [call for call in mocks["state_repository"].update_status.call_args_list
+                   if call[0][1] == AgentWorkOrderStatus.FAILED]
+    assert len(status_calls) > 0
+    # Check that error message contains "Unknown command"
+    error_messages = [call.kwargs.get("error_message", "") for call in status_calls]
+    assert any("Unknown command" in msg for msg in error_messages)
 
 
 @pytest.mark.asyncio
@@ -362,14 +368,13 @@ async def test_execute_workflow_sandbox_cleanup(mock_dependencies):
             duration_seconds=1.0,
         )
 
-        with pytest.raises(WorkflowExecutionError):
-            await orchestrator.execute_workflow(
-                agent_work_order_id="wo-test",
-                repository_url="https://github.com/owner/repo",
-                sandbox_type=SandboxType.GIT_BRANCH,
-                user_request="Test feature",
-                selected_commands=["create-branch"],
-            )
+        await orchestrator.execute_workflow(
+            agent_work_order_id="wo-test",
+            repository_url="https://github.com/owner/repo",
+            sandbox_type=SandboxType.GIT_BRANCH,
+            user_request="Test feature",
+            selected_commands=["create-branch"],
+        )
 
-        # Verify sandbox cleanup was called
+        # Verify sandbox cleanup was called even on failure
         assert mocks["sandbox"].cleanup.called