Skip to content

Commit 08877af

Browse files
cexllSWE-Agent.ai
andcommitted
fix(harness): replace implicit .harness-active absence with explicit .harness-reflect marker for self-reflect triggering
Fixes two bugs: - False positive: stale harness-tasks.json from previous runs triggered self-reflect when harness wasn't active - False negative: self-reflect didn't trigger after harness completed all tasks Generated with SWE-Agent.ai Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
1 parent 6834094 commit 08877af

3 files changed

Lines changed: 53 additions & 13 deletions

File tree

skills/harness/hooks/harness-stop.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,11 @@ def main() -> int:
260260
# If nothing left to do, allow stop
261261
if not pending_eligible and not retryable and not in_progress_blocking:
262262
_reset_block_counter(root)
263+
# Signal self-reflect hook BEFORE removing active marker
264+
try:
265+
(root / ".harness-reflect").touch()
266+
except Exception:
267+
pass
263268
try:
264269
(root / ".harness-active").unlink(missing_ok=True)
265270
except Exception:

skills/harness/hooks/self-reflect-stop.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -146,13 +146,15 @@ def main() -> int:
146146
if not session_id:
147147
return 0 # 无 session_id,放行
148148

149-
# 守卫 1:harness 从未初始化过 → 完全不触发自检
149+
# 守卫:仅当 harness 完成所有任务后(.harness-reflect 存在)才触发自省
150+
# 这避免了两个问题:
151+
# 1. 历史残留的 harness-tasks.json 导致误触发(false positive)
152+
# 2. harness-stop.py 移除 .harness-active 后 Claude Code 跳过后续 hook(false negative)
150153
root = _find_harness_root(payload)
151154
if root is None:
152-
return 0 # harness 未曾使用,不触发自省
155+
return 0
153156

154-
# 守卫 2:harness 仍活跃 → 由 harness-stop.py 全权管理
155-
if (root / ".harness-active").is_file():
157+
if not (root / ".harness-reflect").is_file():
156158
return 0
157159

158160
# 读取最大迭代次数
@@ -168,8 +170,12 @@ def main() -> int:
168170
# 读取当前计数
169171
count = _read_counter(session_id)
170172

171-
# 超过最大次数,放行
173+
# 超过最大次数,清理 marker 并放行
172174
if count >= max_iter:
175+
try:
176+
(root / ".harness-reflect").unlink(missing_ok=True)
177+
except Exception:
178+
pass
173179
return 0
174180

175181
# 递增计数

skills/harness/tests/test_hooks.py

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ def _payload(self, **extra):
200200
return {"cwd": self.tmpdir, **extra}
201201

202202
def test_all_completed_allows_stop(self):
203-
"""When all tasks are completed, stop is allowed."""
203+
"""When all tasks are completed, stop is allowed and .harness-reflect created."""
204204
write_tasks(self.root, [
205205
{"id": "t1", "status": "completed"},
206206
{"id": "t2", "status": "completed"},
@@ -209,6 +209,10 @@ def test_all_completed_allows_stop(self):
209209
self.assertEqual(code, 0)
210210
self.assertEqual(stdout, "")
211211
self.assertFalse((self.root / ".harness-active").exists())
212+
self.assertTrue(
213+
(self.root / ".harness-reflect").exists(),
214+
".harness-reflect should be created when all tasks complete",
215+
)
212216

213217
def test_pending_with_unmet_deps_allows_stop(self):
214218
"""Pending tasks with unmet dependencies don't block stop."""
@@ -644,7 +648,7 @@ def test_tasks_not_a_list(self):
644648

645649

646650
class TestSelfReflectStopHook(unittest.TestCase):
647-
"""self-reflect-stop.py must only trigger when harness was used and completed."""
651+
"""self-reflect-stop.py must only trigger when .harness-reflect marker exists."""
648652

649653
def setUp(self):
650654
self.tmpdir = tempfile.mkdtemp()
@@ -663,14 +667,18 @@ def tearDown(self):
663667
def _payload(self, session_id="test-reflect-001", **extra):
664668
return {"cwd": self.tmpdir, "session_id": session_id, **extra}
665669

670+
def _set_reflect(self):
671+
"""Create .harness-reflect marker (simulates harness completion)."""
672+
(self.root / ".harness-reflect").touch()
673+
666674
def test_no_harness_root_is_noop(self):
667675
"""When harness-tasks.json doesn't exist, hook is a complete no-op."""
668676
code, stdout, stderr = run_hook(REFLECT_HOOK, self._payload())
669677
self.assertEqual(code, 0)
670678
self.assertEqual(stdout, "", "Should produce no output when harness never used")
671679

672-
def test_harness_active_defers(self):
673-
"""When .harness-active exists, hook defers to harness-stop.py."""
680+
def test_harness_active_no_reflect_marker(self):
681+
"""When .harness-active exists but no .harness-reflect, hook is no-op."""
674682
write_tasks(self.root, [
675683
{"id": "t1", "status": "pending", "depends_on": []},
676684
])
@@ -679,12 +687,24 @@ def test_harness_active_defers(self):
679687
self.assertEqual(code, 0)
680688
self.assertEqual(stdout, "", "Should not self-reflect while harness is active")
681689

690+
def test_stale_tasks_without_reflect_marker_is_noop(self):
691+
"""Stale harness-tasks.json without .harness-reflect does NOT trigger (fixes false positive)."""
692+
write_tasks(self.root, [
693+
{"id": "t1", "status": "completed"},
694+
])
695+
deactivate(self.root)
696+
# No .harness-reflect marker — this is a stale file from a previous run
697+
code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id="test-stale"))
698+
self.assertEqual(code, 0)
699+
self.assertEqual(stdout, "", "Stale harness-tasks.json should NOT trigger self-reflect")
700+
682701
def test_harness_completed_triggers_reflection(self):
683-
"""When harness-tasks.json exists but .harness-active removed, triggers self-reflection."""
702+
"""When .harness-reflect marker exists, triggers self-reflection."""
684703
write_tasks(self.root, [
685704
{"id": "t1", "status": "completed"},
686705
])
687706
deactivate(self.root)
707+
self._set_reflect()
688708
sid = "test-reflect-trigger"
689709
code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id=sid))
690710
self.assertEqual(code, 0)
@@ -696,6 +716,7 @@ def test_counter_increments(self):
696716
"""Each invocation increments the iteration counter."""
697717
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
698718
deactivate(self.root)
719+
self._set_reflect()
699720
sid = "test-reflect-counter"
700721

701722
# First call: iteration 1
@@ -708,10 +729,11 @@ def test_counter_increments(self):
708729
data = json.loads(stdout)
709730
self.assertIn("2/5", data["reason"])
710731

711-
def test_max_iterations_allows_stop(self):
712-
"""After max iterations, hook allows stop (no output)."""
732+
def test_max_iterations_allows_stop_and_cleans_marker(self):
733+
"""After max iterations, hook allows stop and removes .harness-reflect."""
713734
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
714735
deactivate(self.root)
736+
self._set_reflect()
715737
sid = "test-reflect-max"
716738

717739
# Write counter at max
@@ -721,11 +743,16 @@ def test_max_iterations_allows_stop(self):
721743
code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id=sid))
722744
self.assertEqual(code, 0)
723745
self.assertEqual(stdout, "", "Should allow stop after max iterations")
746+
self.assertFalse(
747+
(self.root / ".harness-reflect").exists(),
748+
".harness-reflect should be cleaned up after max iterations",
749+
)
724750

725751
def test_disabled_via_env(self):
726752
"""REFLECT_MAX_ITERATIONS=0 disables self-reflection."""
727753
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
728754
deactivate(self.root)
755+
self._set_reflect()
729756
code, stdout, _ = run_hook(
730757
REFLECT_HOOK,
731758
self._payload(session_id="test-reflect-disabled"),
@@ -738,14 +765,15 @@ def test_no_session_id_is_noop(self):
738765
"""Missing session_id makes hook a no-op."""
739766
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
740767
deactivate(self.root)
768+
self._set_reflect()
741769
code, stdout, _ = run_hook(REFLECT_HOOK, {"cwd": self.tmpdir})
742770
self.assertEqual(code, 0)
743771
self.assertEqual(stdout, "")
744772

745773
def test_empty_stdin_no_crash(self):
746774
"""Empty stdin doesn't crash."""
747775
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
748-
activate(self.root)
776+
self._set_reflect()
749777
proc = subprocess.run(
750778
[sys.executable, str(REFLECT_HOOK)],
751779
input="",
@@ -760,6 +788,7 @@ def test_harness_state_root_env_respected(self):
760788
"""HARNESS_STATE_ROOT env var is used for root discovery."""
761789
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
762790
deactivate(self.root)
791+
self._set_reflect()
763792
sid = "test-reflect-env"
764793
code, stdout, _ = run_hook(
765794
REFLECT_HOOK,

0 commit comments

Comments
 (0)