fix: resolve 3 CRITICAL + 5 MAJOR issues from Codex review
C1: Arc<Mutex<EventStore>> changed from tokio::sync to std::sync + spawn_blocking
C2: StateMachine::transition merged into single lock scope
C3: Transaction boundaries (BEGIN/COMMIT) on all composite writes
M4: retry_count no longer overwritten by update_task_status
M5: RetryPolicy::handle_failure now atomic (single lock + transaction)
M6: Per-task timeout_seconds used in SQL instead of global config
M7: Explicit Priority::order() method instead of relying on variant order
M8: dequeue_and_assign uses CAS-style WHERE status='created' for atomicity
This commit is contained in:
parent
b1a4d66c13
commit
2658a74730
7 changed files with 434 additions and 235 deletions
|
|
@ -1,5 +1,4 @@
|
|||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use super::event_store::EventStore;
|
||||
use super::models::*;
|
||||
|
|
@ -21,35 +20,48 @@ impl TaskQueue {
|
|||
self.sm.create_task(&task).await
|
||||
}
|
||||
|
||||
/// Dequeue the highest-priority task matching the given capabilities.
|
||||
/// M8: Dequeue the highest-priority task matching capabilities.
|
||||
/// Atomically transitions to `Assigned` inside a single DB transaction
|
||||
/// via `dequeue_and_assign`, preventing concurrent dequeue of the same task.
|
||||
pub async fn dequeue(
|
||||
&self,
|
||||
required_capabilities: &[String],
|
||||
agent_id: Option<&str>,
|
||||
) -> Result<Option<Task>, StateError> {
|
||||
let tasks = {
|
||||
let store = self.store.lock().await;
|
||||
store.query_queued_tasks()?
|
||||
};
|
||||
let caps = required_capabilities.to_vec();
|
||||
let agent_id_owned = agent_id.map(String::from);
|
||||
let store = self.store.clone();
|
||||
|
||||
if required_capabilities.is_empty() {
|
||||
return Ok(tasks.into_iter().next());
|
||||
}
|
||||
tokio::task::spawn_blocking(move || -> Result<Option<Task>, StateError> {
|
||||
let mut store = store.lock().map_err(|e| StateError::Poisoned(e.to_string()))?;
|
||||
let now = chrono::Utc::now();
|
||||
|
||||
for task in tasks {
|
||||
let all_match = required_capabilities
|
||||
.iter()
|
||||
.all(|cap| {
|
||||
task.labels.iter().any(|l| l == cap) || &task.task_type == cap
|
||||
});
|
||||
if all_match {
|
||||
return Ok(Some(task));
|
||||
}
|
||||
}
|
||||
let event = TaskEvent {
|
||||
event_id: uuid::Uuid::new_v4().to_string(),
|
||||
// task_id filled inside dequeue_and_assign
|
||||
task_id: String::new(),
|
||||
event_type: "task.assigned".into(),
|
||||
agent_id: agent_id_owned.clone(),
|
||||
timestamp: now,
|
||||
payload: serde_json::json!({
|
||||
"from_status": "created",
|
||||
"to_status": "assigned",
|
||||
"reason": "dequeued",
|
||||
}),
|
||||
};
|
||||
|
||||
Ok(None)
|
||||
Ok(store.dequeue_and_assign(
|
||||
&caps,
|
||||
agent_id_owned.as_deref(),
|
||||
now.to_rfc3339(),
|
||||
&event,
|
||||
)?)
|
||||
})
|
||||
.await
|
||||
.map_err(StateError::Join)?
|
||||
}
|
||||
|
||||
/// Re-queue a failed/agent_lost task (increment retry_count).
|
||||
/// Re-queue a failed/agent_lost task (delegates to state machine transition).
|
||||
pub async fn requeue(&self, task_id: &str) -> Result<Task, StateError> {
|
||||
self.sm
|
||||
.transition(task_id, TaskStatus::Assigned, None, "re-queued after failure")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue