agent-fleet/src/core/task_queue.rs
Zer4tul 2658a74730 fix: resolve 3 CRITICAL + 5 MAJOR issues from Codex review
C1: Arc<Mutex<EventStore>> changed from tokio::sync to std::sync + spawn_blocking
C2: StateMachine::transition merged into single lock scope
C3: Transaction boundaries (BEGIN/COMMIT) on all composite writes
M4: retry_count no longer overwritten by update_task_status
M5: RetryPolicy::handle_failure now atomic (single lock + transaction)
M6: Per-task timeout_seconds used in SQL instead of global config
M7: Explicit Priority::order() method instead of relying on variant order
M8: dequeue_and_assign uses CAS-style WHERE status='created' for atomicity
2026-05-11 19:08:18 +08:00

70 lines
2.4 KiB
Rust

use std::sync::{Arc, Mutex};
use super::event_store::EventStore;
use super::models::*;
use super::state_machine::{StateError, StateMachine};
/// Global task queue ordered by priority.
///
/// The queue keeps no in-memory list of tasks: every operation is forwarded
/// either to the shared [`StateMachine`] or to the shared [`EventStore`].
/// NOTE(review): the priority ordering itself is presumably applied inside
/// `EventStore::dequeue_and_assign`; nothing in this type sorts tasks.
pub struct TaskQueue {
    /// State machine used to create tasks and to perform status transitions.
    sm: Arc<StateMachine>,
    /// Event store guarded by a `std::sync::Mutex`; it is locked only from
    /// inside a blocking task when dequeuing.
    store: Arc<Mutex<EventStore>>,
}
impl TaskQueue {
pub fn new(sm: Arc<StateMachine>, store: Arc<Mutex<EventStore>>) -> Self {
Self { sm, store }
}
/// Enqueue a new task (status = created).
pub async fn enqueue(&self, task: Task) -> Result<Task, StateError> {
self.sm.create_task(&task).await
}
/// M8: Dequeue the highest-priority task matching capabilities.
/// Atomically transitions to `Assigned` inside a single DB transaction
/// via `dequeue_and_assign`, preventing concurrent dequeue of the same task.
pub async fn dequeue(
&self,
required_capabilities: &[String],
agent_id: Option<&str>,
) -> Result<Option<Task>, StateError> {
let caps = required_capabilities.to_vec();
let agent_id_owned = agent_id.map(String::from);
let store = self.store.clone();
tokio::task::spawn_blocking(move || -> Result<Option<Task>, StateError> {
let mut store = store.lock().map_err(|e| StateError::Poisoned(e.to_string()))?;
let now = chrono::Utc::now();
let event = TaskEvent {
event_id: uuid::Uuid::new_v4().to_string(),
// task_id filled inside dequeue_and_assign
task_id: String::new(),
event_type: "task.assigned".into(),
agent_id: agent_id_owned.clone(),
timestamp: now,
payload: serde_json::json!({
"from_status": "created",
"to_status": "assigned",
"reason": "dequeued",
}),
};
Ok(store.dequeue_and_assign(
&caps,
agent_id_owned.as_deref(),
now.to_rfc3339(),
&event,
)?)
})
.await
.map_err(StateError::Join)?
}
/// Re-queue a failed/agent_lost task (delegates to state machine transition).
pub async fn requeue(&self, task_id: &str) -> Result<Task, StateError> {
self.sm
.transition(task_id, TaskStatus::Assigned, None, "re-queued after failure")
.await
}
}