fix: resolve 3 CRITICAL + 5 MAJOR issues from Codex review
C1: Arc<Mutex<EventStore>> changed from tokio::sync to std::sync + spawn_blocking
C2: StateMachine::transition merged into single lock scope
C3: Transaction boundaries (BEGIN/COMMIT) on all composite writes
M4: retry_count no longer overwritten by update_task_status
M5: RetryPolicy::handle_failure now atomic (single lock + transaction)
M6: Per-task timeout_seconds used in SQL instead of global config
M7: Explicit Priority::order() method instead of relying on variant order
M8: dequeue_and_assign uses CAS-style WHERE status='created' for atomicity
This commit is contained in:
parent
b1a4d66c13
commit
2658a74730
7 changed files with 434 additions and 235 deletions
|
|
@ -1,7 +1,6 @@
|
|||
use chrono::Utc;
|
||||
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use super::event_store::EventStore;
|
||||
use super::models::*;
|
||||
|
|
@ -15,6 +14,7 @@ impl StateMachine {
|
|||
Self { store }
|
||||
}
|
||||
|
||||
/// C1 + C2: Single lock scope, spawn_blocking, transactional transition.
|
||||
pub async fn transition(
|
||||
&self,
|
||||
task_id: &str,
|
||||
|
|
@ -22,63 +22,86 @@ impl StateMachine {
|
|||
agent_id: Option<&str>,
|
||||
reason: &str,
|
||||
) -> Result<Task, StateError> {
|
||||
let store = self.store.lock().await;
|
||||
let task_id = task_id.to_string();
|
||||
let reason = reason.to_string();
|
||||
let agent_id_owned = agent_id.map(String::from);
|
||||
let store = self.store.clone();
|
||||
|
||||
let task = store.read_task(task_id)?
|
||||
.ok_or(StateError::TaskNotFound(task_id.to_string()))?;
|
||||
tokio::task::spawn_blocking(move || -> Result<Task, StateError> {
|
||||
let mut store = store.lock().map_err(|e| StateError::Poisoned(e.to_string()))?;
|
||||
|
||||
Self::validate_transition(&task.status, &new_status)?;
|
||||
let task = store
|
||||
.read_task(&task_id)?
|
||||
.ok_or_else(|| StateError::TaskNotFound(task_id.clone()))?;
|
||||
|
||||
let now = Utc::now();
|
||||
Self::validate_transition(&task.status, &new_status)?;
|
||||
|
||||
store.update_task_status(
|
||||
task_id,
|
||||
new_status.as_str(),
|
||||
agent_id,
|
||||
if new_status == TaskStatus::Assigned { Some(now.to_rfc3339()) } else { None },
|
||||
if new_status == TaskStatus::Running { Some(now.to_rfc3339()) } else { None },
|
||||
if matches!(new_status, TaskStatus::Completed | TaskStatus::Failed | TaskStatus::Cancelled) { Some(now.to_rfc3339()) } else { None },
|
||||
task.retry_count,
|
||||
)?;
|
||||
let now = Utc::now();
|
||||
let event = TaskEvent {
|
||||
event_id: uuid::Uuid::new_v4().to_string(),
|
||||
task_id: task_id.clone(),
|
||||
event_type: format!("task.{}", new_status.as_str()),
|
||||
agent_id: agent_id_owned.clone(),
|
||||
timestamp: now,
|
||||
payload: serde_json::json!({
|
||||
"from_status": task.status.as_str(),
|
||||
"to_status": new_status.as_str(),
|
||||
"reason": reason,
|
||||
}),
|
||||
};
|
||||
|
||||
let event = TaskEvent {
|
||||
event_id: uuid::Uuid::new_v4().to_string(),
|
||||
task_id: task_id.to_string(),
|
||||
event_type: format!("task.{}", new_status.as_str()),
|
||||
agent_id: agent_id.map(String::from),
|
||||
timestamp: now,
|
||||
payload: serde_json::json!({
|
||||
"from_status": task.status.as_str(),
|
||||
"to_status": new_status.as_str(),
|
||||
"reason": reason,
|
||||
}),
|
||||
};
|
||||
store.append_event(&event)?;
|
||||
|
||||
drop(store);
|
||||
|
||||
// Re-read to return updated task
|
||||
let store = self.store.lock().await;
|
||||
let updated = store.read_task(task_id)?.unwrap();
|
||||
Ok(updated)
|
||||
Ok(store.transition_task(
|
||||
&task_id,
|
||||
new_status.as_str(),
|
||||
agent_id_owned.as_deref(),
|
||||
if new_status == TaskStatus::Assigned {
|
||||
Some(now.to_rfc3339())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
if new_status == TaskStatus::Running {
|
||||
Some(now.to_rfc3339())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
if matches!(
|
||||
new_status,
|
||||
TaskStatus::Completed | TaskStatus::Failed | TaskStatus::Cancelled
|
||||
) {
|
||||
Some(now.to_rfc3339())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
&event,
|
||||
)?)
|
||||
})
|
||||
.await
|
||||
.map_err(StateError::Join)?
|
||||
}
|
||||
|
||||
pub async fn create_task(&self, task: &Task) -> Result<Task, StateError> {
|
||||
let store = self.store.lock().await;
|
||||
let task = task.clone();
|
||||
let store = self.store.clone();
|
||||
|
||||
store.insert_task(task)?;
|
||||
tokio::task::spawn_blocking(move || -> Result<Task, StateError> {
|
||||
let store = store.lock().map_err(|e| StateError::Poisoned(e.to_string()))?;
|
||||
|
||||
let event = TaskEvent {
|
||||
event_id: uuid::Uuid::new_v4().to_string(),
|
||||
task_id: task.task_id.clone(),
|
||||
event_type: "task.created".into(),
|
||||
agent_id: None,
|
||||
timestamp: Utc::now(),
|
||||
payload: serde_json::json!({ "source": task.source }),
|
||||
};
|
||||
store.append_event(&event)?;
|
||||
store.insert_task(&task)?;
|
||||
|
||||
Ok(task.clone())
|
||||
let event = TaskEvent {
|
||||
event_id: uuid::Uuid::new_v4().to_string(),
|
||||
task_id: task.task_id.clone(),
|
||||
event_type: "task.created".into(),
|
||||
agent_id: None,
|
||||
timestamp: Utc::now(),
|
||||
payload: serde_json::json!({ "source": task.source }),
|
||||
};
|
||||
store.append_event_direct(&event)?;
|
||||
|
||||
Ok(task)
|
||||
})
|
||||
.await
|
||||
.map_err(StateError::Join)?
|
||||
}
|
||||
|
||||
fn validate_transition(from: &TaskStatus, to: &TaskStatus) -> Result<(), StateError> {
|
||||
|
|
@ -87,7 +110,10 @@ impl StateMachine {
|
|||
TaskStatus::Assigned => matches!(to, TaskStatus::Running | TaskStatus::Cancelled),
|
||||
TaskStatus::Running => matches!(
|
||||
to,
|
||||
TaskStatus::Completed | TaskStatus::Failed | TaskStatus::AgentLost | TaskStatus::Cancelled
|
||||
TaskStatus::Completed
|
||||
| TaskStatus::Failed
|
||||
| TaskStatus::AgentLost
|
||||
| TaskStatus::Cancelled
|
||||
),
|
||||
TaskStatus::Failed | TaskStatus::AgentLost => {
|
||||
matches!(to, TaskStatus::Assigned | TaskStatus::Cancelled)
|
||||
|
|
@ -125,4 +151,8 @@ pub enum StateError {
|
|||
InvalidTransition(String, String),
|
||||
#[error("database error: {0}")]
|
||||
Database(#[from] rusqlite::Error),
|
||||
#[error("task join error: {0}")]
|
||||
Join(#[from] tokio::task::JoinError),
|
||||
#[error("mutex poisoned: {0}")]
|
||||
Poisoned(String),
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue