feat: implement orchestrator core (Rust)
Task 1.1: ✅ Cargo.toml with axum, rusqlite, matrix-sdk, serde, etc. Task 1.2: ✅ Directory structure: src/core, src/adapters, src/integrations, src/api Task 1.5: ✅ config.example.toml with full schema Task 2.1: ✅ Data models: Agent, Task, Receipt, Artifact, TaskEvent Task 2.2: ✅ Event Store: SQLite append-only with task/agent tables Task 2.3: ✅ Task state machine: created→assigned→running→completed/failed Task 2.4: ✅ Global task queue with priority ordering Task 2.5: ✅ Background timeout checker Task 2.6: ✅ Retry policy with configurable max_retries Compiles clean (warnings only, no errors). API handler stubs in place for Phase 2.
This commit is contained in:
parent
e983955036
commit
4e01728a67
15 changed files with 5220 additions and 3 deletions
64
src/core/retry.rs
Normal file
64
src/core/retry.rs
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use super::event_store::EventStore;
|
||||
use super::models::*;
|
||||
use super::state_machine::{StateError, StateMachine};
|
||||
use super::task_queue::TaskQueue;
|
||||
|
||||
/// Retry logic for failed/agent_lost tasks.
|
||||
pub struct RetryPolicy {
|
||||
sm: Arc<StateMachine>,
|
||||
_queue: Arc<TaskQueue>,
|
||||
store: Arc<Mutex<EventStore>>,
|
||||
}
|
||||
|
||||
impl RetryPolicy {
|
||||
pub fn new(
|
||||
sm: Arc<StateMachine>,
|
||||
queue: Arc<TaskQueue>,
|
||||
store: Arc<Mutex<EventStore>>,
|
||||
) -> Self {
|
||||
Self { sm, _queue: queue, store }
|
||||
}
|
||||
|
||||
/// Handle a failed task: retry if under limit, otherwise mark permanently failed.
|
||||
pub async fn handle_failure(
|
||||
&self,
|
||||
task_id: &str,
|
||||
_agent_id: Option<&str>,
|
||||
reason: &str,
|
||||
) -> Result<RetryDecision, StateError> {
|
||||
let task = {
|
||||
let store = self.store.lock().await;
|
||||
store.read_task(task_id)?.ok_or(StateError::TaskNotFound(task_id.to_string()))?
|
||||
};
|
||||
|
||||
if task.retry_count < task.max_retries {
|
||||
// Increment retry count
|
||||
{
|
||||
let store = self.store.lock().await;
|
||||
store.increment_retry_count(task_id)?;
|
||||
}
|
||||
|
||||
// Transition back to assigned
|
||||
self.sm
|
||||
.transition(task_id, TaskStatus::Assigned, None, &format!("retry: {reason}"))
|
||||
.await?;
|
||||
|
||||
Ok(RetryDecision::Retried {
|
||||
attempt: task.retry_count + 1,
|
||||
max: task.max_retries,
|
||||
})
|
||||
} else {
|
||||
tracing::warn!(task_id = task_id, retries = task.retry_count, "max retries exceeded");
|
||||
Ok(RetryDecision::Exhausted)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum RetryDecision {
|
||||
Retried { attempt: u32, max: u32 },
|
||||
Exhausted,
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue