agent-fleet/src/main.rs
Zer4tul 48c93e2ce9 feat: dynamic execution mode — Undecided tasks, two-phase dispatch, assign API
- ExecutionMode enum adds Undecided variant (default for new tasks)
- Webhook creates tasks as Undecided instead of hardcoded SshCli
- Dispatch loop: Phase 1 matches ssh_cli hosts, Phase 2 marks remaining as HttpPull
- Dequeue now returns http_pull AND undecided tasks (atomic claim)
- New endpoint: POST /api/v1/tasks/{id}/assign for coordinator explicit assignment
- Backward compatible: existing SshCli/HttpPull tasks unaffected
- 37 tests passing (6 new)
2026-05-13 05:29:12 +08:00

94 lines
3.8 KiB
Rust

mod adapters;
mod api;
mod config;
mod core;
mod dispatch;
mod execution;
mod integrations;
use clap::Parser;
// Command-line options for the orchestrator binary.
//
// `--bind` and `--port`, when given, replace the corresponding `server`
// settings from the loaded configuration. This note is a plain `//` comment on
// purpose: clap's derive treats `///` doc comments as help text, and the
// command description is already set explicitly via `about` below.
#[derive(Parser)]
#[command(name = "agent-fleet", about = "Agent Fleet Orchestrator")]
struct Cli {
    /// Path to the configuration file; defaults are used if it cannot be loaded
    #[arg(short, long, default_value = "config.toml")]
    config: String,

    /// Address to bind the HTTP server to (overrides the configured server bind address)
    #[arg(long)]
    bind: Option<String>,

    /// Port to listen on (overrides the configured server port)
    #[arg(short, long)]
    port: Option<u16>,
}
/// Process entry point for the Agent Fleet orchestrator.
///
/// Startup sequence:
/// 1. initialise tracing,
/// 2. parse CLI arguments and load the configuration (CLI `--bind` / `--port`
///    override the loaded values),
/// 3. open the event store and build the state machine on top of it,
/// 4. spawn the background tasks (task-timeout checker, heartbeat checker,
///    dispatcher),
/// 5. build the HTTP router and serve it.
///
/// # Panics
///
/// Panics if the event store cannot be opened, if the listen address cannot be
/// bound, or if the HTTP server returns an error.
#[tokio::main]
async fn main() {
    // Take the log filter from the default environment variable; when it is
    // unset or invalid, fall back to info level for this crate and tower_http.
    tracing_subscriber::fmt()
        .with_env_filter(
            tracing_subscriber::EnvFilter::try_from_default_env()
                .unwrap_or_else(|_| "agent_fleet=info,tower_http=info".into()),
        )
        .init();

    let cli = Cli::parse();

    // A missing or unreadable config file is not fatal: warn and run on defaults.
    let mut config = match config::Config::load(&cli.config) {
        Ok(c) => c,
        Err(e) => {
            tracing::warn!("could not load config from {}: {e}, using defaults", cli.config);
            config::Config::default()
        }
    };

    // Command-line overrides win over whatever the config file provided.
    if let Some(bind) = cli.bind {
        config.server.bind = bind;
    }
    if let Some(port) = cli.port {
        config.server.port = port;
    }

    // The event store is shared by every component below behind Arc<Mutex<_>>.
    let event_store =
        core::event_store::EventStore::open(std::path::Path::new(&config.orchestrator.db_path))
            .expect("failed to open event store");
    let store = std::sync::Arc::new(std::sync::Mutex::new(event_store));
    let state_machine =
        std::sync::Arc::new(core::state_machine::StateMachine::new(store.clone()));

    // Background task: task timeout checker.
    // NOTE(review): the 30 s duration is presumably the polling interval and the
    // second duration the per-task timeout; confirm against TimeoutChecker::new.
    let timeout_checker = std::sync::Arc::new(core::timeout::TimeoutChecker::new(
        state_machine.clone(),
        store.clone(),
        std::time::Duration::from_secs(30),
        std::time::Duration::from_secs(config.orchestrator.task_timeout_secs),
    ));
    tokio::spawn(async move { timeout_checker.run().await });

    // Heartbeat timeout = interval * threshold. Both factors come from the
    // configuration, so the product is computed with saturating arithmetic and a
    // checked conversion: an oversized value clamps to i64::MAX instead of
    // overflowing the multiply or wrapping to a negative timeout.
    let heartbeat_timeout = i64::try_from(
        config
            .orchestrator
            .heartbeat_interval_secs
            .saturating_mul(config.orchestrator.heartbeat_timeout_threshold as u64),
    )
    .unwrap_or(i64::MAX);

    // Background task: agent heartbeat checker.
    let heartbeat_checker = std::sync::Arc::new(api::HeartbeatChecker::new(
        store.clone(),
        std::time::Duration::from_secs(config.orchestrator.heartbeat_interval_secs),
        heartbeat_timeout,
    ));
    tokio::spawn(async move { heartbeat_checker.run().await });

    // Background task: dispatcher.
    let dispatcher =
        dispatch::Dispatcher::new(config.clone(), store.clone(), state_machine.clone());
    tokio::spawn(async move { dispatcher.run().await });

    // HTTP API surface.
    let app_state = api::AppState::new(config.clone(), store.clone());
    let app = axum::Router::new()
        .route("/healthz", axum::routing::get(|| async { "ok" }))
        .route("/api/v1/agents/register", axum::routing::post(api::register_agent))
        .route("/api/v1/agents/heartbeat", axum::routing::post(api::heartbeat))
        .route("/api/v1/agents/deregister", axum::routing::post(api::deregister))
        .route("/api/v1/agents", axum::routing::get(api::list_agents))
        .route("/api/v1/tasks", axum::routing::get(api::list_tasks))
        .route("/api/v1/tasks/dequeue", axum::routing::post(api::dequeue_task))
        .route("/api/v1/tasks/{task_id}", axum::routing::get(api::get_task))
        .route("/api/v1/tasks/{task_id}/assign", axum::routing::post(api::assign_task))
        .route("/api/v1/tasks/{task_id}/status", axum::routing::post(api::update_task_status))
        .route("/api/v1/tasks/{task_id}/complete", axum::routing::post(api::complete_task))
        .route("/api/v1/tasks/{task_id}/retry", axum::routing::post(api::retry_task))
        .route("/api/v1/receipts", axum::routing::post(api::submit_receipt))
        .route("/api/v1/webhooks/forgejo", axum::routing::post(api::forgejo_webhook))
        .with_state(app_state);

    let bind_addr = format!("{}:{}", config.server.bind, config.server.port);
    let listener = tokio::net::TcpListener::bind(&bind_addr)
        .await
        .expect("failed to bind");

    // Reading back the local address is only needed for the log line, so a
    // failure here is reported instead of aborting an already-bound server.
    match listener.local_addr() {
        Ok(addr) => tracing::info!("listening on {}", addr),
        Err(e) => tracing::warn!("listening on {bind_addr} (could not read local address: {e})"),
    }

    axum::serve(listener, app).await.expect("server error");
}