feat: agent registry API + heartbeat checker + core unit tests

Tasks completed:
- 2.7: Core unit tests (14 tests: state machine, event store, queue, timeout, retry)
- 3.1: POST /api/v1/agents/register (upsert on duplicate)
- 3.2: POST /api/v1/agents/heartbeat
- 3.3: POST /api/v1/agents/deregister (marks agent offline + requeues running tasks)
- 3.4: GET /api/v1/agents (filter by capability + status)
- 3.5: Background heartbeat checker (marks offline, sets tasks agent_lost)
- 3.6: API unit tests (register, duplicate, heartbeat, deregister, checker)

All 14 tests pass. `cargo check` is clean (warnings only).
This commit is contained in:
Zer4tul 2026-05-11 19:29:16 +08:00
parent 2658a74730
commit b75546bda6
9 changed files with 1023 additions and 115 deletions

View file

@ -1,3 +1,4 @@
mod api;
mod config;
mod core;
@ -51,16 +52,18 @@ async fn main() {
config.server.port
);
// Initialize event store
let event_store = core::event_store::EventStore::open(std::path::Path::new(&config.orchestrator.db_path))
.expect("failed to open event store");
let event_store = core::event_store::EventStore::open(std::path::Path::new(
&config.orchestrator.db_path,
))
.expect("failed to open event store");
let store = std::sync::Arc::new(std::sync::Mutex::new(event_store));
// Initialize core components
let state_machine = std::sync::Arc::new(core::state_machine::StateMachine::new(store.clone()));
let task_queue = std::sync::Arc::new(core::task_queue::TaskQueue::new(state_machine.clone(), store.clone()));
let _task_queue = std::sync::Arc::new(core::task_queue::TaskQueue::new(
state_machine.clone(),
store.clone(),
));
// Start timeout checker
let timeout_checker = std::sync::Arc::new(core::timeout::TimeoutChecker::new(
state_machine.clone(),
store.clone(),
@ -69,32 +72,25 @@ async fn main() {
));
tokio::spawn(async move { timeout_checker.run().await });
// Build axum router (API stubs for now)
let heartbeat_timeout = (config.orchestrator.heartbeat_interval_secs
* config.orchestrator.heartbeat_timeout_threshold as u64) as i64;
let heartbeat_checker = std::sync::Arc::new(api::HeartbeatChecker::new(
store.clone(),
std::time::Duration::from_secs(config.orchestrator.heartbeat_interval_secs),
heartbeat_timeout,
));
tokio::spawn(async move { heartbeat_checker.run().await });
let app = axum::Router::new()
.route("/healthz", axum::routing::get(|| async { "ok" }))
.route(
"/api/v1/agents/register",
axum::routing::post(handlers::register_agent),
)
.route(
"/api/v1/agents/heartbeat",
axum::routing::post(handlers::heartbeat),
)
.route(
"/api/v1/agents/deregister",
axum::routing::post(handlers::deregister),
)
.route(
"/api/v1/agents",
axum::routing::get(handlers::list_agents),
)
.route(
"/api/v1/receipts",
axum::routing::post(handlers::submit_receipt),
)
.route("/api/v1/agents/register", axum::routing::post(api::register_agent))
.route("/api/v1/agents/heartbeat", axum::routing::post(api::heartbeat))
.route("/api/v1/agents/deregister", axum::routing::post(api::deregister))
.route("/api/v1/agents", axum::routing::get(api::list_agents))
.route("/api/v1/receipts", axum::routing::post(api::submit_receipt))
.route(
"/api/v1/webhooks/forgejo",
axum::routing::post(handlers::forgejo_webhook),
axum::routing::post(api::forgejo_webhook),
)
.with_state(store.clone());
@ -108,24 +104,3 @@ async fn main() {
tracing::info!("listening on {}", listener.local_addr().unwrap());
axum::serve(listener, app).await.expect("server error");
}
/// Placeholder HTTP handlers.
///
/// Each handler takes no arguments and resolves immediately to the literal
/// text `TODO`; none of them reads a request or touches any state.
mod handlers {
    /// Response body shared by every placeholder handler in this module.
    const PLACEHOLDER: &str = "TODO";

    /// Placeholder for the agent registration endpoint.
    pub async fn register_agent() -> &'static str {
        PLACEHOLDER
    }

    /// Placeholder for the agent heartbeat endpoint.
    pub async fn heartbeat() -> &'static str {
        PLACEHOLDER
    }

    /// Placeholder for the agent deregistration endpoint.
    pub async fn deregister() -> &'static str {
        PLACEHOLDER
    }

    /// Placeholder for the agent listing endpoint.
    pub async fn list_agents() -> &'static str {
        PLACEHOLDER
    }

    /// Placeholder for the receipt submission endpoint.
    pub async fn submit_receipt() -> &'static str {
        PLACEHOLDER
    }

    /// Placeholder for the Forgejo webhook endpoint.
    pub async fn forgejo_webhook() -> &'static str {
        PLACEHOLDER
    }
}