//! Proxy latency overhead benchmark. //! Measures the time added by the proxy layer (routing + telemetry emission) //! excluding upstream provider latency. //! //! Run: cargo bench --bench proxy_latency //! CI gate: P99 must be < 5ms use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use std::sync::Arc; use tokio::sync::mpsc; use dd0c_route::{ proxy::{create_router, ProxyState}, AppConfig, RouterBrain, TelemetryEvent, }; struct NoOpAuth; #[async_trait::async_trait] impl dd0c_route::AuthProvider for NoOpAuth { async fn authenticate( &self, _headers: &axum::http::HeaderMap, ) -> Result { Ok(dd0c_route::AuthContext { org_id: "bench-org".to_string(), user_id: None, role: dd0c_route::Role::Member, }) } } fn bench_proxy_overhead(c: &mut Criterion) { let rt = tokio::runtime::Runtime::new().unwrap(); // We measure only the proxy overhead — no real upstream call. // The mock returns instantly, so measured time = pure proxy overhead. let mock_url = rt.block_on(async { let mock = wiremock::MockServer::start().await; wiremock::Mock::given(wiremock::matchers::any()) .respond_with( wiremock::ResponseTemplate::new(200) .set_body_string(r#"{"id":"bench","choices":[{"message":{"content":"ok"}}],"usage":{"prompt_tokens":1,"completion_tokens":1}}"#) .insert_header("content-type", "application/json"), ) .mount(&mock) .await; mock.uri() }); let (tx, _rx) = mpsc::channel::(10000); let mut providers = std::collections::HashMap::new(); providers.insert( "openai".to_string(), dd0c_route::config::ProviderConfig { api_key: "bench-key".to_string(), base_url: mock_url.clone(), }, ); let config = Arc::new(AppConfig { proxy_port: 0, api_port: 0, database_url: String::new(), redis_url: String::new(), timescale_url: String::new(), jwt_secret: "bench".to_string(), auth_mode: dd0c_route::config::AuthMode::Local, governance_mode: dd0c_route::config::GovernanceMode::Audit, providers, telemetry_channel_size: 10000, }); let state = Arc::new(ProxyState { auth: Arc::new(NoOpAuth), router: Arc::new(RouterBrain::new()), telemetry_tx: tx, http_client: reqwest::Client::new(), config, }); let app = create_router(state); let mut group = c.benchmark_group("proxy_overhead"); group.sample_size(1000); for msg_count in [1, 5, 10] { let mut messages = vec![]; for i in 0..msg_count { messages.push(serde_json::json!({"role": "user", "content": format!("msg {}", i)})); } let body = serde_json::json!({ "model": "gpt-4o", "messages": messages, }) .to_string(); group.bench_with_input( BenchmarkId::new("chat_completions", format!("{}_msgs", msg_count)), &body, |b, body| { let app = app.clone(); b.to_async(&rt).iter(|| { let app = app.clone(); let body = body.clone(); async move { let req = axum::http::Request::builder() .method("POST") .uri("/v1/chat/completions") .header("content-type", "application/json") .header("authorization", "Bearer bench-key") .body(axum::body::Body::from(body)) .unwrap(); tower::ServiceExt::oneshot(app, req).await.unwrap() } }); }, ); } group.finish(); } criterion_group!(benches, bench_proxy_overhead); criterion_main!(benches);