From e62660853560e09a15f081d003faeee432f027a7 Mon Sep 17 00:00:00 2001
From: Max Mayfield
Date: Sun, 1 Mar 2026 02:40:45 +0000
Subject: [PATCH] Add proxy latency benchmark (criterion, 1000 samples, 1/5/10 msg variants)

---
 .../benches/proxy_latency.rs | 149 ++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 products/01-llm-cost-router/benches/proxy_latency.rs

diff --git a/products/01-llm-cost-router/benches/proxy_latency.rs b/products/01-llm-cost-router/benches/proxy_latency.rs
new file mode 100644
index 0000000..0eb925a
--- /dev/null
+++ b/products/01-llm-cost-router/benches/proxy_latency.rs
@@ -0,0 +1,149 @@
+//! Proxy latency overhead benchmark.
+//! Measures the time added by the proxy layer (routing + telemetry emission)
+//! excluding upstream provider latency.
+//!
+//! Run: cargo bench --bench proxy_latency
+//! CI gate: P99 must be < 5ms
+
+use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
+use std::sync::Arc;
+use tokio::sync::mpsc;
+
+use dd0c_route::{
+    AppConfig, TelemetryEvent, RouterBrain,
+    proxy::{create_router, ProxyState},
+};
+
+/// Auth provider that accepts every request and returns a fixed context,
+/// so the benchmark never touches a real authentication backend.
+struct NoOpAuth;
+
+#[async_trait::async_trait]
+impl dd0c_route::AuthProvider for NoOpAuth {
+    /// Ignores the headers and always returns the `bench-org` member context.
+    //
+    // NOTE(review): the generic arguments of this return type were missing from
+    // the archived patch text. `AuthContext` follows from the body below; the
+    // error type is an assumption -- confirm it against the `AuthProvider` trait.
+    async fn authenticate(
+        &self,
+        _headers: &axum::http::HeaderMap,
+    ) -> Result<dd0c_route::AuthContext, dd0c_route::AuthError> {
+        Ok(dd0c_route::AuthContext {
+            org_id: "bench-org".to_string(),
+            user_id: None,
+            role: dd0c_route::Role::Member,
+        })
+    }
+}
+
+/// Builds the `POST /v1/chat/completions` request sent on every iteration.
+fn chat_request(body: String) -> axum::http::Request<axum::body::Body> {
+    axum::http::Request::builder()
+        .method("POST")
+        .uri("/v1/chat/completions")
+        .header("content-type", "application/json")
+        .header("authorization", "Bearer bench-key")
+        .body(axum::body::Body::from(body))
+        .unwrap()
+}
+
+/// Benchmarks one in-process request through the proxy router for chat
+/// payloads of 1, 5 and 10 messages, with the provider pointed at a local mock.
+fn bench_proxy_overhead(c: &mut Criterion) {
+    let rt = tokio::runtime::Runtime::new().unwrap();
+
+    // We measure only the proxy overhead — no real upstream call.
+    // The mock returns instantly, so measured time = pure proxy overhead.
+    //
+    // The `MockServer` handle is returned from the block and kept in scope until
+    // the end of this function: wiremock shuts the server down when the handle
+    // is dropped, which would leave every benchmarked request without an upstream.
+    let mock = rt.block_on(async {
+        let mock = wiremock::MockServer::start().await;
+        wiremock::Mock::given(wiremock::matchers::any())
+            .respond_with(
+                wiremock::ResponseTemplate::new(200)
+                    .set_body_string(r#"{"id":"bench","choices":[{"message":{"content":"ok"}}],"usage":{"prompt_tokens":1,"completion_tokens":1}}"#)
+                    .insert_header("content-type", "application/json"),
+            )
+            .mount(&mock)
+            .await;
+        mock
+    });
+
+    let (tx, mut rx) = mpsc::channel::<TelemetryEvent>(10000);
+    // Drain telemetry in the background. Nothing else reads this channel, so
+    // without a consumer it would fill up after 10000 events and the remaining
+    // iterations would no longer measure normal telemetry emission.
+    rt.spawn(async move { while rx.recv().await.is_some() {} });
+
+    let mut providers = std::collections::HashMap::new();
+    providers.insert("openai".to_string(), dd0c_route::config::ProviderConfig {
+        api_key: "bench-key".to_string(),
+        base_url: mock.uri(),
+    });
+
+    let config = Arc::new(AppConfig {
+        proxy_port: 0,
+        api_port: 0,
+        database_url: String::new(),
+        redis_url: String::new(),
+        timescale_url: String::new(),
+        jwt_secret: "bench".to_string(),
+        auth_mode: dd0c_route::config::AuthMode::Local,
+        governance_mode: dd0c_route::config::GovernanceMode::Audit,
+        providers,
+        telemetry_channel_size: 10000,
+    });
+
+    let state = Arc::new(ProxyState {
+        auth: Arc::new(NoOpAuth),
+        router: Arc::new(RouterBrain::new()),
+        telemetry_tx: tx,
+        http_client: reqwest::Client::new(),
+        config,
+    });
+
+    let app = create_router(state);
+
+    let mut group = c.benchmark_group("proxy_overhead");
+    group.sample_size(1000);
+
+    for msg_count in [1, 5, 10] {
+        let messages: Vec<_> = (0..msg_count)
+            .map(|i| serde_json::json!({"role": "user", "content": format!("msg {}", i)}))
+            .collect();
+        let body = serde_json::json!({
+            "model": "gpt-4o",
+            "messages": messages,
+        }).to_string();
+
+        // Fail fast when the request does not succeed; otherwise the numbers
+        // below would describe an error path rather than a proxied call.
+        let status = rt
+            .block_on(tower::ServiceExt::oneshot(app.clone(), chat_request(body.clone())))
+            .unwrap()
+            .status();
+        assert!(status.is_success(), "warm-up request returned {}", status);
+
+        group.bench_with_input(
+            BenchmarkId::new("chat_completions", format!("{}_msgs", msg_count)),
+            &body,
+            |b, body| {
+                b.to_async(&rt).iter(|| {
+                    let app = app.clone();
+                    let body = body.clone();
+                    async move {
+                        tower::ServiceExt::oneshot(app, chat_request(body)).await.unwrap()
+                    }
+                });
+            },
+        );
+    }
+
+    group.finish();
+}
+
+criterion_group!(benches, bench_proxy_overhead);
+criterion_main!(benches);