Add proxy latency benchmark (criterion, 1000 samples, 1/5/10 msg variants)

2026-03-01 02:40:45 +00:00
parent e882f181d5
commit e626608535
1 changed files with 122 additions and 0 deletions
--- a/products/01-llm-cost-router/benches/proxy_latency.rs
+++ b/products/01-llm-cost-router/benches/proxy_latency.rs
@@ -0,0 +1,122 @@
+//! Proxy latency overhead benchmark.
+//! Measures the time added by the proxy layer (routing + telemetry emission)
+//! excluding upstream provider latency.
+//!
+//! Run: cargo bench --bench proxy_latency
+//! CI gate: P99 must be < 5ms
+
+use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
+use std::sync::Arc;
+use tokio::sync::mpsc;
+
+use dd0c_route::{
+    AppConfig, TelemetryEvent, RouterBrain,
+    proxy::{create_router, ProxyState},
+};
+
+/// Auth provider stub that accepts every request as one fixed benchmark identity.
+struct NoOpAuth;
+
+#[async_trait::async_trait]
+impl dd0c_route::AuthProvider for NoOpAuth {
+    /// Ignores the headers and always succeeds with the `bench-org` member context.
+    async fn authenticate(
+        &self,
+        _headers: &axum::http::HeaderMap,
+    ) -> Result<dd0c_route::AuthContext, dd0c_route::AuthError> {
+        let org_id = String::from("bench-org");
+        let role = dd0c_route::Role::Member;
+        Ok(dd0c_route::AuthContext { org_id, user_id: None, role })
+    }
+}
+
+/// Benchmarks the in-process proxy router against an instant local mock upstream,
+/// registering one `chat_completions` case per message count (1, 5, 10) in the
+/// `proxy_overhead` group (1000 samples each, POST to `/v1/chat/completions`).
+fn bench_proxy_overhead(c: &mut Criterion) {
+    let rt = tokio::runtime::Runtime::new().unwrap();
+
+    // We measure only the proxy overhead — no real upstream call.
+    // The mock returns instantly, so measured time = pure proxy overhead.
+    // Hold the server handle itself: the mock is only guaranteed while it is alive.
+    let mock = rt.block_on(async {
+        let mock = wiremock::MockServer::start().await;
+        wiremock::Mock::given(wiremock::matchers::any())
+            .respond_with(
+                wiremock::ResponseTemplate::new(200)
+                    .set_body_string(r#"{"id":"bench","choices":[{"message":{"content":"ok"}}],"usage":{"prompt_tokens":1,"completion_tokens":1}}"#)
+                    .insert_header("content-type", "application/json"),
+            )
+            .mount(&mock)
+            .await;
+        mock
+    });
+
+    // Drain telemetry in the background so the bounded channel never fills up;
+    // a full channel would push later iterations onto the send-failure/backpressure path.
+    let (tx, mut rx) = mpsc::channel::<TelemetryEvent>(10000);
+    rt.spawn(async move { while rx.recv().await.is_some() {} });
+
+    let mut providers = std::collections::HashMap::new();
+    providers.insert("openai".to_string(), dd0c_route::config::ProviderConfig {
+        api_key: "bench-key".to_string(),
+        base_url: mock.uri(),
+    });
+
+    let config = Arc::new(AppConfig {
+        proxy_port: 0,
+        api_port: 0,
+        database_url: String::new(),
+        redis_url: String::new(),
+        timescale_url: String::new(),
+        jwt_secret: "bench".to_string(),
+        auth_mode: dd0c_route::config::AuthMode::Local,
+        governance_mode: dd0c_route::config::GovernanceMode::Audit,
+        providers,
+        telemetry_channel_size: 10000,
+    });
+
+    let app = create_router(Arc::new(ProxyState {
+        auth: Arc::new(NoOpAuth),
+        router: Arc::new(RouterBrain::new()),
+        telemetry_tx: tx,
+        http_client: reqwest::Client::new(),
+        config,
+    }));
+
+    let mut group = c.benchmark_group("proxy_overhead");
+    group.sample_size(1000);
+
+    for msg_count in [1, 5, 10] {
+        let messages: Vec<_> = (0..msg_count)
+            .map(|i| serde_json::json!({"role": "user", "content": format!("msg {}", i)}))
+            .collect();
+        let body = serde_json::json!({"model": "gpt-4o", "messages": messages}).to_string();
+
+        group.bench_with_input(
+            BenchmarkId::new("chat_completions", format!("{}_msgs", msg_count)),
+            &body,
+            |b, body| {
+                b.to_async(&rt).iter(|| {
+                    let app = app.clone();
+                    let body = body.clone();
+                    async move {
+                        let req = axum::http::Request::builder()
+                            .method("POST")
+                            .uri("/v1/chat/completions")
+                            .header("content-type", "application/json")
+                            .header("authorization", "Bearer bench-key")
+                            .body(axum::body::Body::from(body))
+                            .unwrap();
+                        tower::ServiceExt::oneshot(app, req).await.unwrap()
+                    }
+                });
+            },
+        );
+    }
+
+    group.finish();
+}
+
+criterion_group!(benches, bench_proxy_overhead); // group `benches` runs bench_proxy_overhead
+criterion_main!(benches); // criterion-provided entry point that runs the `benches` group