Files
dd0c/products/01-llm-cost-router/benches/proxy_latency.rs
Max Mayfield a8a8c53917
All checks were successful
CI — P1 Route (Rust) / test (push) Successful in 6m35s
cargo fmt: format all Rust source files
2026-03-01 17:53:28 +00:00

127 lines
4.0 KiB
Rust

//! Proxy latency overhead benchmark.
//! Measures the time added by the proxy layer (routing + telemetry emission)
//! excluding upstream provider latency.
//!
//! Run: cargo bench --bench proxy_latency
//! CI gate: P99 must be < 5ms
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use std::sync::Arc;
use tokio::sync::mpsc;
use dd0c_route::{
proxy::{create_router, ProxyState},
AppConfig, RouterBrain, TelemetryEvent,
};
/// Authentication stub for benchmarking: accepts every request and hands
/// back a fixed member-level context for a synthetic org, so auth adds
/// effectively zero cost to the measured hot path.
struct NoOpAuth;

#[async_trait::async_trait]
impl dd0c_route::AuthProvider for NoOpAuth {
    async fn authenticate(
        &self,
        _headers: &axum::http::HeaderMap,
    ) -> Result<dd0c_route::AuthContext, dd0c_route::AuthError> {
        // No header inspection whatsoever — always succeed.
        let ctx = dd0c_route::AuthContext {
            org_id: String::from("bench-org"),
            user_id: None,
            role: dd0c_route::Role::Member,
        };
        Ok(ctx)
    }
}
/// Benchmarks in-process proxy dispatch (`oneshot` against the router) with an
/// instant mock upstream, so the measured time is routing + auth + telemetry
/// overhead only — no network, no real provider latency.
fn bench_proxy_overhead(c: &mut Criterion) {
    let rt = tokio::runtime::Runtime::new().unwrap();
    // We measure only the proxy overhead — no real upstream call.
    // The mock returns instantly, so measured time = pure proxy overhead.
    let mock_url = rt.block_on(async {
        let mock = wiremock::MockServer::start().await;
        wiremock::Mock::given(wiremock::matchers::any())
            .respond_with(
                wiremock::ResponseTemplate::new(200)
                    .set_body_string(r#"{"id":"bench","choices":[{"message":{"content":"ok"}}],"usage":{"prompt_tokens":1,"completion_tokens":1}}"#)
                    .insert_header("content-type", "application/json"),
            )
            .mount(&mock)
            .await;
        mock.uri()
    });
    // FIX: actively drain the telemetry channel. Criterion runs far more
    // iterations than the channel capacity (10_000); with the receiver merely
    // parked, the bounded channel eventually fills, and if the proxy emits
    // via `send().await` every subsequent request would stall on backpressure,
    // contaminating the latency numbers this bench is CI-gated on. A trivial
    // background drain task keeps the channel permanently writable.
    let (tx, mut rx) = mpsc::channel::<TelemetryEvent>(10000);
    rt.spawn(async move { while rx.recv().await.is_some() {} });
    let mut providers = std::collections::HashMap::new();
    providers.insert(
        "openai".to_string(),
        dd0c_route::config::ProviderConfig {
            api_key: "bench-key".to_string(),
            base_url: mock_url.clone(),
        },
    );
    // Minimal config: ports and DB URLs are unused because we drive the
    // router in-process via `oneshot` rather than binding sockets.
    let config = Arc::new(AppConfig {
        proxy_port: 0,
        api_port: 0,
        database_url: String::new(),
        redis_url: String::new(),
        timescale_url: String::new(),
        jwt_secret: "bench".to_string(),
        auth_mode: dd0c_route::config::AuthMode::Local,
        governance_mode: dd0c_route::config::GovernanceMode::Audit,
        providers,
        telemetry_channel_size: 10000,
    });
    let state = Arc::new(ProxyState {
        auth: Arc::new(NoOpAuth),
        router: Arc::new(RouterBrain::new()),
        telemetry_tx: tx,
        http_client: reqwest::Client::new(),
        config,
    });
    let app = create_router(state);
    let mut group = c.benchmark_group("proxy_overhead");
    group.sample_size(1000);
    // Vary message count to see whether per-message work (token estimation,
    // body serialization) shows up in the proxy overhead.
    for msg_count in [1, 5, 10] {
        let mut messages = vec![];
        for i in 0..msg_count {
            messages.push(serde_json::json!({"role": "user", "content": format!("msg {}", i)}));
        }
        let body = serde_json::json!({
            "model": "gpt-4o",
            "messages": messages,
        })
        .to_string();
        group.bench_with_input(
            BenchmarkId::new("chat_completions", format!("{}_msgs", msg_count)),
            &body,
            |b, body| {
                let app = app.clone();
                b.to_async(&rt).iter(|| {
                    // `oneshot` consumes the service, so clone per iteration;
                    // axum Router clones are cheap (internal Arc bumps).
                    let app = app.clone();
                    let body = body.clone();
                    async move {
                        let req = axum::http::Request::builder()
                            .method("POST")
                            .uri("/v1/chat/completions")
                            .header("content-type", "application/json")
                            .header("authorization", "Bearer bench-key")
                            .body(axum::body::Body::from(body))
                            .unwrap();
                        // Returning the response lets criterion black-box it.
                        tower::ServiceExt::oneshot(app, req).await.unwrap()
                    }
                });
            },
        );
    }
    group.finish();
}
// Register the benchmark and generate the `main` entry point criterion runs.
criterion_group!(benches, bench_proxy_overhead);
criterion_main!(benches);