Add proxy latency benchmark (criterion, 1000 samples, 1/5/10 msg variants)
This commit is contained in:
122
products/01-llm-cost-router/benches/proxy_latency.rs
Normal file
122
products/01-llm-cost-router/benches/proxy_latency.rs
Normal file
@@ -0,0 +1,122 @@
|
||||
//! Proxy latency overhead benchmark.
|
||||
//! Measures the time added by the proxy layer (routing + telemetry emission)
|
||||
//! excluding real upstream provider latency (the upstream is a local mock server).
|
||||
//!
|
||||
//! Run: cargo bench --bench proxy_latency
|
||||
//! CI gate: P99 must be < 5ms
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use dd0c_route::{
|
||||
AppConfig, TelemetryEvent, RouterBrain,
|
||||
proxy::{create_router, ProxyState},
|
||||
};
|
||||
|
||||
/// Authentication stub for the benchmark: its `AuthProvider` implementation
/// returns a fixed `bench-org` context without inspecting the request headers.
struct NoOpAuth;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl dd0c_route::AuthProvider for NoOpAuth {
|
||||
async fn authenticate(
|
||||
&self,
|
||||
_headers: &axum::http::HeaderMap,
|
||||
) -> Result<dd0c_route::AuthContext, dd0c_route::AuthError> {
|
||||
Ok(dd0c_route::AuthContext {
|
||||
org_id: "bench-org".to_string(),
|
||||
user_id: None,
|
||||
role: dd0c_route::Role::Member,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn bench_proxy_overhead(c: &mut Criterion) {
|
||||
let rt = tokio::runtime::Runtime::new().unwrap();
|
||||
|
||||
// We measure only the proxy overhead — no real upstream call.
|
||||
// The mock returns instantly, so measured time = pure proxy overhead.
|
||||
let mock_url = rt.block_on(async {
|
||||
let mock = wiremock::MockServer::start().await;
|
||||
wiremock::Mock::given(wiremock::matchers::any())
|
||||
.respond_with(
|
||||
wiremock::ResponseTemplate::new(200)
|
||||
.set_body_string(r#"{"id":"bench","choices":[{"message":{"content":"ok"}}],"usage":{"prompt_tokens":1,"completion_tokens":1}}"#)
|
||||
.insert_header("content-type", "application/json"),
|
||||
)
|
||||
.mount(&mock)
|
||||
.await;
|
||||
mock.uri()
|
||||
});
|
||||
|
||||
let (tx, _rx) = mpsc::channel::<TelemetryEvent>(10000);
|
||||
|
||||
let mut providers = std::collections::HashMap::new();
|
||||
providers.insert("openai".to_string(), dd0c_route::config::ProviderConfig {
|
||||
api_key: "bench-key".to_string(),
|
||||
base_url: mock_url.clone(),
|
||||
});
|
||||
|
||||
let config = Arc::new(AppConfig {
|
||||
proxy_port: 0,
|
||||
api_port: 0,
|
||||
database_url: String::new(),
|
||||
redis_url: String::new(),
|
||||
timescale_url: String::new(),
|
||||
jwt_secret: "bench".to_string(),
|
||||
auth_mode: dd0c_route::config::AuthMode::Local,
|
||||
governance_mode: dd0c_route::config::GovernanceMode::Audit,
|
||||
providers,
|
||||
telemetry_channel_size: 10000,
|
||||
});
|
||||
|
||||
let state = Arc::new(ProxyState {
|
||||
auth: Arc::new(NoOpAuth),
|
||||
router: Arc::new(RouterBrain::new()),
|
||||
telemetry_tx: tx,
|
||||
http_client: reqwest::Client::new(),
|
||||
config,
|
||||
});
|
||||
|
||||
let app = create_router(state);
|
||||
|
||||
let mut group = c.benchmark_group("proxy_overhead");
|
||||
group.sample_size(1000);
|
||||
|
||||
for msg_count in [1, 5, 10] {
|
||||
let mut messages = vec![];
|
||||
for i in 0..msg_count {
|
||||
messages.push(serde_json::json!({"role": "user", "content": format!("msg {}", i)}));
|
||||
}
|
||||
let body = serde_json::json!({
|
||||
"model": "gpt-4o",
|
||||
"messages": messages,
|
||||
}).to_string();
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("chat_completions", format!("{}_msgs", msg_count)),
|
||||
&body,
|
||||
|b, body| {
|
||||
let app = app.clone();
|
||||
b.to_async(&rt).iter(|| {
|
||||
let app = app.clone();
|
||||
let body = body.clone();
|
||||
async move {
|
||||
let req = axum::http::Request::builder()
|
||||
.method("POST")
|
||||
.uri("/v1/chat/completions")
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer bench-key")
|
||||
.body(axum::body::Body::from(body))
|
||||
.unwrap();
|
||||
tower::ServiceExt::oneshot(app, req).await.unwrap()
|
||||
}
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// Register `bench_proxy_overhead` in the `benches` criterion group.
criterion_group!(benches, bench_proxy_overhead);
|
||||
// Expand to the benchmark binary's entry point, running the `benches` group.
criterion_main!(benches);
|
||||
Reference in New Issue
Block a user