Add proxy latency benchmark (criterion, 1000 samples, 1/5/10 msg variants)
This commit is contained in:
122
products/01-llm-cost-router/benches/proxy_latency.rs
Normal file
122
products/01-llm-cost-router/benches/proxy_latency.rs
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
//! Proxy latency overhead benchmark.
|
||||||
|
//! Measures the time added by the proxy layer (routing + telemetry emission)
|
||||||
|
//! excluding upstream provider latency.
|
||||||
|
//!
|
||||||
|
//! Run: cargo bench --bench proxy_latency
|
||||||
|
//! CI gate: P99 must be < 5ms
|
||||||
|
|
||||||
|
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tokio::sync::mpsc;
|
||||||
|
|
||||||
|
use dd0c_route::{
|
||||||
|
AppConfig, TelemetryEvent, RouterBrain,
|
||||||
|
proxy::{create_router, ProxyState},
|
||||||
|
};
|
||||||
|
|
||||||
|
struct NoOpAuth;
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl dd0c_route::AuthProvider for NoOpAuth {
|
||||||
|
async fn authenticate(
|
||||||
|
&self,
|
||||||
|
_headers: &axum::http::HeaderMap,
|
||||||
|
) -> Result<dd0c_route::AuthContext, dd0c_route::AuthError> {
|
||||||
|
Ok(dd0c_route::AuthContext {
|
||||||
|
org_id: "bench-org".to_string(),
|
||||||
|
user_id: None,
|
||||||
|
role: dd0c_route::Role::Member,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_proxy_overhead(c: &mut Criterion) {
|
||||||
|
let rt = tokio::runtime::Runtime::new().unwrap();
|
||||||
|
|
||||||
|
// We measure only the proxy overhead — no real upstream call.
|
||||||
|
// The mock returns instantly, so measured time = pure proxy overhead.
|
||||||
|
let mock_url = rt.block_on(async {
|
||||||
|
let mock = wiremock::MockServer::start().await;
|
||||||
|
wiremock::Mock::given(wiremock::matchers::any())
|
||||||
|
.respond_with(
|
||||||
|
wiremock::ResponseTemplate::new(200)
|
||||||
|
.set_body_string(r#"{"id":"bench","choices":[{"message":{"content":"ok"}}],"usage":{"prompt_tokens":1,"completion_tokens":1}}"#)
|
||||||
|
.insert_header("content-type", "application/json"),
|
||||||
|
)
|
||||||
|
.mount(&mock)
|
||||||
|
.await;
|
||||||
|
mock.uri()
|
||||||
|
});
|
||||||
|
|
||||||
|
let (tx, _rx) = mpsc::channel::<TelemetryEvent>(10000);
|
||||||
|
|
||||||
|
let mut providers = std::collections::HashMap::new();
|
||||||
|
providers.insert("openai".to_string(), dd0c_route::config::ProviderConfig {
|
||||||
|
api_key: "bench-key".to_string(),
|
||||||
|
base_url: mock_url.clone(),
|
||||||
|
});
|
||||||
|
|
||||||
|
let config = Arc::new(AppConfig {
|
||||||
|
proxy_port: 0,
|
||||||
|
api_port: 0,
|
||||||
|
database_url: String::new(),
|
||||||
|
redis_url: String::new(),
|
||||||
|
timescale_url: String::new(),
|
||||||
|
jwt_secret: "bench".to_string(),
|
||||||
|
auth_mode: dd0c_route::config::AuthMode::Local,
|
||||||
|
governance_mode: dd0c_route::config::GovernanceMode::Audit,
|
||||||
|
providers,
|
||||||
|
telemetry_channel_size: 10000,
|
||||||
|
});
|
||||||
|
|
||||||
|
let state = Arc::new(ProxyState {
|
||||||
|
auth: Arc::new(NoOpAuth),
|
||||||
|
router: Arc::new(RouterBrain::new()),
|
||||||
|
telemetry_tx: tx,
|
||||||
|
http_client: reqwest::Client::new(),
|
||||||
|
config,
|
||||||
|
});
|
||||||
|
|
||||||
|
let app = create_router(state);
|
||||||
|
|
||||||
|
let mut group = c.benchmark_group("proxy_overhead");
|
||||||
|
group.sample_size(1000);
|
||||||
|
|
||||||
|
for msg_count in [1, 5, 10] {
|
||||||
|
let mut messages = vec![];
|
||||||
|
for i in 0..msg_count {
|
||||||
|
messages.push(serde_json::json!({"role": "user", "content": format!("msg {}", i)}));
|
||||||
|
}
|
||||||
|
let body = serde_json::json!({
|
||||||
|
"model": "gpt-4o",
|
||||||
|
"messages": messages,
|
||||||
|
}).to_string();
|
||||||
|
|
||||||
|
group.bench_with_input(
|
||||||
|
BenchmarkId::new("chat_completions", format!("{}_msgs", msg_count)),
|
||||||
|
&body,
|
||||||
|
|b, body| {
|
||||||
|
let app = app.clone();
|
||||||
|
b.to_async(&rt).iter(|| {
|
||||||
|
let app = app.clone();
|
||||||
|
let body = body.clone();
|
||||||
|
async move {
|
||||||
|
let req = axum::http::Request::builder()
|
||||||
|
.method("POST")
|
||||||
|
.uri("/v1/chat/completions")
|
||||||
|
.header("content-type", "application/json")
|
||||||
|
.header("authorization", "Bearer bench-key")
|
||||||
|
.body(axum::body::Body::from(body))
|
||||||
|
.unwrap();
|
||||||
|
tower::ServiceExt::oneshot(app, req).await.unwrap()
|
||||||
|
}
|
||||||
|
});
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
group.finish();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Registers `bench_proxy_overhead` in a criterion benchmark group named `benches`.
criterion_group!(benches, bench_proxy_overhead);
|
||||||
|
// Generates the benchmark binary's `main`, which runs the `benches` group.
criterion_main!(benches);
|
||||||
Reference in New Issue
Block a user