Files
dd0c/products/01-llm-cost-router/benches/proxy_latency.rs
Max Mayfield a8a8c53917
All checks were successful
CI — P1 Route (Rust) / test (push) Successful in 6m35s
cargo fmt: format all Rust source files
2026-03-01 17:53:28 +00:00

127 lines
4.0 KiB
Rust

//! Proxy latency overhead benchmark.
//! Measures the time added by the proxy layer (routing + telemetry emission)
//! excluding upstream provider latency.
//!
//! Run: cargo bench --bench proxy_latency
//! CI gate: P99 must be < 5ms
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use std::sync::Arc;
use tokio::sync::mpsc;
use dd0c_route::{
proxy::{create_router, ProxyState},
AppConfig, RouterBrain, TelemetryEvent,
};
/// Authentication stub for benchmarking: accepts every request and hands
/// back a fixed member-level context for a synthetic org, so auth adds
/// effectively zero cost to the measured hot path.
struct NoOpAuth;

#[async_trait::async_trait]
impl dd0c_route::AuthProvider for NoOpAuth {
    async fn authenticate(
        &self,
        _headers: &axum::http::HeaderMap,
    ) -> Result<dd0c_route::AuthContext, dd0c_route::AuthError> {
        // No header inspection whatsoever — always succeed.
        let ctx = dd0c_route::AuthContext {
            org_id: String::from("bench-org"),
            user_id: None,
            role: dd0c_route::Role::Member,
        };
        Ok(ctx)
    }
}
/// Benchmarks in-process proxy dispatch (`oneshot` against the router) with an
/// instant mock upstream, so the measured time is routing + auth + telemetry
/// overhead only — no network, no real provider latency.
fn bench_proxy_overhead(c: &mut Criterion) {
    let rt = tokio::runtime::Runtime::new().unwrap();
    // We measure only the proxy overhead — no real upstream call.
    // The mock returns instantly, so measured time = pure proxy overhead.
    let mock_url = rt.block_on(async {
        let mock = wiremock::MockServer::start().await;
        wiremock::Mock::given(wiremock::matchers::any())
            .respond_with(
                wiremock::ResponseTemplate::new(200)
                    .set_body_string(r#"{"id":"bench","choices":[{"message":{"content":"ok"}}],"usage":{"prompt_tokens":1,"completion_tokens":1}}"#)
                    .insert_header("content-type", "application/json"),
            )
            .mount(&mock)
            .await;
        mock.uri()
    });
    // FIX: actively drain the telemetry channel. Criterion runs far more
    // iterations than the channel capacity (10_000); with the receiver merely
    // parked, the bounded channel eventually fills, and if the proxy emits
    // via `send().await` every subsequent request would stall on backpressure,
    // contaminating the latency numbers this bench is CI-gated on. A trivial
    // background drain task keeps the channel permanently writable.
    let (tx, mut rx) = mpsc::channel::<TelemetryEvent>(10000);
    rt.spawn(async move { while rx.recv().await.is_some() {} });
    let mut providers = std::collections::HashMap::new();
    providers.insert(
        "openai".to_string(),
        dd0c_route::config::ProviderConfig {
            api_key: "bench-key".to_string(),
            base_url: mock_url.clone(),
        },
    );
    // Minimal config: ports and DB URLs are unused because we drive the
    // router in-process via `oneshot` rather than binding sockets.
    let config = Arc::new(AppConfig {
        proxy_port: 0,
        api_port: 0,
        database_url: String::new(),
        redis_url: String::new(),
        timescale_url: String::new(),
        jwt_secret: "bench".to_string(),
        auth_mode: dd0c_route::config::AuthMode::Local,
        governance_mode: dd0c_route::config::GovernanceMode::Audit,
        providers,
        telemetry_channel_size: 10000,
    });
    let state = Arc::new(ProxyState {
        auth: Arc::new(NoOpAuth),
        router: Arc::new(RouterBrain::new()),
        telemetry_tx: tx,
        http_client: reqwest::Client::new(),
        config,
    });
    let app = create_router(state);
    let mut group = c.benchmark_group("proxy_overhead");
    group.sample_size(1000);
    // Vary message count to see whether per-message work (token estimation,
    // body serialization) shows up in the proxy overhead.
    for msg_count in [1, 5, 10] {
        let mut messages = vec![];
        for i in 0..msg_count {
            messages.push(serde_json::json!({"role": "user", "content": format!("msg {}", i)}));
        }
        let body = serde_json::json!({
            "model": "gpt-4o",
            "messages": messages,
        })
        .to_string();
        group.bench_with_input(
            BenchmarkId::new("chat_completions", format!("{}_msgs", msg_count)),
            &body,
            |b, body| {
                let app = app.clone();
                b.to_async(&rt).iter(|| {
                    // `oneshot` consumes the service, so clone per iteration;
                    // axum Router clones are cheap (internal Arc bumps).
                    let app = app.clone();
                    let body = body.clone();
                    async move {
                        let req = axum::http::Request::builder()
                            .method("POST")
                            .uri("/v1/chat/completions")
                            .header("content-type", "application/json")
                            .header("authorization", "Bearer bench-key")
                            .body(axum::body::Body::from(body))
                            .unwrap();
                        // Returning the response lets criterion black-box it.
                        tower::ServiceExt::oneshot(app, req).await.unwrap()
                    }
                });
            },
        );
    }
    group.finish();
}
// Register the benchmark and generate the `main` entry point criterion runs.
criterion_group!(benches, bench_proxy_overhead);
criterion_main!(benches);