# dev-intel-v2/promptfoo.yaml

# Human-readable label for this evaluation suite.
description: "Dev Intel V3 - Documentation Quality Eval"

# Model under test: an OpenAI-compatible chat endpoint on the local network.
providers:
  - id: openai:chat:claude-haiku-4.5
    config:
      apiBaseUrl: http://192.168.86.11:8000/v1
      # The API key is read from the DEV_INTEL_API_KEY environment variable
      # instead of being committed in plain text. Export it before running
      # the eval, and rotate the previously committed key.
      apiKeyEnvar: DEV_INTEL_API_KEY
      # Zero temperature to keep answers as repeatable as the backend allows.
      temperature: 0

# Single prompt template shared by every test case.
# Template variables (supplied per test under `vars`):
#   docsDir  - path of the generated documentation directory
#   question - the question the model must answer about those docs
# NOTE(review): {{docsDir}} is substituted as a plain path string; the prompt
# itself does not inline any file contents. Confirm that the model behind the
# provider can actually read that directory (e.g. via tools), otherwise the
# answers cannot be grounded in the docs.
prompts:
  - |
    You are evaluating auto-generated infrastructure documentation.
    Read the docs directory at {{docsDir}} and answer this question:

    {{question}}

    Be specific and cite file paths where possible.

# Grader used by every `llm-rubric` assertion below.
# It is declared once here, in the `{id, config}` object form, so that the
# endpoint settings are actually applied to the grading provider. A `config:`
# key placed next to a string `provider:` on an assertion is assertion
# configuration, not provider configuration, so the custom base URL and key
# would not reach the grader in that form.
# The API key is read from the DEV_INTEL_API_KEY environment variable rather
# than being repeated in plain text in each test; export it before running.
defaultTest:
  options:
    provider:
      id: openai:chat:claude-haiku-4.5
      config:
        apiBaseUrl: http://192.168.86.11:8000/v1
        apiKeyEnvar: DEV_INTEL_API_KEY

# Each test asks one question about the generated docs and grades the answer
# against a natural-language rubric.
tests:
  # Structural
  - vars:
      docsDir: ./foxtrot-docs-v3
      question: "How many subsystems does the Foxtrot monorepo contain? List them."
    assert:
      - type: llm-rubric
        value: "Answer should list 12 subsystems including account-common, app-common, app-tools, compute-common, compute-tools, control-core, ipam-core, ipam-tools, network-common, network-core, runtime, and root"

  - vars:
      docsDir: ./foxtrot-docs-v3
      question: "Which 5 Helm charts produce the most Kubernetes resources?"
    assert:
      - type: llm-rubric
        value: "Answer should identify specific charts and their resource counts from the documentation"

  # Dependencies
  - vars:
      docsDir: ./foxtrot-docs-v3
      question: "What are the dependencies of the external-dns chart?"
    assert:
      - type: llm-rubric
        value: "Answer should list the sub-chart dependencies of external-dns from the Helm index"

  - vars:
      docsDir: ./foxtrot-docs-v3
      question: "What are the dependencies of the ingress-nginx chart?"
    assert:
      - type: llm-rubric
        value: "Answer should list the sub-chart dependencies of ingress-nginx"

  # Configuration
  - vars:
      docsDir: ./foxtrot-docs-v3
      question: "How many Helm charts define an mdm-app deployment?"
    assert:
      - type: llm-rubric
        value: "Answer should provide a count of charts with mdm-app resources"

  # Terraform
  - vars:
      docsDir: ./foxtrot-docs-v3
      question: "How many Terraform module directories are documented? List the top 5."
    assert:
      - type: llm-rubric
        value: "Answer should reference the terraform index and list specific module directories from control-core or other subsystems"

  # Architecture
  - vars:
      docsDir: ./foxtrot-docs-v3
      question: "Which subsystem has the most files and why?"
    assert:
      - type: llm-rubric
        value: "Answer should identify runtime as having the most files (~16K) and explain it contains rendered manifests or deployment artifacts"

  # Cross-subsystem
  - vars:
      docsDir: ./foxtrot-docs-v3
      question: "Which subsystems have zero functions and what does that indicate architecturally?"
    assert:
      - type: llm-rubric
        value: "Answer should identify account-common, network-common, network-core, control-core as zero-function subsystems and explain they are declarative/IaC configuration subsystems"

  # Impact
  - vars:
      docsDir: ./foxtrot-docs-v3
      question: "What is the blast radius of modifying a Terraform module in control-core?"
    assert:
      - type: llm-rubric
        value: "Answer should reference the change impact analysis and describe downstream dependents"

  # Entry Points
  - vars:
      docsDir: ./foxtrot-docs-v3
      question: "What entry points were detected in the codebase?"
    assert:
      - type: llm-rubric
        value: "Answer should reference detected entry points like Helm workloads, Python main, shell main, or CI pipelines"