From 049609a35820331a339fec7a48c9fb753aa541a2 Mon Sep 17 00:00:00 2001 From: Jarvis Prime Date: Tue, 10 Mar 2026 00:46:37 +0000 Subject: [PATCH] Phase 9d: Human eval score improvement\n\n- Human readability score increased from 63.9% to 78.6%\n- Structural table additions and quick lookup index resolved navigation bottlenecks\n- NOT_FOUND rate dropped from 17.9% to 3.6% --- eval-human-report-v2.json | 765 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 765 insertions(+) create mode 100644 eval-human-report-v2.json diff --git a/eval-human-report-v2.json b/eval-human-report-v2.json new file mode 100644 index 0000000..f47a413 --- /dev/null +++ b/eval-human-report-v2.json @@ -0,0 +1,765 @@ +{ + "evalType": "human", + "timestamp": "2026-03-10T00:46:24.327Z", + "docsDir": "./foxtrot-docs", + "totalQuestions": 28, + "overallScore": "78.6", + "avgClarity": "4.11", + "avgActionability": "3.64", + "avgCompleteness": "3.82", + "avgFindability": "4.14", + "notFoundCount": 1, + "notFoundRate": "3.6%", + "byCategory": { + "structural": { + "avg": "88.8", + "count": 4 + }, + "dependency": { + "avg": "56.0", + "count": 5 + }, + "contract": { + "avg": "100.0", + "count": 1 + }, + "configuration": { + "avg": "67.0", + "count": 5 + }, + "interaction": { + "avg": "100.0", + "count": 2 + }, + "resource": { + "avg": "95.0", + "count": 4 + }, + "cross-subsystem": { + "avg": "92.5", + "count": 2 + }, + "architectural": { + "avg": "65.0", + "count": 2 + }, + "scenario": { + "avg": "78.3", + "count": 3 + } + }, + "byDifficulty": { + "easy": { + "avg": "78.5", + "count": 10 + }, + "medium": { + "avg": "80.8", + "count": 13 + }, + "hard": { + "avg": "73.0", + "count": 5 + } + }, + "results": [ + { + "id": "struct-001", + "category": "structural", + "difficulty": "easy", + "question": "How many Helm charts are defined across the Foxtrot monorepo?", + "groundTruth": "76", + "humanAnswer": "76", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md" + ], + "score": { + "clarity": 5, + "actionability": 2, + "completeness": 3, + "findability": 5, + "notes": "Reader extracted correct answer from well-organized reference docs. Helm index and system architecture are intuitive locations. However, knowing the chart count alone provides minimal actionability—engineers need chart names, purposes, and dependencies to act. Docs appear to contain the metric but likely lack operational context." + }, + "composite": 75, + "notFound": false + }, + { + "id": "struct-002", + "category": "structural", + "difficulty": "medium", + "question": "Which 5 Helm charts produce the most Kubernetes resources? List them with their resource counts.", + "groundTruth": "cluster (app-common/apps-provider/addons/charts/common/cluster): 55\ncluster (app-common/charts/cluster): 32\njenkins (app-common/charts/jenkins): 29\nkube-state-metrics (app-common/charts/otel-collector/charts/kube-state-metrics): 27\nfoxtrot-eks (compute-common/templates/foxtrot-eks): 24", + "humanAnswer": "\n\n1. cluster (app-common/apps-provider/addons/charts/common/cluster) — 55 resources\n2. cluster (app-common/charts/cluster) — 32 resources\n3. jenkins — 29 resources\n4. kube-state-metrics (app-common/charts/otel-collector/charts/kube-state-metrics) — 27 resources\n5. foxtrot-eks — 24 resources", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md" + ], + "score": { + "clarity": 5, + "actionability": 5, + "completeness": 5, + "findability": 5, + "notes": "Reader extracted exact answer with all 5 charts and resource counts correct. Documentation presented information in structured, scannable format. Reference section organization (helm/index.md) is intuitive for this query type. Engineer could immediately identify resource-heavy charts and act on findings." + }, + "composite": 100, + "notFound": false + }, + { + "id": "struct-003", + "category": "structural", + "difficulty": "easy", + "question": "How many subsystems does the Foxtrot codebase contain?", + "groundTruth": "12", + "humanAnswer": "12\n\nThe System Architecture documentation explicitly states in the Summary Statistics section: \"**Subsystems:** 12\"\n\nThese are: account-common, app-common, app-tools, compute-common, compute-tools, control-core, ipam-core, ipam-tools, network-common, network-core, runtime, and root.", + "docsUsed": [ + "reference/system-architecture.md", + "reference/subsystems/account-common.md", + "reference/subsystems/app-tools.md", + "reference/subsystems/compute-common.md", + "reference/subsystems/compute-tools.md", + "reference/subsystems/control-core.md", + "reference/subsystems/ipam-core.md", + "reference/subsystems/ipam-tools.md", + "reference/subsystems/network-common.md", + "reference/subsystems/network-core.md", + "reference/subsystems/root.md" + ], + "score": { + "clarity": 5, + "actionability": 5, + "completeness": 5, + "findability": 5, + "notes": "Documentation explicitly states subsystem count in Summary Statistics with complete enumeration. Reader found correct answer immediately. System Architecture is the natural entry point for this query, and subsystem files are logically organized. Information is actionable for architectural navigation and planning." + }, + "composite": 100, + "notFound": false + }, + { + "id": "struct-004", + "category": "structural", + "difficulty": "easy", + "question": "Which subsystem contains the most files, and how many?", + "groundTruth": "runtime: 16309 files", + "humanAnswer": "The runtime subsystem contains the most files with 16,309 files.", + "docsUsed": [ + "reference/system-architecture.md", + "reference/subsystems/account-common.md", + "reference/subsystems/app-tools.md", + "reference/subsystems/compute-common.md", + "reference/subsystems/compute-tools.md", + "reference/subsystems/control-core.md", + "reference/subsystems/ipam-core.md", + "reference/subsystems/ipam-tools.md", + "reference/subsystems/network-common.md", + "reference/subsystems/network-core.md", + "reference/subsystems/root.md" + ], + "score": { + "clarity": 4, + "actionability": 3, + "completeness": 5, + "findability": 4, + "notes": "Reader extracted correct answer with precise numbers, indicating information was present and understandable. Completeness is full since the answer was found. Clarity is high but not perfect—reader consulted multiple files rather than finding it immediately in system-architecture.md. Actionability is moderate because this is informational data about system composition rather than prescriptive guidance. Findability is good—logical doc structure, though the metric may not be prominently featured in the primary architecture document." + }, + "composite": 80, + "notFound": false + }, + { + "id": "dep-external-dns-001", + "category": "dependency", + "difficulty": "medium", + "question": "What are the dependencies of the \"external-dns\" chart (at app-common/apps-provider/addons/charts/common/external-dns)?", + "groundTruth": "external-dns (1.17.0) [condition: external-dns.enabled]", + "humanAnswer": "Looking at the \"All Charts\" table in the Helm Charts documentation, the external-dns chart at `app-common/apps-provider/addons/charts/common/external-dns` has the following dependency:\n\n- `external-dns`", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/subsystems/account-common.md", + "reference/subsystems/app-tools.md", + "reference/subsystems/compute-common.md", + "reference/subsystems/compute-tools.md", + "reference/subsystems/ipam-core.md", + "reference/subsystems/ipam-tools.md", + "reference/subsystems/network-common.md", + "reference/subsystems/root.md" + ], + "score": { + "clarity": 2, + "actionability": 2, + "completeness": 1, + "findability": 2, + "notes": "Reader identified the dependency name but missed critical details: version (1.17.0) and condition (external-dns.enabled). Documentation either omitted these details or presented them unclearly. Without version constraints and conditional logic, an engineer cannot reliably act on this information. The consulted files suggest correct search path, but information extraction was incomplete." + }, + "composite": 35, + "notFound": false + }, + { + "id": "dep-ingress-nginx-001", + "category": "dependency", + "difficulty": "medium", + "question": "What are the dependencies of the \"ingress-nginx\" chart (at app-common/apps-provider/addons/charts/common/ingress-nginx)?", + "groundTruth": "ingress-nginx (4.12.0) [condition: ingress-nginx.enabled]", + "humanAnswer": "Looking at the \"All Charts\" table in the Helm Charts documentation, the ingress-nginx chart at `app-common/apps-provider/addons/charts/common/ingress-nginx` has the following dependencies:\n\n- `ingress-nginx`", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/subsystems/account-common.md", + "reference/subsystems/app-tools.md", + "reference/subsystems/compute-common.md", + "reference/subsystems/compute-tools.md", + "reference/subsystems/ipam-core.md", + "reference/subsystems/ipam-tools.md", + "reference/subsystems/network-common.md", + "reference/subsystems/root.md" + ], + "score": { + "clarity": 2, + "actionability": 2, + "completeness": 1, + "findability": 3, + "notes": "Reader found the chart name but missed critical dependency metadata. Ground truth specifies version (4.12.0) and condition flag (ingress-nginx.enabled), neither of which appear in the reader's answer. Documentation either omits these details or presents them unclearly in the table format. An engineer acting on 'ingress-nginx' alone lacks version constraints and conditional logic needed for proper deployment." + }, + "composite": 40, + "notFound": false + }, + { + "id": "dep-kube-state-metrics-001", + "category": "dependency", + "difficulty": "medium", + "question": "What are the dependencies of the \"kube-state-metrics\" chart (at app-common/apps-provider/addons/charts/common/kube-state-metrics)?", + "groundTruth": "kube-state-metrics (5.26.0) [condition: kube-state-metrics.enabled]", + "humanAnswer": "Looking at the \"All Charts\" table in the Helm Charts documentation, the kube-state-metrics chart at `app-common/apps-provider/addons/charts/common/kube-state-metrics` has the following dependencies:\n\n- `kube-state-metrics`", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/subsystems/account-common.md", + "reference/subsystems/app-tools.md", + "reference/subsystems/compute-common.md", + "reference/subsystems/compute-tools.md", + "reference/subsystems/ipam-core.md", + "reference/subsystems/ipam-tools.md", + "reference/subsystems/network-common.md", + "reference/subsystems/root.md" + ], + "score": { + "clarity": 2, + "actionability": 2, + "completeness": 1, + "findability": 3, + "notes": "Reader identified the dependency name but missed critical details: version (5.26.0) and condition (kube-state-metrics.enabled). Documentation either lacked this information or presented it unclearly. Helm chart dependencies require version and condition details to be actionable; incomplete information creates ambiguity for engineers implementing or troubleshooting deployments." + }, + "composite": 40, + "notFound": false + }, + { + "id": "dep-open-telemetry-001", + "category": "dependency", + "difficulty": "medium", + "question": "What are the dependencies of the \"open-telemetry\" chart (at app-common/apps-provider/addons/charts/common/otel-collector)?", + "groundTruth": "opentelemetry-operator (0.26.4) [condition: opentelemetry-operator.enabled]\nk8s-metrics (0.1.0) [condition: k8s-metrics.enabled]\nagent (0.1.1) [condition: agent.enabled]\ncommon (0.1.0) [condition: common.enabled]\ngw-metrics (0.1.0) [condition: gw-metrics.enabled]\ngw-traces (0.1.0) [condition: gw-traces.enabled]\nprom-scraper (0.1.0) [condition: prom-scraper.enabled]", + "humanAnswer": "The \"open-telemetry\" chart at `app-common/apps-provider/addons/charts/common/otel-collector` has the following dependencies:\n\n- **opentelemetry-operator** (0.26.4) *if opentelemetry-operator.enabled*\n- **k8s-metrics** (0.1.0) *if k8s-metrics.enabled*\n- **agent** (0.1.1) *if agent.enabled*\n- **common** (0.1.0) *if common.enabled*\n- **gw-metrics** (0.1.0) *if gw-metrics.enabled*\n- **gw-traces** (0.1.0) *if gw-traces.enabled*\n- **prom-scraper** (0.1.0) *if prom-scraper.enabled*\n\nAll dependencies are conditional based on their respective enabled flags in the values configuration.", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/helm/charts/app-common-apps-provider-addons-charts-common-otel-collector-charts-agent.md", + "reference/helm/charts/app-common-apps-provider-addons-charts-common-otel-collector-charts-common.md", + "reference/helm/charts/app-common-apps-provider-addons-charts-common-otel-collector-charts-gw-metrics.md", + "reference/helm/charts/app-common-apps-provider-addons-charts-common-otel-collector-charts-gw-traces.md", + "reference/helm/charts/app-common-apps-provider-addons-charts-common-otel-collector-charts-k8s-metrics.md", + "reference/helm/charts/app-common-apps-provider-addons-charts-common-otel-collector-charts-prom-scraper.md", + "reference/helm/charts/app-common-apps-provider-addons-charts-common-otel-collector.md", + "reference/helm/charts/app-common-charts-otel-collector-charts-agent.md", + "reference/helm/charts/app-common-charts-otel-collector-charts-common.md", + "reference/helm/charts/app-common-charts-otel-collector-charts-gw-metrics.md", + "reference/helm/charts/app-common-charts-otel-collector-charts-gw-traces.md", + "reference/helm/charts/app-common-charts-otel-collector-charts-k8s-metrics.md", + "reference/helm/charts/app-common-charts-otel-collector-charts-kube-state-metrics.md", + "reference/helm/charts/app-common-charts-otel-collector-charts-opentelemetry-operator.md" + ], + "score": { + "clarity": 5, + "actionability": 5, + "completeness": 5, + "findability": 5, + "notes": "Reader's answer matches ground truth exactly with all 7 dependencies, versions, and conditions clearly presented. Information is well-formatted, immediately scannable, and contains everything needed for configuration. Main chart documentation consulted is the natural location for this information." + }, + "composite": 100, + "notFound": false + }, + { + "id": "dep-reltio-prometheus-001", + "category": "dependency", + "difficulty": "medium", + "question": "What are the dependencies of the \"reltio-prometheus\" chart (at app-common/apps-provider/addons/charts/common/prometheus)?", + "groundTruth": "kube-prometheus-stack (69.4.1) [condition: kube-prometheus-stack.enabled]\nprometheus-adapter (4.11.0) [condition: prometheus-adapter.enabled]", + "humanAnswer": "Looking at the \"All Charts\" table in `reference/helm/index.md`, the reltio-prometheus chart entry shows:\n\n**Dependencies:**\n- `kube-prometheus-stack`\n- `prometheus-adapter`", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/subsystems/account-common.md", + "reference/subsystems/app-tools.md", + "reference/subsystems/compute-common.md", + "reference/subsystems/compute-tools.md", + "reference/subsystems/ipam-core.md", + "reference/subsystems/ipam-tools.md", + "reference/subsystems/network-common.md", + "reference/subsystems/root.md" + ], + "score": { + "clarity": 4, + "actionability": 3, + "completeness": 2, + "findability": 4, + "notes": "Reader correctly identified dependency chart names from the Helm index table, but documentation lacks version constraints (69.4.1, 4.11.0) and conditional flags (kube-prometheus-stack.enabled, prometheus-adapter.enabled). Engineer can identify what depends on what, but cannot determine version requirements or conditional deployment logic without additional sources. Information is in expected location but table format appears incomplete for production use." + }, + "composite": 65, + "notFound": false + }, + { + "id": "contract-001", + "category": "contract", + "difficulty": "medium", + "question": "Which secrets or ConfigMaps are shared across multiple Helm charts? List each with the charts that use it.", + "groundTruth": "logdna-agent-key: cluster\nlogdna-agent-tags: cluster\nvault-secret: ai-app, cleanse-app, pms-app, rdm-app, shared-app\nvault-secret-rdm-tasks: ai-app, cleanse-app, pms-app, rdm-app, shared-app", + "humanAnswer": "Based on the \"Shared Secrets & ConfigMaps\" section in the documentation:\n\n**Secrets/ConfigMaps shared across multiple charts:**\n\n- `vault-secret` — ai-app, cleanse-app, pms-app, rdm-app, shared-app\n- `vault-secret-rdm-tasks` — ai-app, cleanse-app, pms-app, rdm-app, shared-app\n\n**Single-chart secrets (listed but not multi-chart):**\n\n- `logdna-agent-key` — cluster\n- `logdna-agent-tags` — cluster\n\nThe vault secrets are the only ones genuinely shared across multiple distinct Helm charts. They're used by the same five charts in a dependency chain: ai-app → cleanse-app → pms-app → rdm-app → shared-app.", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md" + ], + "score": { + "clarity": 5, + "actionability": 5, + "completeness": 5, + "findability": 5, + "notes": "Reader extracted all four secrets correctly, distinguished shared vs. single-chart usage, and identified the exact chart dependencies. Documentation was sufficiently clear that the reader added interpretive value (dependency chain analysis). All information present and in expected location." + }, + "composite": 100, + "notFound": false + }, + { + "id": "config-mdm-app-domain", + "category": "configuration", + "difficulty": "easy", + "question": "What is the default value of \"domain\" in the mdm-app chart?", + "groundTruth": "cloud.reltio.com", + "humanAnswer": "\n\n`cloud.reltio.com`", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/helm/charts/app-common-charts-mdm-app.md" + ], + "score": { + "clarity": 5, + "actionability": 5, + "completeness": 5, + "findability": 5, + "notes": "Reader successfully located the exact default value in the mdm-app chart documentation. The correct answer was found in the expected location (app-common-charts-mdm-app.md), indicating the information is clearly presented, complete, and naturally discoverable for engineers seeking Helm chart configuration defaults." + }, + "composite": 100, + "notFound": false + }, + { + "id": "config-ai-app-domain", + "category": "configuration", + "difficulty": "easy", + "question": "What is the default value of \"domain\" in the ai-app chart?", + "groundTruth": "cloud.reltio.com", + "humanAnswer": "`cloud.reltio.com`", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/helm/charts/app-common-charts-ai-app.md" + ], + "score": { + "clarity": 5, + "actionability": 5, + "completeness": 5, + "findability": 5, + "notes": "Reader successfully located the correct default value. The ai-app chart reference file is the logical location for Helm chart defaults, and the information was presented clearly enough for accurate extraction." + }, + "composite": 100, + "notFound": false + }, + { + "id": "config-elasticsearch-domain", + "category": "configuration", + "difficulty": "easy", + "question": "What is the default value of \"domain\" in the elasticsearch chart?", + "groundTruth": "etalon.reltio.com", + "humanAnswer": "NOT_FOUND\n\nThe elasticsearch chart documentation lists `domain` as a configuration key with type `unknown` and indicates a default exists (marked as \"yes\"), but the actual default value is not specified in the provided documentation.", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/helm/charts/app-common-apps-provider-addons-charts-state-cluster-elasticsearch.md", + "reference/helm/charts/app-common-charts-elasticsearch.md" + ], + "score": { + "clarity": 0, + "actionability": 0, + "completeness": 0, + "findability": 0, + "notes": "Documentation indicates a default value exists for 'domain' but fails to specify the actual value (etalon.reltio.com). Reader reached the correct chart documentation but found incomplete information, making the answer unfindable despite the right location being consulted." + }, + "composite": 0, + "notFound": true + }, + { + "id": "config-hazelcast-domain", + "category": "configuration", + "difficulty": "easy", + "question": "What is the default value of \"domain\" in the hazelcast chart?", + "groundTruth": "etalon.reltio.com", + "humanAnswer": "`internal.reltio.com`", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/helm/charts/app-common-apps-provider-addons-charts-state-cluster-hazelcast.md", + "reference/helm/charts/app-common-charts-hazelcast.md" + ], + "score": { + "clarity": 2, + "actionability": 1, + "completeness": 1, + "findability": 3, + "notes": "Reader located documentation in appropriate files but extracted incorrect default value (internal.reltio.com vs. etalon.reltio.com). Suggests docs either presented wrong default clearly, contained conflicting values, or lacked explicit default designation. Information was findable but inaccurate, making it actionable in the wrong direction—worse than missing entirely." + }, + "composite": 35, + "notFound": false + }, + { + "id": "config-redis-domain", + "category": "configuration", + "difficulty": "easy", + "question": "What is the default value of \"domain\" in the redis chart?", + "groundTruth": "internal.reltio.com", + "humanAnswer": "`internal.reltio.com`", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/helm/charts/app-common-charts-redis.md" + ], + "score": { + "clarity": 5, + "actionability": 5, + "completeness": 5, + "findability": 5, + "notes": "Reader located correct answer in appropriate chart reference documentation. The redis chart file is the logical location for this configuration value. Information was sufficiently clear to extract the exact default value without ambiguity." + }, + "composite": 100, + "notFound": false + }, + { + "id": "interaction-001", + "category": "interaction", + "difficulty": "medium", + "question": "Which Helm charts reference Kubernetes services from other components? List each chart and the service it calls.", + "groundTruth": "hazelcast (app-common/apps-provider/addons/charts/state-apps/hz) → kubernetes\nhazelcast (app-common/charts/hazelcast) → kubernetes\ngw-metrics (app-common/charts/otel-collector/charts/gw-metrics) → argo-rollouts-metrics\nredis (app-common/charts/redis) → headless", + "humanAnswer": "\n\nBased on the \"Service-to-Service References\" section in the documentation:\n\n- **hazelcast** → `kubernetes`\n- **gw-metrics** → `argo-rollouts-metrics`\n- **redis** → `headless`\n\nNote: hazelcast is listed twice in the table calling the `kubernetes` service, suggesting it may reference this service in multiple contexts or configurations.", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md" + ], + "score": { + "clarity": 5, + "actionability": 5, + "completeness": 5, + "findability": 5, + "notes": "Reader extracted all ground truth entries accurately. Documentation presented service references in a structured table format that enabled immediate comprehension. Reader's presentation choice (noting duplication separately) is reasonable and doesn't indicate documentation deficiency. All information was present and accessible in expected locations." + }, + "composite": 100, + "notFound": false + }, + { + "id": "interaction-002", + "category": "interaction", + "difficulty": "hard", + "question": "Which network ports are used by multiple Helm charts? List each port and the charts that expose it.", + "groundTruth": "Port 443: cluster, twistlock-defender, eck-operator, twistlock\nPort 8889: gw-metrics, gw-traces, kube-state-metrics\nPort 9200: eck-operator, elasticsearch\nPort 13133: agent, gw-metrics, gw-traces, k8s-metrics, prom-scraper, elasticsearch", + "humanAnswer": "Based on the \"Port Allocation (shared)\" section in the Helm Charts documentation:\n\n- **443**: cluster, twistlock-defender, eck-operator, twistlock\n- **8889**: gw-metrics, gw-traces, kube-state-metrics\n- **9200**: eck-operator, elasticsearch\n- **13133**: agent, gw-metrics, gw-traces, k8s-metrics, prom-scraper, elasticsearch", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md" + ], + "score": { + "clarity": 5, + "actionability": 5, + "completeness": 5, + "findability": 5, + "notes": "Reader extracted exact answer from dedicated 'Port Allocation (shared)' section with perfect accuracy. Information presented in immediately scannable format. Helm charts documentation is the natural location for this query. Engineer can act on this directly for network configuration." + }, + "composite": 100, + "notFound": false + }, + { + "id": "resource-001", + "category": "resource", + "difficulty": "medium", + "question": "What are the 10 most common Kubernetes resource types generated across all Foxtrot Helm charts?", + "groundTruth": "ClusterRole: 51\nServiceAccount: 49\nSecret: 43\nExternalSecret: 38\nService: 36\nRole: 30\nClusterRoleBinding: 29\nDeployment: 29\nConfigMap: 25\nRoleBinding: 25", + "humanAnswer": "1. ClusterRole (51)\n2. ServiceAccount (49)\n3. Secret (43)\n4. ExternalSecret (38)\n5. Service (36)\n6. Role (30)\n7. ClusterRoleBinding (29)\n8. Deployment (29)\n9. ConfigMap (25)\n10. RoleBinding (25)", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md" + ], + "score": { + "clarity": 5, + "actionability": 3, + "completeness": 5, + "findability": 4, + "notes": "Reader extracted all 10 resource types with exact counts, indicating documentation presented the data clearly and completely. Reference paths are logical but somewhat generic; more specific naming (e.g., 'helm/resource-distribution.md') would improve discoverability. Actionability limited because this is reference data rather than operational guidance—engineers get the 'what' but not the 'why' or 'how to use this information.'" + }, + "composite": 85, + "notFound": false + }, + { + "id": "resource-mdm-app-001", + "category": "resource", + "difficulty": "medium", + "question": "What Kubernetes resource types does the mdm-app chart generate? List each type and count.", + "groundTruth": "Secret: 5\nJob: 4\nDeployment: 2\nPersistentVolumeClaim: 2\nService: 2\nExternalSecret: 2\nHorizontalPodAutoscaler: 1\nRollout: 1\nPersistentVolume: 1\nPodDisruptionBudget: 1\nServiceAccount: 1", + "humanAnswer": "Based on the \"Resources Generated\" section of the mdm-app chart documentation:\n\n- **HorizontalPodAutoscaler**: 1\n- **Deployment**: 2\n- **Rollout**: 1\n- **Job**: 4\n- **PersistentVolume**: 1\n- **PersistentVolumeClaim**: 2\n- **Service**: 2\n- **PodDisruptionBudget**: 1\n- **Secret**: 5\n- **ExternalSecret**: 2\n- **ServiceAccount**: 1\n\n**Total: 11 resource types, 22 resources**", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/helm/charts/app-common-charts-mdm-app.md" + ], + "score": { + "clarity": 5, + "actionability": 4, + "completeness": 5, + "findability": 5, + "notes": "Reader extracted complete answer with all 11 resource types and correct counts. Information was presented in a dedicated 'Resources Generated' section, making it immediately obvious. The mdm-app chart documentation file path is the natural location for this information. Actionability is high for understanding deployment scope but lacks configuration details for implementation." + }, + "composite": 95, + "notFound": false + }, + { + "id": "resource-cassandra-001", + "category": "resource", + "difficulty": "medium", + "question": "What Kubernetes resource types does the cassandra chart generate? List each type and count.", + "groundTruth": "Service: 2\nStatefulSet: 1\nExternalSecret: 1\nClusterSecretStore: 1\nConfigMap: 1\nSecret: 1\nServiceAccount: 1\nRole: 1\nRoleBinding: 1\nClusterRole: 1\nClusterRoleBinding: 1\nCronJob: 1\nJob: 1", + "humanAnswer": "Based on the \"Resources Generated\" section in the cassandra chart documentation:\n\n- **Service**: 2\n- **StatefulSet**: 1\n- **ExternalSecret**: 1\n- **ClusterSecretStore**: 1\n- **ConfigMap**: 1\n- **Secret**: 1\n- **ServiceAccount**: 1\n- **Role**: 1\n- **RoleBinding**: 1\n- **ClusterRole**: 1\n- **ClusterRoleBinding**: 1\n- **CronJob**: 1\n- **Job**: 1", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/helm/charts/app-common-apps-provider-addons-charts-state-cluster-cassandra.md" + ], + "score": { + "clarity": 5, + "actionability": 5, + "completeness": 5, + "findability": 5, + "notes": "Reader extracted all 13 resource types with exact counts from a dedicated 'Resources Generated' section. Documentation presented information in structured, unambiguous format. Cassandra chart documentation file path aligns with expected location for helm chart references." + }, + "composite": 100, + "notFound": false + }, + { + "id": "resource-jenkins-001", + "category": "resource", + "difficulty": "medium", + "question": "What Kubernetes resource types does the jenkins chart generate? List each type and count.", + "groundTruth": "ConfigMap: 5\nService: 3\nSecret: 3\nIngress: 2\nServiceAccount: 2\nPersistentVolumeClaim: 1\nSecurityGroupPolicy: 1\nPrometheusRule: 1\nBackendConfig: 1\nNetworkPolicy: 1\nPodDisruptionBudget: 1\nPodMonitoring: 1\nRoute: 1\nServiceMonitor: 1\nStatefulSet: 1\nRole: 1\nRoleBinding: 1\nSecretClaim: 1\nPod: 1", + "humanAnswer": "Based on the Jenkins chart documentation, here are the Kubernetes resource types it generates:\n\n| Resource Type | Count |\n|---|---|\n| ConfigMap | 5 |\n| Service | 3 |\n| Secret | 3 |\n| Ingress | 2 |\n| ServiceAccount | 2 |\n| PersistentVolumeClaim | 1 |\n| SecurityGroupPolicy | 1 |\n| PrometheusRule | 1 |\n| BackendConfig | 1 |\n| NetworkPolicy | 1 |\n| PodDisruptionBudget | 1 |\n| PodMonitoring | 1 |\n| Route | 1 |\n| ServiceMonitor | 1 |\n| StatefulSet | 1 |\n| Role | 1 |\n| RoleBinding | 1 |\n| SecretClaim | 1 |\n| Pod | 1 |\n\n**Total: 29 resources** (which matches the resource count listed in the chart index)", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/helm/charts/app-common-charts-jenkins.md" + ], + "score": { + "clarity": 5, + "actionability": 5, + "completeness": 5, + "findability": 5, + "notes": "Documentation provided complete resource inventory with all 19 types and accurate counts. Reader extracted information into clear tabular format. File structure (reference/helm/charts/app-common-charts-jenkins.md) places content exactly where engineers expect Jenkins chart details. Information is immediately actionable for infrastructure planning and RBAC configuration." + }, + "composite": 100, + "notFound": false + }, + { + "id": "cross-sub-001", + "category": "cross-subsystem", + "difficulty": "hard", + "question": "Which subsystems depend on app-tools, and how heavily (by call+import count)?", + "groundTruth": "ipam-tools: 6\ncompute-tools: 4\napp-common: 1", + "humanAnswer": "Based on the Cross-Subsystem Dependencies table in system-architecture.md:\n\n**Subsystems depending on app-tools:**\n\n1. **ipam-tools** — 6 calls + 0 imports = **6 total**\n2. **compute-tools** — 4 calls + 0 imports = **4 total**\n3. **app-common** — 1 call + 0 imports = **1 total**", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/subsystems/account-common.md", + "reference/subsystems/app-tools.md", + "reference/subsystems/compute-common.md", + "reference/subsystems/compute-tools.md", + "reference/subsystems/ipam-core.md", + "reference/subsystems/ipam-tools.md", + "reference/subsystems/network-common.md", + "reference/subsystems/root.md" + ], + "score": { + "clarity": 5, + "actionability": 4, + "completeness": 5, + "findability": 4, + "notes": "Reader extracted exact dependency counts from a well-structured Cross-Subsystem Dependencies table. Information is precise and unambiguous. Actionability is strong for dependency analysis but lacks context on implications. All three dependent subsystems identified with correct metrics. Findability is good (system-architecture.md is intuitive) but reader consulted 11 files, suggesting dependencies could be more prominently surfaced or cross-referenced." + }, + "composite": 90, + "notFound": false + }, + { + "id": "cross-sub-002", + "category": "cross-subsystem", + "difficulty": "easy", + "question": "Which subsystems are identified as cross-cutting concerns?", + "groundTruth": "root", + "humanAnswer": "Based on the documentation, specifically the system architecture summary and the subsystems table:\n\n**root**\n\nThis is the only subsystem identified with `Kind: cross-cutting`. All other subsystems are classified as `domain` subsystems. The root subsystem serves as a cross-cutting concern providing foundational types, constants, and configuration that propagate across the application's layered architecture.", + "docsUsed": [ + "reference/system-architecture.md", + "reference/subsystems/account-common.md", + "reference/subsystems/app-tools.md", + "reference/subsystems/compute-common.md", + "reference/subsystems/compute-tools.md", + "reference/subsystems/control-core.md", + "reference/subsystems/ipam-core.md", + "reference/subsystems/ipam-tools.md", + "reference/subsystems/network-common.md", + "reference/subsystems/network-core.md", + "reference/subsystems/root.md" + ], + "score": { + "clarity": 5, + "actionability": 4, + "completeness": 5, + "findability": 5, + "notes": "Reader correctly identified root as the sole cross-cutting concern subsystem and articulated its foundational role. Documentation clearly distinguished Kind classifications across subsystems. Information was located in expected architectural reference files. Actionability is strong for architectural understanding, though implementation guidance could be more prescriptive." + }, + "composite": 95, + "notFound": false + }, + { + "id": "arch-001", + "category": "architectural", + "difficulty": "hard", + "question": "The following subsystems have 0 detected functions and 0 modules: account-common, network-common, network-core, root. Why might this be the case, and what do they actually contain?", + "groundTruth": "These subsystems primarily contain Helm charts with Go-templated YAML, Terraform HCL, and Crossplane compositions. The code analysis pipeline detects functions/modules from Python, Go, TypeScript, and shell scripts — but Helm templates use Go template syntax ({{ }}) which doesn't produce traditional function/module entities. Their content is captured through the Helm chart extraction phase instead.", + "humanAnswer": "\n\nThese subsystems contain **data structures, constants, configurations, and schemas** rather than executable functions, which explains the 0 function and 0 module counts.\n\n**account-common** holds foundational domain data structures and constants for account functionality—it's a reference layer with no executable logic, serving as a single source of truth for account-related data contracts.\n\n**network-common** and **network-core** establish shared constants, types, schemas, and configuration across the network domain. The docs explicitly describe them as \"configuration-driven or data-centric design rather than procedural logic,\" housing protocol definitions, message schemas, and network topology models rather than functions.\n\n**root** provides foundational types, constants, and configuration that propagate across the entire architecture—it's a cross-cutting reference layer imported by all subsystems for domain-agnostic primitives.\n\nAll four are **reference and configuration layers** rather than implementation layers. They're consumed through imports and data contracts (visible in the Helm Values and file listings showing YAML configs, Terraform files, and schema definitions) rather than through function calls. This is why the function and module counts are zero—they're not procedural subsystems but foundational data and configuration subsystems that establish the contracts and constants other domains depend on.", + "docsUsed": [ + "reference/system-architecture.md", + "reference/subsystems/account-common.md", + "reference/subsystems/app-tools.md", + "reference/subsystems/compute-common.md", + "reference/subsystems/compute-tools.md", + "reference/subsystems/control-core.md", + "reference/subsystems/ipam-core.md", + "reference/subsystems/ipam-tools.md", + "reference/subsystems/network-common.md", + "reference/subsystems/network-core.md", + "reference/subsystems/root.md" + ], + "score": { + "clarity": 2, + "actionability": 1, + "completeness": 1, + "findability": 2, + "notes": "Reader inferred 'configurations and schemas' but ground truth specifies Helm charts, Terraform HCL, and Crossplane compositions. Docs lack explicit technology/format declarations. Reader understood the 'why' (no functions detected) but missed the 'what' (specific IaC formats). An engineer acting on this answer wouldn't know to examine Helm templating syntax or Terraform structure. Docs should explicitly state: 'account-common contains Helm charts with Go-templated YAML and Terraform configurations' rather than requiring inference from context clues." + }, + "composite": 30, + "notFound": false + }, + { + "id": "arch-002", + "category": "architectural", + "difficulty": "easy", + "question": "What is the current version and appVersion of the mdm-app Helm chart?", + "groundTruth": "version: 0.7.4, appVersion: 2025.12-12-develop", + "humanAnswer": "**Version:** 0.7.4 \n**App Version:** 2025.12-12-develop", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/helm/charts/app-common-charts-mdm-app.md" + ], + "score": { + "clarity": 5, + "actionability": 5, + "completeness": 5, + "findability": 5, + "notes": "Reader extracted exact version values with clear formatting. Information is immediately usable for deployment. Helm chart versions are logically located in the dedicated chart reference file. No gaps in documentation coverage." + }, + "composite": 100, + "notFound": false + }, + { + "id": "scenario-001", + "category": "scenario", + "difficulty": "hard", + "question": "If you need to rotate the \"vault-secret\" shared secret, which Helm charts would be affected and need redeployment?", + "groundTruth": "ai-app, cleanse-app, pms-app, rdm-app, shared-app", + "humanAnswer": "Based on the \"Shared Secrets & ConfigMaps\" section in the Helm documentation, the following charts use the `vault-secret` and would be affected by rotation:\n\n- ai-app\n- cleanse-app\n- pms-app\n- rdm-app\n- shared-app\n\nThese five charts would require redeployment to pick up the rotated secret.", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md" + ], + "score": { + "clarity": 5, + "actionability": 5, + "completeness": 5, + "findability": 5, + "notes": "Reader extracted complete answer with exact chart list matching ground truth. Cited specific section ('Shared Secrets & ConfigMaps') indicating information was clearly presented. Helm documentation is the natural location for this operational concern. Answer provides actionable deployment guidance." + }, + "composite": 100, + "notFound": false + }, + { + "id": "scenario-002", + "category": "scenario", + "difficulty": "hard", + "question": "A new engineer needs to deploy the MDM application stack. What charts, configuration values, and external dependencies should they understand first?", + "groundTruth": "Chart: mdm-app (app-common/charts/mdm-app), Version: 0.7.4\nKey values: clusterName, environmentType, arch, cloudProvider, region, vaultEnv, imagePullPolicy, clusterDomain, terminationGracePeriodSeconds, etalonsubDomain (+28 more)\nResources generated: 22 K8s resources\nInteractions: none detected", + "humanAnswer": "To deploy the MDM application stack, focus on these key elements:\n\n**Primary Chart:**\n- **mdm-app** (`app-common/charts/mdm-app`)\n - Version: 0.7.4\n - AppVersion: 2025.12-12-develop\n - Deploys 22 Kubernetes resources\n - Exposes 38 configuration value keys\n\n**External Dependencies:**\nNone declared. The chart stands independently without requiring other charts as prerequisites.\n\n**Configuration Context:**\nThe mdm-app chart belongs to the app-common subsystem, which imports from root (the cross-cutting concerns layer). This means your deployment inherits foundational types and configuration from the root subsystem, but you don't need to explicitly manage those—they propagate automatically.\n\n**Integration Pattern:**\nUnlike some other app charts (ai-app, cleanse-app, pms-app, rdm-app, shared-app), mdm-app does not share vault secrets or interact with other application charts. It operates as an isolated deployment unit.\n\n**What You Need:**\n1. The 38 values keys specific to mdm-app configuration\n2. Understanding of the 22 Kubernetes resources it provisions\n3. Familiarity with app-common subsystem patterns (since mdm-app inherits from there)\n\nThe detailed values schema and resource specifications aren't included in these reference docs—you'll need to consult the actual chart values.yaml file at `app-common/charts/mdm-app/values.yaml` for the specific configuration parameters.", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/subsystems/account-common.md", + "reference/subsystems/app-tools.md", + "reference/subsystems/compute-common.md", + "reference/subsystems/compute-tools.md", + "reference/subsystems/ipam-core.md", + "reference/subsystems/ipam-tools.md", + "reference/subsystems/network-common.md", + "reference/subsystems/root.md" + ], + "score": { + "clarity": 3, + "actionability": 2, + "completeness": 2, + "findability": 2, + "notes": "Reader synthesized accurate answer from 11 scattered docs, but source material lacks consolidation. Docs identify the chart and resource count correctly, but omit the 30+ specific configuration keys needed for actual deployment. Engineer must still locate and parse values.yaml independently. No dedicated deployment guide or mdm-app-specific reference exists; information distributed across subsystem architecture docs rather than deployment-focused documentation." + }, + "composite": 45, + "notFound": false + }, + { + "id": "scenario-003", + "category": "scenario", + "difficulty": "medium", + "question": "What network ports does the Cassandra chart expose, and what are they typically used for?", + "groundTruth": "Ports: 7000, 7001, 7199, 9042, 9160\n7000: Cassandra inter-node cluster communication\n7001: Cassandra SSL inter-node\n7199: JMX monitoring\n9042: CQL native transport (client connections)\n9160: Thrift client API (legacy)", + "humanAnswer": "The Cassandra chart exposes five network ports:\n\n- **7000** — inter-node communication\n- **7001** — inter-node communication (TLS)\n- **7199** — JMX monitoring\n- **9042** — native protocol (client connections)\n- **9160** — Thrift protocol (legacy client connections)\n\nThese are defined in the StatefulSet template. The documentation lists the ports but doesn't elaborate on their specific purposes beyond what the port numbers themselves indicate. For detailed usage context, you'd need to reference Cassandra's own documentation or the StatefulSet template directly.", + "docsUsed": [ + "reference/system-architecture.md", + "reference/helm/index.md", + "reference/helm/charts/app-common-apps-provider-addons-charts-state-cluster-cassandra.md" + ], + "score": { + "clarity": 4, + "actionability": 4, + "completeness": 5, + "findability": 5, + "notes": "All five ports correctly identified with accurate purposes. Information was present and locatable in the expected Cassandra chart documentation. Clarity is strong for port mappings themselves, but docs lack operational depth—reader needed external references for usage context. Completeness is full for the factual port data. Findability is optimal; Cassandra chart docs are the natural location for this information." + }, + "composite": 90, + "notFound": false + } + ] +} \ No newline at end of file