Scaffold dd0c/drift Go agent: CLI, scanner, scrubber, reporter, models

- cobra CLI: check (one-shot), watch (SQS consumer), version
- models: DriftReport, DriftedResource, severity classification (critical/high/medium/low)
- scanner: Terraform v4 state parser, resource counter
- scrubber: regex + Shannon entropy secret detection (strict/permissive/off modes)
- reporter: mTLS HTTP client with nonce replay prevention
- tests: severity classification (8 cases), scrubber (AWS keys, RSA, entropy, attributes)
2026-03-01 02:42:53 +00:00
parent e626608535
commit 31cb36fb77
8 changed files with 666 additions and 0 deletions
--- a/products/02-iac-drift-detection/agent/internal/scanner/scanner.go
+++ b/products/02-iac-drift-detection/agent/internal/scanner/scanner.go
@@ -0,0 +1,101 @@
+package scanner
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"time"
+
+	"github.com/dd0c/drift-agent/pkg/models"
+)
+
+// Scanner reads Terraform state and computes drift diffs.
+//
+// NOTE(review): the drift comparison itself is still a TODO in ScanFromFile;
+// at present a scan only parses the state file and counts managed resources.
+type Scanner struct {
+	// scrubber redacts sensitive values from drifted resources before they
+	// are returned. ScanFromFile skips scrubbing when it is nil.
+	scrubber *Scrubber
+}
+
+// New returns a Scanner that passes drifted resources through scrubber.
+// A nil scrubber is accepted; ScanFromFile then returns results unscrubbed.
+func New(scrubber *Scrubber) *Scanner {
+	sc := new(Scanner)
+	sc.scrubber = scrubber
+	return sc
+}
+
+// TerraformState represents the v4 state file format.
+//
+// Only the top-level keys listed here are decoded; encoding/json ignores any
+// other keys present in the file. ScanFromFile rejects a state whose Version
+// is not 4 and copies Serial and Lineage into the ScanResult.
+type TerraformState struct {
+	Version          int             `json:"version"`
+	TerraformVersion string          `json:"terraform_version"`
+	Serial           int64           `json:"serial"`
+	Lineage          string          `json:"lineage"`
+	Resources        []StateResource `json:"resources"`
+}
+
+// StateResource is one element of the state file's "resources" array.
+// countResources only counts the instances of resources whose Mode is
+// "managed".
+type StateResource struct {
+	Module    string          `json:"module,omitempty"`
+	Mode      string          `json:"mode"` // "managed" or "data"
+	Type      string          `json:"type"`
+	Name      string          `json:"name"`
+	Provider  string          `json:"provider"`
+	Instances []StateInstance `json:"instances"`
+}
+
+// StateInstance is one element of a StateResource's "instances" array.
+// Attributes is decoded into a generic map, so its values carry
+// encoding/json's default Go types rather than a provider-specific schema.
+type StateInstance struct {
+	SchemaVersion int                    `json:"schema_version"`
+	Attributes    map[string]interface{} `json:"attributes"`
+	Private       string                 `json:"private,omitempty"`
+}
+
+// ScanResult is the output of a drift scan.
+//
+// ScanFromFile fills StackName from its argument, ScannedAt with the current
+// UTC time, StateSerial and Lineage from the parsed state, and TotalResources
+// with the number of managed resource instances. DriftedResources is always
+// initialised to a non-nil slice.
+type ScanResult struct {
+	StackName        string                   `json:"stack_name"`
+	ScannedAt        time.Time                `json:"scanned_at"`
+	StateSerial      int64                    `json:"state_serial"`
+	Lineage          string                   `json:"lineage"`
+	TotalResources   int                      `json:"total_resources"`
+	DriftedResources []models.DriftedResource `json:"drifted_resources"`
+}
+
+// ScanFromFile reads a local tfstate file and produces a ScanResult.
+func (s *Scanner) ScanFromFile(path string, stackName string) (*ScanResult, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("read state file: %w", err)
+	}
+
+	var state TerraformState
+	if err := json.Unmarshal(data, &state); err != nil {
+		return nil, fmt.Errorf("parse state file: %w", err)
+	}
+
+	if state.Version != 4 {
+		return nil, fmt.Errorf("unsupported state version %d (expected 4)", state.Version)
+	}
+
+	result := &ScanResult{
+		StackName:    stackName,
+		ScannedAt:    time.Now().UTC(),
+		StateSerial:  state.Serial,
+		Lineage:      state.Lineage,
+		TotalResources: countResources(&state),
+		DriftedResources: []models.DriftedResource{},
+	}
+
+	// TODO: Compare state attributes against live cloud resources
+	// For V1, we compare against the previous state snapshot stored in SaaS
+
+	// Scrub sensitive values from the result
+	if s.scrubber != nil {
+		for i := range result.DriftedResources {
+			result.DriftedResources[i] = s.scrubber.ScrubResource(result.DriftedResources[i])
+		}
+	}
+
+	return result, nil
+}
+
+// countResources returns the total number of instances across all resources
+// in state whose mode is "managed"; resources in any other mode are skipped.
+func countResources(state *TerraformState) int {
+	total := 0
+	for i := range state.Resources {
+		res := &state.Resources[i]
+		if res.Mode != "managed" {
+			continue
+		}
+		total += len(res.Instances)
+	}
+	return total
+}
--- a/products/02-iac-drift-detection/agent/internal/scanner/scrubber.go
+++ b/products/02-iac-drift-detection/agent/internal/scanner/scrubber.go
@@ -0,0 +1,112 @@
+package scanner
+
+import (
+	"math"
+	"regexp"
+	"strings"
+
+	"github.com/dd0c/drift-agent/pkg/models"
+)
+
+// Scrubber removes sensitive values from drift diffs before transmission.
+type Scrubber struct {
+	// mode selects how aggressively values are redacted: "off" disables
+	// scrubbing, "strict" adds the entropy check on top of the regex
+	// patterns, and any other value applies the patterns only.
+	mode string // "strict", "permissive", "off"
+	// patterns holds the regexes applied by ScrubString; NewScrubber leaves
+	// it nil in "off" mode.
+	patterns []*regexp.Regexp
+}
+
+// NewScrubber builds a Scrubber for the given mode ("strict", "permissive"
+// or "off"). The mode string is stored as-is and is not validated: "off"
+// yields a Scrubber with no patterns, and every other value gets the full
+// pattern list.
+func NewScrubber(mode string) *Scrubber {
+	if mode == "off" {
+		return &Scrubber{mode: mode}
+	}
+
+	return &Scrubber{
+		mode: mode,
+		patterns: []*regexp.Regexp{
+			// AWS access key IDs and secret-key assignments.
+			regexp.MustCompile(`(?i)(AKIA[0-9A-Z]{16})`),
+			regexp.MustCompile(`(?i)(aws_secret_access_key|secret_key)\s*=\s*\S+`),
+			// Generic "name = value" secret assignments.
+			regexp.MustCompile(`(?i)(password|secret|token|api_key|private_key)\s*=\s*\S+`),
+			// PEM-encoded private keys, matched from BEGIN to END marker.
+			regexp.MustCompile(`-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----`),
+			// Runs of 40 or more base64 characters (likely encoded secrets).
+			regexp.MustCompile(`[A-Za-z0-9+/]{40,}={0,2}`),
+		},
+	}
+}
+
+// ScrubResource returns a copy of r in which the old and new value of every
+// attribute diff has been passed through scrubValue. In "off" mode, or when
+// r has no diffs, r is returned unchanged.
+//
+// The caller's resource is never modified: scrubbed diffs are written to a
+// freshly allocated slice instead of r.Diffs' backing array.
+func (s *Scrubber) ScrubResource(r models.DriftedResource) models.DriftedResource {
+	if s.mode == "off" || len(r.Diffs) == 0 {
+		return r
+	}
+
+	// r is a by-value copy, but its Diffs slice header still points at the
+	// caller's backing array. Writing through it would silently overwrite
+	// the caller's unscrubbed values, so build a separate slice.
+	diffs := make([]models.AttributeDiff, len(r.Diffs))
+	for i, d := range r.Diffs {
+		d.OldValue = s.scrubValue(d.AttributeName, d.OldValue)
+		d.NewValue = s.scrubValue(d.AttributeName, d.NewValue)
+		diffs[i] = d
+	}
+	r.Diffs = diffs
+	return r
+}
+
+// ScrubString returns input with every match of the scrubber's patterns
+// replaced by "[REDACTED]". In "strict" mode the result is additionally
+// passed through the Shannon-entropy check; in "off" mode input is returned
+// untouched.
+func (s *Scrubber) ScrubString(input string) string {
+	switch s.mode {
+	case "off":
+		return input
+	}
+
+	out := input
+	for i := range s.patterns {
+		out = s.patterns[i].ReplaceAllString(out, "[REDACTED]")
+	}
+
+	if s.mode != "strict" {
+		return out
+	}
+	// Strict mode also catches high-entropy tokens the patterns missed.
+	return s.entropyRedact(out)
+}
+
+// scrubValue returns the redacted form of an attribute value. If attrName
+// contains any known sensitive marker (case-insensitive substring match),
+// the whole value becomes "[REDACTED]"; otherwise the value is scrubbed with
+// ScrubString. The marker check does not consult s.mode; ScrubResource
+// returns before calling this method in "off" mode.
+func (s *Scrubber) scrubValue(attrName, value string) string {
+	name := strings.ToLower(attrName)
+	for _, marker := range [...]string{
+		"password", "secret", "token", "api_key", "private_key",
+		"access_key", "secret_key", "connection_string", "certificate",
+	} {
+		if strings.Contains(name, marker) {
+			return "[REDACTED]"
+		}
+	}
+	return s.ScrubString(value)
+}
+
+// entropyRedact checks for high-entropy substrings (Shannon entropy > 3.5 bits/char).
+func (s *Scrubber) entropyRedact(input string) string {
+	words := strings.Fields(input)
+	for i, word := range words {
+		if len(word) > 20 && shannonEntropy(word) > 3.5 {
+			words[i] = "[HIGH_ENTROPY_REDACTED]"
+		}
+	}
+	return strings.Join(words, " ")
+}
+
+func shannonEntropy(s string) float64 {
+	if len(s) == 0 {
+		return 0
+	}
+	freq := make(map[rune]float64)
+	for _, c := range s {
+		freq[c]++
+	}
+	length := float64(len([]rune(s)))
+	entropy := 0.0
+	for _, count := range freq {
+		p := count / length
+		if p > 0 {
+			entropy -= p * math.Log2(p)
+		}
+	}
+	return entropy
+}
--- a/products/02-iac-drift-detection/agent/internal/scanner/scrubber_test.go
+++ b/products/02-iac-drift-detection/agent/internal/scanner/scrubber_test.go
@@ -0,0 +1,114 @@
+package scanner
+
+import (
+	"testing"
+
+	"github.com/dd0c/drift-agent/pkg/models"
+)
+
+func TestScrubber_RedactsAWSAccessKey(t *testing.T) {
+	s := NewScrubber("strict")
+	input := `{"access_key": "AKIAIOSFODNN7EXAMPLE"}`
+	result := s.ScrubString(input)
+	if containsStr(result, "AKIAIOSFODNN7EXAMPLE") {
+		t.Fatalf("AWS key not redacted: %s", result)
+	}
+}
+
+func TestScrubber_RedactsRSAPrivateKey(t *testing.T) {
+	s := NewScrubber("strict")
+	input := `-----BEGIN RSA PRIVATE KEY-----
+MIIEpAIBAAKCAQEA0Z3VS5JJcds3xfn
+-----END RSA PRIVATE KEY-----`
+	result := s.ScrubString(input)
+	if containsStr(result, "MIIEpAIBAAKCAQEA") {
+		t.Fatalf("RSA key not redacted: %s", result)
+	}
+}
+
+func TestScrubber_HighEntropyStringRedacted(t *testing.T) {
+	s := NewScrubber("strict")
+	// 40-char hex string — looks like a custom API token
+	token := "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0"
+	result := s.ScrubString(token)
+	if containsStr(result, token) {
+		t.Fatalf("High-entropy token not redacted: %s", result)
+	}
+}
+
+func TestScrubber_NormalTextNotRedacted(t *testing.T) {
+	s := NewScrubber("strict")
+	input := "Hello world this is a normal log message"
+	result := s.ScrubString(input)
+	if result != input {
+		t.Fatalf("Normal text was incorrectly redacted: %s", result)
+	}
+}
+
+func TestScrubber_OffModePassesThrough(t *testing.T) {
+	s := NewScrubber("off")
+	input := "AKIAIOSFODNN7EXAMPLE"
+	result := s.ScrubString(input)
+	if result != input {
+		t.Fatalf("Off mode should not redact: %s", result)
+	}
+}
+
+// TestScrubResource_RedactsSensitiveAttributes verifies that both values of
+// a diff on a sensitively named attribute are fully redacted, while the old
+// value of a harmless attribute is preserved.
+func TestScrubResource_RedactsSensitiveAttributes(t *testing.T) {
+	resource := models.DriftedResource{
+		ResourceType:    "aws_db_instance",
+		ResourceAddress: "aws_db_instance.main",
+		Diffs: []models.AttributeDiff{
+			{AttributeName: "password", OldValue: "secret123", NewValue: "secret456"},
+			{AttributeName: "engine_version", OldValue: "14.1", NewValue: "15.0"},
+		},
+	}
+
+	got := NewScrubber("strict").ScrubResource(resource)
+	password, engine := got.Diffs[0], got.Diffs[1]
+
+	if password.OldValue != "[REDACTED]" {
+		t.Fatalf("Password old value not redacted: %s", password.OldValue)
+	}
+	if password.NewValue != "[REDACTED]" {
+		t.Fatalf("Password new value not redacted: %s", password.NewValue)
+	}
+	// A non-sensitive attribute must come through untouched.
+	if engine.OldValue != "14.1" {
+		t.Fatalf("Engine version should not be redacted: %s", engine.OldValue)
+	}
+}
+
+func TestShannonEntropy_HighForRandomString(t *testing.T) {
+	// Random hex string should have high entropy
+	e := shannonEntropy("a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0")
+	if e <= 3.5 {
+		t.Fatalf("Expected entropy > 3.5 for random hex, got %f", e)
+	}
+}
+
+func TestShannonEntropy_LowForRepeatedChars(t *testing.T) {
+	e := shannonEntropy("aaaaaaaaaaaaaaaaaaaaaaaaa")
+	if e > 0.1 {
+		t.Fatalf("Expected near-zero entropy for repeated chars, got %f", e)
+	}
+}
+
+// TestShannonEntropy_ZeroForEmpty verifies that the empty string has an
+// entropy of exactly zero.
+func TestShannonEntropy_ZeroForEmpty(t *testing.T) {
+	if got := shannonEntropy(""); got != 0 {
+		t.Fatalf("Expected 0 entropy for empty string, got %f", got)
+	}
+}
+
+// containsStr reports whether needle occurs in haystack. Unlike
+// strings.Contains, an empty needle is only considered contained in an
+// empty haystack.
+func containsStr(haystack, needle string) bool {
+	switch {
+	case len(haystack) < len(needle):
+		return false
+	case haystack == needle:
+		return true
+	case len(needle) == 0:
+		return false
+	}
+	return findStr(haystack, needle)
+}
+
+// findStr reports whether sub occurs anywhere in s, using a byte-wise
+// sliding-window comparison. An empty sub matches any s.
+func findStr(s, sub string) bool {
+	width := len(sub)
+	for end := width; end <= len(s); end++ {
+		if s[end-width:end] == sub {
+			return true
+		}
+	}
+	return false
+}