Add notification dispatchers (P3 Slack/Email/Webhook, P5 Slack), full YAML parser for P6
- P3 alert: NotificationDispatcher with Slack Block Kit, Resend email, generic webhook; severity-gated dispatch
- P5 cost: CostSlackNotifier with anomaly Block Kit (score, deviation, snooze/expected buttons)
- P6 run: Full YAML runbook parser with serde_yaml, variable substitution ({{var}}), failure actions, 7 tests
- P6 parser: validates non-empty steps, default timeout (300s), default abort on failure
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use anyhow::{Context, Result};
|
||||
|
||||
/// Parsed runbook step.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -6,47 +7,217 @@ pub struct RunbookStep {
|
||||
pub index: usize,
|
||||
pub description: String,
|
||||
pub command: String,
|
||||
#[serde(default = "default_timeout")]
|
||||
pub timeout_seconds: u64,
|
||||
#[serde(default)]
|
||||
pub on_failure: FailureAction,
|
||||
pub condition: Option<String>, // Optional: only run if previous step output matches
|
||||
pub condition: Option<String>,
|
||||
}
|
||||
|
||||
/// Serde fallback for `RunbookStep::timeout_seconds`: five minutes, in seconds.
fn default_timeout() -> u64 {
    const DEFAULT_TIMEOUT_SECONDS: u64 = 300;
    DEFAULT_TIMEOUT_SECONDS
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "action", rename_all = "snake_case")]
|
||||
pub enum FailureAction {
|
||||
Abort,
|
||||
Continue,
|
||||
Retry { max_attempts: u32 },
|
||||
}
|
||||
|
||||
impl Default for FailureAction {
|
||||
fn default() -> Self { FailureAction::Abort }
|
||||
}
|
||||
|
||||
/// Parsed runbook.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Runbook {
|
||||
pub name: String,
|
||||
#[serde(default)]
|
||||
pub description: String,
|
||||
#[serde(default = "default_version")]
|
||||
pub version: String,
|
||||
pub steps: Vec<RunbookStep>,
|
||||
#[serde(default)]
|
||||
pub variables: std::collections::HashMap<String, VariableSpec>,
|
||||
}
|
||||
|
||||
/// Serde fallback for `Runbook::version`.
fn default_version() -> String {
    String::from("0.1.0")
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VariableSpec {
|
||||
pub description: String,
|
||||
pub default: Option<String>,
|
||||
#[serde(default)]
|
||||
pub required: bool,
|
||||
}
|
||||
|
||||
/// Parse a YAML runbook into structured steps.
|
||||
pub fn parse_yaml(content: &str) -> anyhow::Result<Runbook> {
|
||||
// TODO: Full YAML parsing with serde_yaml
|
||||
// For now, return a placeholder
|
||||
Ok(Runbook {
|
||||
name: "placeholder".into(),
|
||||
description: "TODO: implement YAML parser".into(),
|
||||
version: "0.1.0".into(),
|
||||
steps: vec![],
|
||||
})
|
||||
///
|
||||
/// Example YAML:
|
||||
/// ```yaml
|
||||
/// name: restart-service
|
||||
/// description: Restart a stuck ECS service
|
||||
/// variables:
|
||||
/// cluster:
|
||||
/// description: ECS cluster name
|
||||
/// required: true
|
||||
/// service:
|
||||
/// description: ECS service name
|
||||
/// required: true
|
||||
/// steps:
|
||||
/// - description: Check current task count
|
||||
/// command: "aws ecs describe-services --cluster {{cluster}} --services {{service}}"
|
||||
/// timeout_seconds: 30
|
||||
/// - description: Force new deployment
|
||||
/// command: "aws ecs update-service --cluster {{cluster}} --service {{service}} --force-new-deployment"
|
||||
/// timeout_seconds: 60
|
||||
/// on_failure:
|
||||
/// action: abort
|
||||
/// - description: Wait for stable
|
||||
/// command: "aws ecs wait services-stable --cluster {{cluster}} --services {{service}}"
|
||||
/// timeout_seconds: 300
|
||||
/// on_failure:
|
||||
/// action: retry
|
||||
/// max_attempts: 3
|
||||
/// ```
|
||||
pub fn parse_yaml(content: &str) -> Result<Runbook> {
|
||||
let mut runbook: Runbook = serde_yaml::from_str(content)
|
||||
.context("Failed to parse runbook YAML")?;
|
||||
|
||||
// Assign step indices
|
||||
for (i, step) in runbook.steps.iter_mut().enumerate() {
|
||||
step.index = i;
|
||||
}
|
||||
|
||||
// Validate: at least one step
|
||||
if runbook.steps.is_empty() {
|
||||
anyhow::bail!("Runbook must have at least one step");
|
||||
}
|
||||
|
||||
Ok(runbook)
|
||||
}
|
||||
|
||||
/// Substitute variables in a command string.
|
||||
/// Variables use `{{name}}` syntax.
|
||||
pub fn substitute_variables(
|
||||
command: &str,
|
||||
variables: &std::collections::HashMap<String, String>,
|
||||
) -> Result<String> {
|
||||
let mut result = command.to_string();
|
||||
for (key, value) in variables {
|
||||
result = result.replace(&format!("{{{{{}}}}}", key), value);
|
||||
}
|
||||
|
||||
// Check for unresolved variables
|
||||
if result.contains("{{") {
|
||||
let unresolved: Vec<&str> = result
|
||||
.match_indices("{{")
|
||||
.filter_map(|(start, _)| {
|
||||
result[start..].find("}}").map(|end| &result[start..start + end + 2])
|
||||
})
|
||||
.collect();
|
||||
anyhow::bail!("Unresolved variables: {:?}", unresolved);
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Unit tests for runbook parsing and `{{name}}` substitution.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    // Indentation is significant here: it is what nests the variables and steps.
    const SAMPLE_YAML: &str = r#"
name: restart-service
description: Restart a stuck ECS service
variables:
  cluster:
    description: ECS cluster name
    required: true
  service:
    description: ECS service name
    required: true
steps:
  - description: Check current task count
    command: "aws ecs describe-services --cluster {{cluster}} --services {{service}}"
    timeout_seconds: 30
  - description: Force new deployment
    command: "aws ecs update-service --cluster {{cluster}} --service {{service}} --force-new-deployment"
    timeout_seconds: 60
    on_failure:
      action: abort
  - description: Wait for stable
    command: "aws ecs wait services-stable --cluster {{cluster}} --services {{service}}"
    timeout_seconds: 300
    on_failure:
      action: retry
      max_attempts: 3
"#;

    #[test]
    fn test_parse_valid_yaml() {
        let runbook = parse_yaml(SAMPLE_YAML).unwrap();
        assert_eq!(runbook.name, "restart-service");
        assert_eq!(runbook.steps.len(), 3);
        assert_eq!(runbook.steps[0].index, 0);
        assert_eq!(runbook.steps[2].timeout_seconds, 300);
        assert_eq!(runbook.variables.len(), 2);
        assert!(runbook.variables.get("cluster").unwrap().required);
    }

    // The parser no longer returns a placeholder: empty input is an error.
    #[test]
    fn test_parse_empty_input_fails() {
        assert!(parse_yaml("").is_err());
    }

    #[test]
    fn test_parse_empty_steps_fails() {
        let yaml = "name: empty\nsteps: []";
        assert!(parse_yaml(yaml).is_err());
    }

    #[test]
    fn test_parse_invalid_yaml_fails() {
        assert!(parse_yaml("not: [valid: yaml: {{").is_err());
    }

    #[test]
    fn test_substitute_variables() {
        let mut vars = HashMap::new();
        vars.insert("cluster".into(), "prod".into());
        vars.insert("service".into(), "api".into());

        let result = substitute_variables(
            "aws ecs describe-services --cluster {{cluster}} --services {{service}}",
            &vars,
        )
        .unwrap();

        assert_eq!(result, "aws ecs describe-services --cluster prod --services api");
    }

    #[test]
    fn test_unresolved_variable_fails() {
        let vars = HashMap::new();
        let result = substitute_variables("echo {{missing}}", &vars);
        assert!(result.is_err());
    }

    #[test]
    fn test_default_failure_action_is_abort() {
        let yaml = r#"
name: simple
steps:
  - description: test
    command: echo hello
"#;
        let runbook = parse_yaml(yaml).unwrap();
        assert!(matches!(runbook.steps[0].on_failure, FailureAction::Abort));
    }

    #[test]
    fn test_default_timeout() {
        let yaml = r#"
name: simple
steps:
  - description: test
    command: echo hello
"#;
        let runbook = parse_yaml(yaml).unwrap();
        assert_eq!(runbook.steps[0].timeout_seconds, 300);
    }
}
|
||||
|
||||
Reference in New Issue
Block a user