Testing
Guide to testing Triage Warden.
Test Structure
triage-warden/
├── crates/
│   ├── tw-api/src/
│   │   └── tests/          # API integration tests
│   ├── tw-core/src/
│   │   └── tests/          # Core unit tests
│   └── tw-actions/src/
│       └── tests/          # Action handler tests
└── python/
    └── tests/              # Python tests
Running Tests
All Tests
# Rust: run the Rust test suite with Cargo
cargo test
# Python: run pytest from the python/ directory via uv
cd python && uv run pytest
# Everything: project script (presumably runs both suites above — confirm)
./scripts/test-all.sh
Specific Tests
# Single crate (-p selects the package to test)
cargo test -p tw-api
# Single test, selected by its name
cargo test test_incident_creation
# Pattern match: the argument is a filter on test names
cargo test incident
# With output: arguments after `--` go to the test binary
cargo test -- --nocapture
Unit Tests
Rust Unit Tests
#[cfg(test)]
mod tests {
    use super::*;

    /// A newly constructed incident is expected to start in the `Open` status.
    #[test]
    fn test_incident_creation() {
        let incident = Incident::new(IncidentType::Phishing, Severity::High);
        assert_eq!(incident.status, IncidentStatus::Open);
    }

    /// Async code is tested with `#[tokio::test]` so the test body can `.await`.
    ///
    /// `async_function` stands in for whatever async API is under test.
    #[tokio::test]
    async fn test_async_operation() {
        let result = async_function().await;
        assert!(result.is_ok());
    }
}
Python Unit Tests
import pytest
from tw_ai.agents import TriageAgent
def test_agent_creation():
    """A default-constructed agent should report the expected model identifier."""
    agent = TriageAgent()
    assert agent.model == "claude-sonnet-4-20250514"
@pytest.mark.asyncio
async def test_triage(mock_incident):
    """Triage a sample incident and check the verdict's classification.

    ``mock_incident`` is requested as a pytest fixture argument; referring to
    the name without declaring it as a parameter would not inject the fixture.
    """
    agent = TriageAgent()
    verdict = await agent.triage(mock_incident)
    assert verdict.classification in ["malicious", "benign"]
Integration Tests
API Integration Tests
#![allow(unused)] fn main() { #[tokio::test] async fn test_incident_api() { let app = create_test_app().await; // Create incident let response = app .oneshot( Request::builder() .method("POST") .uri("/api/incidents") .header("Content-Type", "application/json") .body(Body::from(r#"{"type":"phishing"}"#)) .unwrap(), ) .await .unwrap(); assert_eq!(response.status(), StatusCode::CREATED); } }
Database Tests
#![allow(unused)] fn main() { #[tokio::test] async fn test_repository() { // Use in-memory SQLite let pool = create_test_pool().await; let repo = SqliteIncidentRepository::new(pool); let incident = repo.create(&new_incident).await.unwrap(); let found = repo.get(incident.id).await.unwrap(); assert_eq!(found.unwrap().id, incident.id); } }
Test Fixtures
Rust Fixtures
#![allow(unused)] fn main() { // tests/fixtures.rs pub fn mock_incident() -> Incident { Incident { id: Uuid::new_v4(), incident_type: IncidentType::Phishing, severity: Severity::High, status: IncidentStatus::Open, raw_data: json!({"subject": "Test"}), ..Default::default() } } }
Python Fixtures
# tests/conftest.py
import pytest
@pytest.fixture
def mock_incident():
    """Return a minimal phishing incident payload as a plain dict."""
    return {
        "id": "test-123",
        "type": "phishing",
        "severity": "high",
        "raw_data": {"subject": "Test Email"},
    }
@pytest.fixture
def mock_connector():
    """Return a new ``MockThreatIntelConnector`` instance for tests."""
    # NOTE(review): MockThreatIntelConnector is not imported in this snippet —
    # confirm where it is defined and import it in conftest.py.
    return MockThreatIntelConnector()
Mocking
Rust Mocking
use mockall::mock;

// Declares the mock type used below as `MockThreatIntelConnector`, with an
// expectation-driven implementation of the `ThreatIntelConnector` trait.
mock! {
    ThreatIntelConnector {}

    #[async_trait]
    impl ThreatIntelConnector for ThreatIntelConnector {
        async fn lookup_hash(&self, hash: &str) -> ConnectorResult<ThreatReport>;
    }
}

/// Runs the code under test against a mocked connector whose hash lookup
/// always returns a clean report.
#[tokio::test]
async fn test_with_mock() {
    let mut mock = MockThreatIntelConnector::new();
    // Any hash argument is accepted; the reply is always a clean report.
    mock.expect_lookup_hash()
        .returning(|_| Ok(ThreatReport::clean()));

    let result = function_using_connector(&mock).await;
    assert!(result.is_ok());
}
Python Mocking
from unittest.mock import AsyncMock, patch
@pytest.mark.asyncio
async def test_with_mock(mock_incident):
    """Triage with ``lookup_hash`` patched out and verify it was called once.

    ``mock_incident`` is requested as a pytest fixture argument so the sample
    incident is injected into the test.
    """
    # NOTE(review): this snippet only imports from unittest.mock; pytest and
    # TriageAgent must also be imported in the real test module — confirm.
    with patch("tw_ai.agents.tools.lookup_hash") as mock:
        mock.return_value = {"malicious": False}
        agent = TriageAgent()
        verdict = await agent.triage(mock_incident)
        mock.assert_called_once()
Test Coverage
Rust Coverage
# Install the cargo-tarpaulin subcommand
cargo install cargo-tarpaulin
# Run it with HTML output selected
cargo tarpaulin --out Html
Python Coverage
# Run pytest from python/ with coverage for tw_ai and an HTML coverage report
cd python
uv run pytest --cov=tw_ai --cov-report=html
CI Testing
GitHub Actions runs tests on every PR:
# .github/workflows/test.yml
# NOTE: a complete workflow file also needs an `on:` trigger
# (e.g. pull_request); it is not shown in this snippet.
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository, then set up the stable Rust toolchain.
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      # Run the tests, then run Clippy with warnings treated as errors.
      - run: cargo test
      - run: cargo clippy -- -D warnings
Test Data
Evaluation Test Cases
Test cases for AI triage evaluation:
# python/tw_ai/evaluation/test_cases/phishing.yaml
# One evaluation case: `input` is the incident data, `expected` the verdict to check.
- name: obvious_phishing
  input:
    sender: "security@fake-bank.com"
    subject: "Urgent: Verify Account"
    urls: ["https://phishing-site.com/login"]
    auth_results: {spf: fail, dkim: fail}
  expected:
    classification: malicious
    min_confidence: 0.8
Run evaluation:
# Run the evaluation test module with pytest from the python/ directory
cd python
uv run pytest tests/test_evaluation.py