Add event-taxonomy package with canonical schema, adapters, and CLI

Canonical NormalizedFinding schema with a Severity enum (CRITICAL/HIGH/MEDIUM/LOW/INFO). Per-project adapters for 9 tools, with severity mapping from string labels, ints 1-10, floats 0-1, Python Enums, and computed properties. CLI pipe interface and registry.

Nightshift-Task: event-taxonomy
Nightshift-Ref: https://github.com/marcus/nightshift
2026-03-09 21:09:13 +00:00
parent ef0c88d50d
commit a31093822c
35 changed files with 709 additions and 0 deletions
--- a/tests/__pycache__/test_adapters.cpython-313-pytest-9.0.2.pyc
+++ b/tests/__pycache__/test_adapters.cpython-313-pytest-9.0.2.pyc
--- a/tests/__pycache__/test_schema.cpython-313-pytest-9.0.2.pyc
+++ b/tests/__pycache__/test_schema.cpython-313-pytest-9.0.2.pyc
--- a/tests/__pycache__/test_severity.cpython-313-pytest-9.0.2.pyc
+++ b/tests/__pycache__/test_severity.cpython-313-pytest-9.0.2.pyc
--- a/tests/test_adapters.py
+++ b/tests/test_adapters.py
@@ -0,0 +1,145 @@
+from event_taxonomy.adapters import (
+    bus_factor,
+    dep_risk,
+    doc_drift,
+    knowledge_silo,
+    perf_regression,
+    roadmap_entropy,
+    schema_evolution,
+    semantic_diff,
+    test_flakiness,
+)
+from event_taxonomy.schema import Severity
+
+
+def test_bus_factor():  # bus_factor adapter: one raw record in, one normalized finding out
+    f = bus_factor.normalize({
+        "file": "core.py",
+        "top_author": "alice",
+        "top_author_pct": 85.0,
+        "num_contributors": 2,
+        "bus_factor": 1,
+        "risk_label": "CRITICAL",  # presumably the field the adapter maps to severity - confirm
+    })
+    assert f.severity == Severity.CRITICAL
+    assert f.file == "core.py"  # input "file" value must surface unchanged on the finding
+    assert "alice" in f.message  # message must name the top author
+
+
+def test_dep_risk():  # dep_risk adapter: one dependency record -> normalized finding
+    f = dep_risk.normalize({
+        "name": "requests",
+        "ecosystem": "pypi",
+        "version": "2.28.0",
+        "risk_label": "HIGH",  # presumably the field the adapter maps to severity - confirm
+        "vuln_count": 2,
+        "months_stale": 6,
+    })
+    assert f.severity == Severity.HIGH
+    assert "requests" in f.message  # message must name the dependency
+
+
+def test_doc_drift():  # doc_drift adapter: one drift record -> normalized finding
+    f = doc_drift.normalize({
+        "kind": "docstring_param",
+        "severity": "error",  # tool-native label; expected to normalize to HIGH below
+        "message": "Param x not in signature",
+        "file": "model.py",
+        "lineno": 42,  # input key is "lineno"; the finding exposes it as .line
+        "symbol": "process",
+    })
+    assert f.severity == Severity.HIGH
+    assert f.line == 42
+    assert f.file == "model.py"
+
+
+def test_knowledge_silo():  # knowledge_silo adapter: one per-file record -> normalized finding
+    f = knowledge_silo.normalize({
+        "filepath": "auth.py",
+        "total_commits": 50,
+        "dominant_author": "bob",
+        "dominant_commits": 45,
+        "other_authors": {"alice": 5},
+        "risk": "HIGH",  # presumably the field the adapter maps to severity - confirm
+    })
+    assert f.severity == Severity.HIGH
+    assert "bob" in f.message  # message must name the dominant author
+
+
+def test_perf_regression():  # perf_regression adapter: integer severity input -> canonical Severity
+    f = perf_regression.normalize({
+        "pattern": "n-plus-one-query",
+        "severity": 8,  # integer score; expected to normalize to CRITICAL below
+        "file": "api.py",
+        "line": 123,
+        "message": "DB call inside loop",
+        "snippet": "for x in items: db.query(x)",
+    })
+    assert f.severity == Severity.CRITICAL
+    assert f.file == "api.py"  # location fields must pass through unchanged
+    assert f.line == 123
+
+
+def test_roadmap_entropy():  # roadmap_entropy adapter: one entropy report -> normalized finding
+    f = roadmap_entropy.normalize({
+        "entropy_score": 0.65,
+        "risk_label": "high",  # lowercase label; expected to normalize to HIGH below
+        "base_count": 10,
+        "head_count": 15,
+        "item_count_delta": 5,
+        "description_churn": 0.25,
+        "priority_shuffles": 3,
+    })
+    assert f.severity == Severity.HIGH
+    assert "0.65" in f.message  # message must contain the entropy score as text
+
+
+def test_schema_evolution():  # schema_evolution adapter: one migration-operation record -> finding
+    f = schema_evolution.normalize({
+        "operation": {"op_type": "drop_column", "args": ["users", "password"]},
+        "risk_level": "dangerous",  # tool-native label; expected to normalize to CRITICAL below
+        "rationale": "Dropping column causes data loss",
+        "recommendation": "Back up first",
+    })
+    assert f.severity == Severity.CRITICAL
+    assert f.recommendation == "Back up first"  # recommendation text must pass through verbatim
+
+
+def test_semantic_diff():  # semantic_diff adapter: moderate change (45 added / 23 removed) -> MEDIUM
+    f = semantic_diff.normalize({
+        "path": "api.py",  # input key is "path"; the finding exposes it as .file
+        "categories": ["REFACTOR", "FEATURE"],
+        "summary": "Modified api.py",
+        "added": 45,
+        "removed": 23,
+    })
+    assert f.severity == Severity.MEDIUM  # NOTE(review): presumably computed from churn/categories - confirm in adapter
+    assert f.file == "api.py"
+
+
+def test_semantic_diff_large_churn():  # semantic_diff adapter: larger change (200 added / 50 removed) -> HIGH
+    f = semantic_diff.normalize({
+        "path": "big.py",
+        "categories": ["FEATURE"],
+        "summary": "Rewrote big.py",
+        "added": 200,
+        "removed": 50,
+    })
+    assert f.severity == Severity.HIGH  # NOTE(review): exact churn threshold is not visible from this test
+
+
+def test_test_flakiness():  # test_flakiness adapter: per-test run statistics -> normalized finding
+    f = test_flakiness.normalize({
+        "test_id": "tests.integration::test_auth",
+        "classname": "tests.integration",
+        "total_runs": 20,
+        "pass_count": 12,
+        "fail_count": 8,
+        "error_count": 0,
+        "skip_count": 0,
+        "flakiness_rate": 0.4,  # float rate; expected to normalize to HIGH below
+        "avg_duration": 1.25,
+        "duration_stddev": 0.35,
+    })
+    assert f.severity == Severity.HIGH
+    assert "40%" in f.message  # message must render the rate as a whole-number percentage
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -0,0 +1,52 @@
+import json
+
+from event_taxonomy.schema import NormalizedFinding, Severity, ToolEvent
+
+
+def test_severity_ordering():  # Severity members must be comparable with ">"
+    assert Severity.CRITICAL > Severity.HIGH > Severity.MEDIUM > Severity.LOW > Severity.INFO  # strict descending chain
+
+
+def test_severity_str():  # str() of a member must be the bare member name
+    assert str(Severity.CRITICAL) == "CRITICAL"  # i.e. not "Severity.CRITICAL"
+    assert str(Severity.INFO) == "INFO"
+
+
+def test_finding_construction():  # keyword construction stores fields and applies defaults
+    f = NormalizedFinding(
+        tool="test-tool",
+        category="test",
+        severity=Severity.HIGH,
+        message="something broke",
+        file="foo.py",
+        line=42,
+    )
+    assert f.tool == "test-tool"
+    assert f.severity == Severity.HIGH
+    assert f.file == "foo.py"
+    assert f.line == 42
+    assert f.metadata == {}  # metadata was not passed: must default to an empty dict
+    assert f.recommendation is None  # recommendation was not passed: must default to None
+
+
+def test_finding_to_dict():  # to_dict() must produce plain values
+    f = NormalizedFinding(tool="t", category="c", severity=Severity.LOW, message="m")  # only these four fields are supplied
+    d = f.to_dict()
+    assert d["severity"] == "LOW"  # severity is emitted as its name string, not the enum member
+    assert d["tool"] == "t"
+
+
+def test_tool_event_serialization():  # ToolEvent: summary counts in to_dict() and JSON round-trip
+    findings = [
+        NormalizedFinding(tool="t", category="c", severity=Severity.HIGH, message="a"),
+        NormalizedFinding(tool="t", category="c", severity=Severity.LOW, message="b"),
+    ]
+    event = ToolEvent(tool_name="t", tool_version="1.0", findings=findings)
+    d = event.to_dict()
+    assert d["summary"]["total"] == 2  # summary carries the overall finding count...
+    assert d["summary"]["HIGH"] == 1  # ...and per-severity counts keyed by severity name
+    assert d["summary"]["LOW"] == 1
+    j = event.to_json()
+    parsed = json.loads(j)  # to_json() output must be parseable JSON
+    assert parsed["tool_name"] == "t"
+    assert len(parsed["findings"]) == 2  # both findings survive serialization
--- a/tests/test_severity.py
+++ b/tests/test_severity.py
@@ -0,0 +1,60 @@
+from event_taxonomy.adapters._severity import (
+    map_doc_severity,
+    map_entropy_risk,
+    map_flakiness_rate,
+    map_int_severity,
+    map_risk_label,
+    map_schema_risk,
+)
+from event_taxonomy.schema import Severity
+
+
+def test_risk_label_mapping():  # map_risk_label: string risk labels -> Severity
+    assert map_risk_label("CRITICAL") == Severity.CRITICAL
+    assert map_risk_label("high") == Severity.HIGH  # lowercase input: matching must ignore case
+    assert map_risk_label("Medium") == Severity.MEDIUM  # mixed-case input
+    assert map_risk_label("LOW") == Severity.LOW
+    assert map_risk_label("OK") == Severity.INFO
+    assert map_risk_label("unknown") == Severity.INFO  # presumably the fallback for unrecognized labels - confirm
+
+
+def test_doc_severity_mapping():  # map_doc_severity: error/warning/info labels -> Severity
+    assert map_doc_severity("error") == Severity.HIGH  # "error" maps to HIGH, not CRITICAL
+    assert map_doc_severity("warning") == Severity.MEDIUM
+    assert map_doc_severity("info") == Severity.INFO
+    assert map_doc_severity("ERROR") == Severity.HIGH  # uppercase input: matching must ignore case
+
+
+def test_int_severity_boundaries():  # map_int_severity: integer scores -> Severity (9 and out-of-range values not covered here)
+    assert map_int_severity(10) == Severity.CRITICAL
+    assert map_int_severity(8) == Severity.CRITICAL  # lowest value asserted as CRITICAL
+    assert map_int_severity(7) == Severity.HIGH
+    assert map_int_severity(6) == Severity.HIGH  # lowest value asserted as HIGH
+    assert map_int_severity(5) == Severity.MEDIUM
+    assert map_int_severity(4) == Severity.MEDIUM  # lowest value asserted as MEDIUM
+    assert map_int_severity(3) == Severity.LOW
+    assert map_int_severity(2) == Severity.LOW  # lowest value asserted as LOW
+    assert map_int_severity(1) == Severity.INFO
+
+
+def test_schema_risk_mapping():  # map_schema_risk: dangerous/cautious/safe labels -> Severity
+    assert map_schema_risk("dangerous") == Severity.CRITICAL
+    assert map_schema_risk("cautious") == Severity.MEDIUM
+    assert map_schema_risk("safe") == Severity.LOW  # "safe" still yields LOW rather than INFO
+    assert map_schema_risk("Dangerous") == Severity.CRITICAL  # capitalized input: matching must ignore case
+
+
+def test_entropy_risk_mapping():  # map_entropy_risk: lowercase entropy labels -> Severity
+    assert map_entropy_risk("critical") == Severity.CRITICAL
+    assert map_entropy_risk("high") == Severity.HIGH
+    assert map_entropy_risk("moderate") == Severity.MEDIUM  # this tool says "moderate" where the canonical level is MEDIUM
+    assert map_entropy_risk("low") == Severity.LOW
+
+
+def test_flakiness_rate_boundaries():  # map_flakiness_rate: float rate -> Severity; NOTE(review): exact threshold edges are not visible here
+    assert map_flakiness_rate(0.0) == Severity.INFO  # a never-failing test is only informational
+    assert map_flakiness_rate(0.05) == Severity.LOW
+    assert map_flakiness_rate(0.15) == Severity.MEDIUM
+    assert map_flakiness_rate(0.3) == Severity.HIGH
+    assert map_flakiness_rate(0.5) == Severity.CRITICAL
+    assert map_flakiness_rate(1.0) == Severity.CRITICAL  # an always-failing test stays at CRITICAL