Add event-taxonomy package with canonical schema, adapters, and CLI

Canonical NormalizedFinding schema with Severity enum (CRITICAL/HIGH/MEDIUM/LOW/INFO).
Per-project adapters for 9 tools with severity mapping for string labels, int 1-10,
float 0-1, Python Enum, and computed properties. CLI pipe interface and registry.

Nightshift-Task: event-taxonomy
Nightshift-Ref: https://github.com/marcus/nightshift
This commit is contained in:
Claude
2026-03-09 21:09:13 +00:00
parent ef0c88d50d
commit a31093822c
35 changed files with 709 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
# Package root: re-export the canonical schema types so callers can write
# `from event_taxonomy import NormalizedFinding, Severity, ToolEvent`.
from event_taxonomy.schema import NormalizedFinding, Severity, ToolEvent
__all__ = ["NormalizedFinding", "Severity", "ToolEvent"]

View File

@@ -0,0 +1,32 @@
"""CLI: reads JSON from stdin, outputs normalized JSON.
Usage:
echo '[{...}]' | python -m event_taxonomy --tool bus-factor-analyzer
echo '[{...}]' | event-taxonomy --tool dep-risk-scanner
"""
import argparse
import json
import sys
from event_taxonomy.registry import list_tools, normalize_output
def main() -> None:
    """CLI entry point: read raw findings JSON from stdin, print normalized JSON.

    Accepts either a single finding object or a list of findings on stdin.
    Exits via ``parser.error`` (status 2) on malformed JSON or an unknown
    tool name instead of dumping a traceback.
    """
    parser = argparse.ArgumentParser(description="Normalize tool findings to canonical schema")
    parser.add_argument("--tool", required=True, help=f"Tool name. Options: {', '.join(list_tools())}")
    parser.add_argument("--version", default="unknown", help="Tool version string")
    parser.add_argument("--indent", type=int, default=2, help="JSON indent (0 for compact)")
    args = parser.parse_args()
    try:
        raw = json.load(sys.stdin)
    except json.JSONDecodeError as exc:
        # FIX: previously an unhandled traceback; report a clean usage error.
        parser.error(f"stdin is not valid JSON: {exc}")
    if isinstance(raw, dict):
        raw = [raw]  # tolerate a single finding object as well as a list
    try:
        event = normalize_output(args.tool, raw, tool_version=args.version)
    except KeyError as exc:
        # FIX: unknown --tool raised a bare KeyError traceback; surface the
        # registry's message (which lists the available tools) as a CLI error.
        parser.error(str(exc))
    indent = args.indent if args.indent > 0 else None
    print(event.to_json(indent=indent))


if __name__ == "__main__":
    main()

Binary file not shown.

View File

View File

@@ -0,0 +1,85 @@
"""Centralized severity mapping logic for all tool representations."""
from event_taxonomy.schema import Severity
# bus-factor / dep-risk / knowledge-silo style: CRITICAL/HIGH/MEDIUM/LOW(/OK)
# The four risk names mirror Severity members directly; "OK" collapses to INFO.
_LABEL_MAP: dict[str, Severity] = {
    name: Severity[name] for name in ("CRITICAL", "HIGH", "MEDIUM", "LOW")
}
_LABEL_MAP["OK"] = Severity.INFO


def map_risk_label(label: str) -> Severity:
    """Map a CRITICAL/HIGH/MEDIUM/LOW/OK label (case-insensitive); unknown → INFO."""
    return _LABEL_MAP.get(label.upper(), Severity.INFO)
# doc-drift style: error/warning/info
def map_doc_severity(sev: str) -> Severity:
    """Map an error/warning/info label (case-insensitive).

    "info" and any unrecognized label both map to INFO.
    """
    key = sev.lower()
    if key == "error":
        return Severity.HIGH
    if key == "warning":
        return Severity.MEDIUM
    return Severity.INFO
# perf-regression style: integer 1-10
# (floor, severity) bands, checked from most to least severe.
_INT_BANDS: tuple[tuple[int, Severity], ...] = (
    (8, Severity.CRITICAL),
    (6, Severity.HIGH),
    (4, Severity.MEDIUM),
    (2, Severity.LOW),
)


def map_int_severity(score: int) -> Severity:
    """Map an integer severity on a 1-10 scale; anything below 2 → INFO."""
    for floor, sev in _INT_BANDS:
        if score >= floor:
            return sev
    return Severity.INFO
# schema-evolution style: safe/cautious/dangerous
def map_schema_risk(level: str) -> Severity:
    """Map a safe/cautious/dangerous label (case-insensitive); unknown → INFO.

    Note "safe" intentionally maps to LOW (a real, if benign, finding) while
    unrecognized labels fall all the way to INFO.
    """
    key = level.lower()
    if key == "dangerous":
        return Severity.CRITICAL
    if key == "cautious":
        return Severity.MEDIUM
    if key == "safe":
        return Severity.LOW
    return Severity.INFO
# roadmap-entropy style: critical/high/moderate/low
# "moderate" is the only label whose name differs from its Severity member.
_ENTROPY_LABELS: dict[str, Severity] = {
    "critical": Severity.CRITICAL,
    "high": Severity.HIGH,
    "moderate": Severity.MEDIUM,
    "low": Severity.LOW,
}


def map_entropy_risk(label: str) -> Severity:
    """Map a critical/high/moderate/low label (case-insensitive); unknown → INFO."""
    return _ENTROPY_LABELS.get(label.lower(), Severity.INFO)
# test-flakiness style: float 0.0-1.0
# (floor, severity) bands from most to least severe; any nonzero rate is at
# least LOW, and 0.0 (or below) is INFO.
_RATE_BANDS: tuple[tuple[float, Severity], ...] = (
    (0.5, Severity.CRITICAL),
    (0.3, Severity.HIGH),
    (0.15, Severity.MEDIUM),
)


def map_flakiness_rate(rate: float) -> Severity:
    """Map a flakiness rate in [0.0, 1.0] to a severity."""
    for floor, sev in _RATE_BANDS:
        if rate >= floor:
            return sev
    return Severity.LOW if rate > 0.0 else Severity.INFO

View File

@@ -0,0 +1,20 @@
from typing import Any
from event_taxonomy.adapters._severity import map_risk_label
from event_taxonomy.schema import NormalizedFinding
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one bus-factor-analyzer finding into the canonical schema.

    Expects keys: risk_label, bus_factor, top_author, top_author_pct,
    num_contributors; "file" is optional.
    """
    return NormalizedFinding(
        tool="bus-factor-analyzer",
        category="bus-factor",
        severity=map_risk_label(finding["risk_label"]),
        # FIX: the two message parts were concatenated with no separator,
        # yielding e.g. "Bus factor 2Alice owns 80%"; insert a dash.
        message=(
            f"Bus factor {finding['bus_factor']} — "
            f"{finding['top_author']} owns {finding['top_author_pct']:.0f}%"
        ),
        file=finding.get("file"),
        metadata={
            "top_author": finding["top_author"],
            "top_author_pct": finding["top_author_pct"],
            "num_contributors": finding["num_contributors"],
            "bus_factor": finding["bus_factor"],
        },
    )

View File

@@ -0,0 +1,22 @@
from typing import Any
from event_taxonomy.adapters._severity import map_risk_label
from event_taxonomy.schema import NormalizedFinding
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one dep-risk-scanner finding into the canonical schema.

    Expects keys: name, version; risk_label defaults to "OK". A
    recommendation is attached only when vuln_count or months_stale is set.
    """
    label = finding.get("risk_label", "OK")
    has_detail = finding.get("vuln_count") or finding.get("months_stale")
    recommendation = (
        f"Vulnerability count: {finding.get('vuln_count', 0)}, months stale: {finding.get('months_stale', 0)}"
        if has_detail
        else None
    )
    return NormalizedFinding(
        tool="dep-risk-scanner",
        category="dependency-risk",
        severity=map_risk_label(label),
        # FIX: package spec and risk label were concatenated with no
        # separator ("lodash@4.17.21HIGH"); insert a dash.
        message=f"{finding['name']}@{finding['version']} — {label}",
        recommendation=recommendation,
        metadata={
            k: finding[k]
            for k in ("ecosystem", "vuln_score", "maintenance_score", "risk_score", "vulns")
            if k in finding
        },
    )

View File

@@ -0,0 +1,16 @@
from typing import Any
from event_taxonomy.adapters._severity import map_doc_severity
from event_taxonomy.schema import NormalizedFinding
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one doc-drift-detector finding into the canonical schema."""
    meta: dict[str, Any] = {}
    symbol = finding.get("symbol")
    if symbol:
        meta["symbol"] = symbol
    return NormalizedFinding(
        tool="doc-drift-detector",
        category=finding.get("kind", "doc-drift"),
        severity=map_doc_severity(finding["severity"]),
        message=finding["message"],
        file=finding.get("file"),
        line=finding.get("lineno"),
        metadata=meta,
    )

View File

@@ -0,0 +1,24 @@
from typing import Any
from event_taxonomy.adapters._severity import map_risk_label
from event_taxonomy.schema import NormalizedFinding
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one knowledge-silo-detector finding into the canonical schema."""
    risk = finding.get("risk", "LOW")
    # The risk field may arrive as a raw string or an Enum member; accept both.
    risk_text = str(risk.value if hasattr(risk, "value") else risk)
    dominant = finding["dominant_commits"]
    total = finding["total_commits"]
    return NormalizedFinding(
        tool="knowledge-silo-detector",
        category="knowledge-silo",
        severity=map_risk_label(risk_text),
        message=f"{finding['filepath']} dominated by {finding['dominant_author']} ({dominant}/{total} commits)",
        file=finding.get("filepath"),
        metadata={
            "dominant_author": finding["dominant_author"],
            # Guard the ratio against a zero total-commit count.
            "dominance_ratio": dominant / total if total else 0,
            "other_authors": finding.get("other_authors", {}),
        },
    )

View File

@@ -0,0 +1,16 @@
from typing import Any
from event_taxonomy.adapters._severity import map_int_severity
from event_taxonomy.schema import NormalizedFinding
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one perf-regression-spotter finding into the canonical schema."""
    snippet = finding.get("snippet")
    return NormalizedFinding(
        tool="perf-regression-spotter",
        category=finding.get("pattern", "performance"),
        severity=map_int_severity(finding["severity"]),
        message=finding["message"],
        file=finding.get("file"),
        line=finding.get("line"),
        metadata={"snippet": snippet} if snippet else {},
    )

View File

@@ -0,0 +1,26 @@
from typing import Any
from event_taxonomy.adapters._severity import map_entropy_risk
from event_taxonomy.schema import NormalizedFinding
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one roadmap-entropy finding into the canonical schema.

    Expects entropy_score; risk_label defaults to "low". Copies the known
    metric keys into metadata when present.
    """
    label = finding.get("risk_label", "low")
    return NormalizedFinding(
        tool="roadmap-entropy",
        category="roadmap-entropy",
        severity=map_entropy_risk(label),
        # FIX: score and label were concatenated with no separator
        # ("Entropy score 0.72high"); insert a dash.
        message=f"Entropy score {finding['entropy_score']:.2f} — {label}",
        metadata={
            k: finding[k]
            for k in (
                "base_count",
                "head_count",
                "item_count_delta",
                "description_churn",
                "priority_shuffles",
                "entropy_score",
            )
            if k in finding
        },
    )

View File

@@ -0,0 +1,22 @@
from typing import Any
from event_taxonomy.adapters._severity import map_schema_risk
from event_taxonomy.schema import NormalizedFinding
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one schema-evolution-advisor finding into the canonical schema.

    The "operation" field may be either a mapping with op_type/args keys or
    an object exposing op_type/args attributes; both shapes are handled.
    """
    op = finding.get("operation", {})
    if hasattr(op, "op_type"):
        op_type = op.op_type
        op_args = getattr(op, "args", [])
    else:
        op_type = op.get("op_type", "unknown")
        op_args = op.get("args", [])
    return NormalizedFinding(
        tool="schema-evolution-advisor",
        category=op_type,
        severity=map_schema_risk(finding["risk_level"]),
        message=finding["rationale"],
        recommendation=finding.get("recommendation"),
        metadata={"op_type": op_type, "args": op_args},
    )

View File

@@ -0,0 +1,28 @@
from typing import Any
from event_taxonomy.schema import NormalizedFinding, Severity
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one semantic-diff finding into the canonical schema.

    Severity heuristic: any deletion category is MEDIUM; otherwise total
    churn (added + removed) buckets into HIGH (>100), MEDIUM (>30),
    LOW (>0), or INFO (no change).
    """
    cats = finding.get("categories", [])
    added = finding.get("added", 0)
    removed = finding.get("removed", 0)
    churn = added + removed
    if "DELETION" in cats or "DELETED_FILE" in cats:
        severity = Severity.MEDIUM
    elif churn > 100:
        severity = Severity.HIGH
    elif churn > 30:
        severity = Severity.MEDIUM
    elif churn > 0:
        severity = Severity.LOW
    else:
        severity = Severity.INFO
    # FIX: finding.get("summary", f"Changed {finding['path']}") evaluated the
    # fallback f-string eagerly, raising KeyError when "summary" was present
    # but "path" was not. Build the fallback only when it is actually needed.
    if "summary" in finding:
        message = finding["summary"]
    else:
        message = f"Changed {finding.get('path', '<unknown>')}"
    return NormalizedFinding(
        tool="semantic-diff",
        category=",".join(cats) if cats else "change",
        severity=severity,
        message=message,
        file=finding.get("path"),
        metadata={"added": added, "removed": removed, "categories": cats},
    )

View File

@@ -0,0 +1,23 @@
from typing import Any
from event_taxonomy.adapters._severity import map_flakiness_rate
from event_taxonomy.schema import NormalizedFinding
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one test-flakiness-analyzer finding into the canonical schema."""
    rate = finding["flakiness_rate"]
    test_id = finding["test_id"]
    runs = finding["total_runs"]
    failures = finding["fail_count"]
    return NormalizedFinding(
        tool="test-flakiness-analyzer",
        category="flaky-test",
        severity=map_flakiness_rate(rate),
        message=f"{test_id} flaky at {rate:.0%} ({failures}/{runs} failures)",
        metadata={
            "test_id": test_id,
            "classname": finding.get("classname", ""),
            "total_runs": runs,
            "pass_count": finding.get("pass_count", 0),
            "fail_count": failures,
            "flakiness_rate": rate,
            "avg_duration": finding.get("avg_duration", 0),
        },
    )

View File

@@ -0,0 +1,48 @@
"""Adapter registry — maps tool names to normalize functions."""
from typing import Any, Callable
from event_taxonomy.adapters import (
bus_factor,
dep_risk,
doc_drift,
knowledge_silo,
perf_regression,
roadmap_entropy,
schema_evolution,
semantic_diff,
test_flakiness,
)
from event_taxonomy.schema import NormalizedFinding, ToolEvent
# Registry mapping the public tool name (as passed via `--tool` on the CLI)
# to that tool's normalize(finding) adapter function.
_ADAPTERS: dict[str, Callable[[dict[str, Any]], NormalizedFinding]] = {
    "bus-factor-analyzer": bus_factor.normalize,
    "dep-risk-scanner": dep_risk.normalize,
    "doc-drift-detector": doc_drift.normalize,
    "knowledge-silo-detector": knowledge_silo.normalize,
    "perf-regression-spotter": perf_regression.normalize,
    "roadmap-entropy": roadmap_entropy.normalize,
    "schema-evolution-advisor": schema_evolution.normalize,
    "semantic-diff": semantic_diff.normalize,
    "test-flakiness-analyzer": test_flakiness.normalize,
}
def list_tools() -> list[str]:
    """Return every registered tool name, sorted alphabetically."""
    return sorted(_ADAPTERS)
def get_adapter(tool_name: str) -> Callable[[dict[str, Any]], NormalizedFinding]:
    """Look up the normalize function registered under *tool_name*.

    Raises KeyError listing the available tools when the name is unknown.
    """
    try:
        return _ADAPTERS[tool_name]
    except KeyError:
        raise KeyError(f"Unknown tool: {tool_name}. Available: {', '.join(sorted(_ADAPTERS))}") from None
def normalize_output(
    tool_name: str,
    raw_findings: list[dict[str, Any]],
    tool_version: str = "unknown",
) -> ToolEvent:
    """Run each raw finding through the tool's adapter and wrap in a ToolEvent.

    Raises KeyError (from get_adapter) when *tool_name* is not registered.
    """
    adapt = get_adapter(tool_name)
    return ToolEvent(
        tool_name=tool_name,
        tool_version=tool_version,
        findings=[adapt(raw) for raw in raw_findings],
    )

View File

@@ -0,0 +1,68 @@
from __future__ import annotations
import enum
import json
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from typing import Any
class Severity(enum.IntEnum):
    """Canonical severity scale; a higher value means more severe.

    IntEnum so severities compare and sort naturally (CRITICAL > INFO).
    """

    CRITICAL = 5
    HIGH = 4
    MEDIUM = 3
    LOW = 2
    INFO = 1

    def __str__(self) -> str:
        # Render as the bare member name, e.g. "HIGH", for messages and JSON.
        return self.name
@dataclass
class NormalizedFinding:
    """Unified finding representation shared by every analysis tool."""

    tool: str
    category: str
    severity: Severity
    message: str
    file: str | None = None
    line: int | None = None
    recommendation: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict, rendering severity as its name."""
        return {**asdict(self), "severity": self.severity.name}
@dataclass
class ToolEvent:
    """Envelope wrapping one tool's normalized output."""

    tool_name: str
    tool_version: str
    # ISO-8601 UTC capture time, stamped when the event is constructed.
    timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    findings: list[NormalizedFinding] = field(default_factory=list)

    @property
    def summary(self) -> dict[str, int]:
        """Finding counts: a total plus one bucket per severity name."""
        buckets: dict[str, int] = {level.name: 0 for level in Severity}
        for finding in self.findings:
            buckets[finding.severity.name] += 1
        return {"total": len(self.findings), **buckets}

    def to_dict(self) -> dict[str, Any]:
        """Serialize the envelope, its findings, and the summary to plain data."""
        return {
            "tool_name": self.tool_name,
            "tool_version": self.tool_version,
            "timestamp": self.timestamp,
            "findings": [finding.to_dict() for finding in self.findings],
            "summary": self.summary,
        }

    def to_json(self, **kwargs: Any) -> str:
        """JSON-encode :meth:`to_dict`; *kwargs* pass straight to json.dumps."""
        return json.dumps(self.to_dict(), **kwargs)