Add event-taxonomy package with canonical schema, adapters, and CLI
Canonical NormalizedFinding schema with Severity enum (CRITICAL/HIGH/MEDIUM/LOW/INFO). Per-project adapters for 9 tools with severity mapping for string labels, int 1-10, float 0-1, Python Enum, and computed properties. CLI pipe interface and registry. Nightshift-Task: event-taxonomy Nightshift-Ref: https://github.com/marcus/nightshift
This commit is contained in:
3
src/event_taxonomy/__init__.py
Normal file
3
src/event_taxonomy/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""Public package surface: re-export the canonical schema types."""

from event_taxonomy.schema import NormalizedFinding, Severity, ToolEvent

__all__ = ["NormalizedFinding", "Severity", "ToolEvent"]
|
||||
32
src/event_taxonomy/__main__.py
Normal file
32
src/event_taxonomy/__main__.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""CLI: reads JSON from stdin, outputs normalized JSON.
|
||||
|
||||
Usage:
|
||||
echo '[{...}]' | python -m event_taxonomy --tool bus-factor-analyzer
|
||||
echo '[{...}]' | event-taxonomy --tool dep-risk-scanner
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
|
||||
from event_taxonomy.registry import list_tools, normalize_output
|
||||
|
||||
|
||||
def main() -> None:
    """Parse CLI flags, read raw findings from stdin, print normalized JSON.

    Reads either a single JSON object or a JSON array of findings from
    stdin and writes the normalized ToolEvent as JSON to stdout.

    Exits with status 2 (and a one-line message on stderr) on malformed
    JSON, a payload that is neither an object nor an array, or an unknown
    tool name — instead of dumping a traceback at the user.
    """
    parser = argparse.ArgumentParser(description="Normalize tool findings to canonical schema")
    parser.add_argument("--tool", required=True, help=f"Tool name. Options: {', '.join(list_tools())}")
    parser.add_argument("--version", default="unknown", help="Tool version string")
    parser.add_argument("--indent", type=int, default=2, help="JSON indent (0 for compact)")
    args = parser.parse_args()

    try:
        raw = json.load(sys.stdin)
    except json.JSONDecodeError as exc:
        print(f"error: invalid JSON on stdin: {exc}", file=sys.stderr)
        sys.exit(2)

    # Accept a single finding object as a one-element batch.
    if isinstance(raw, dict):
        raw = [raw]
    elif not isinstance(raw, list):
        print("error: stdin must contain a JSON object or an array of findings", file=sys.stderr)
        sys.exit(2)

    try:
        event = normalize_output(args.tool, raw, tool_version=args.version)
    except KeyError as exc:
        # get_adapter raises KeyError with a human-readable message
        # listing the available tools.
        print(f"error: {exc.args[0] if exc.args else exc}", file=sys.stderr)
        sys.exit(2)

    indent = args.indent if args.indent > 0 else None  # 0 (or negative) -> compact output
    print(event.to_json(indent=indent))


if __name__ == "__main__":
    main()
|
||||
BIN
src/event_taxonomy/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
src/event_taxonomy/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
src/event_taxonomy/__pycache__/schema.cpython-313.pyc
Normal file
BIN
src/event_taxonomy/__pycache__/schema.cpython-313.pyc
Normal file
Binary file not shown.
0
src/event_taxonomy/adapters/__init__.py
Normal file
0
src/event_taxonomy/adapters/__init__.py
Normal file
BIN
src/event_taxonomy/adapters/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
src/event_taxonomy/adapters/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
src/event_taxonomy/adapters/__pycache__/dep_risk.cpython-313.pyc
Normal file
BIN
src/event_taxonomy/adapters/__pycache__/dep_risk.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
85
src/event_taxonomy/adapters/_severity.py
Normal file
85
src/event_taxonomy/adapters/_severity.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""Centralized severity mapping logic for all tool representations."""
|
||||
|
||||
from event_taxonomy.schema import Severity
|
||||
|
||||
# bus-factor / dep-risk / knowledge-silo style: CRITICAL/HIGH/MEDIUM/LOW(/OK)
# The four risk names match Severity members one-to-one; "OK" is the
# nothing-to-report label and demotes to INFO.
_RISK_LABELS: dict[str, Severity] = {name: Severity[name] for name in ("CRITICAL", "HIGH", "MEDIUM", "LOW")}
_RISK_LABELS["OK"] = Severity.INFO


def map_risk_label(label: str) -> Severity:
    """Map CRITICAL/HIGH/MEDIUM/LOW/OK string labels (case-insensitive).

    Unrecognized labels fall back to Severity.INFO.
    """
    return _RISK_LABELS.get(label.upper(), Severity.INFO)
|
||||
|
||||
|
||||
# doc-drift style: error/warning/info
_DOC_LEVELS: dict[str, Severity] = dict(
    error=Severity.HIGH,
    warning=Severity.MEDIUM,
    info=Severity.INFO,
)


def map_doc_severity(sev: str) -> Severity:
    """Map error/warning/info string labels (case-insensitive).

    Unrecognized labels fall back to Severity.INFO.
    """
    return _DOC_LEVELS.get(sev.lower(), Severity.INFO)
|
||||
|
||||
|
||||
# perf-regression style: integer 1-10
def map_int_severity(score: int) -> Severity:
    """Map integer severity (1-10 scale).

    Scores of 8+ are CRITICAL, 6-7 HIGH, 4-5 MEDIUM, 2-3 LOW, and
    anything below 2 (including 0 or negatives) INFO.
    """
    # Walk the bands from most to least severe; first floor that the
    # score clears wins.
    bands = (
        (8, Severity.CRITICAL),
        (6, Severity.HIGH),
        (4, Severity.MEDIUM),
        (2, Severity.LOW),
    )
    for floor, sev in bands:
        if score >= floor:
            return sev
    return Severity.INFO
|
||||
|
||||
|
||||
# schema-evolution style: safe/cautious/dangerous
_SCHEMA_LEVELS: dict[str, Severity] = dict(
    dangerous=Severity.CRITICAL,
    cautious=Severity.MEDIUM,
    safe=Severity.LOW,
)


def map_schema_risk(level: str) -> Severity:
    """Map safe/cautious/dangerous string labels (case-insensitive).

    Unrecognized labels fall back to Severity.INFO.
    """
    return _SCHEMA_LEVELS.get(level.lower(), Severity.INFO)
|
||||
|
||||
|
||||
# roadmap-entropy style: critical/high/moderate/low
# Note the tool says "moderate" where the canonical level is MEDIUM.
_ENTROPY_LEVELS: dict[str, Severity] = dict(
    critical=Severity.CRITICAL,
    high=Severity.HIGH,
    moderate=Severity.MEDIUM,
    low=Severity.LOW,
)


def map_entropy_risk(label: str) -> Severity:
    """Map critical/high/moderate/low string labels (case-insensitive).

    Unrecognized labels fall back to Severity.INFO.
    """
    return _ENTROPY_LEVELS.get(label.lower(), Severity.INFO)
|
||||
|
||||
|
||||
# test-flakiness style: float 0.0-1.0
def map_flakiness_rate(rate: float) -> Severity:
    """Map flakiness rate (0.0-1.0) to severity.

    >=0.5 is CRITICAL, >=0.3 HIGH, >=0.15 MEDIUM, any non-zero rate LOW,
    and a perfectly stable test (0.0) INFO.
    """
    bands = (
        (0.5, Severity.CRITICAL),
        (0.3, Severity.HIGH),
        (0.15, Severity.MEDIUM),
    )
    for floor, sev in bands:
        if rate >= floor:
            return sev
    # Anything flaky at all is at least LOW; 0.0 means never failed.
    return Severity.LOW if rate > 0.0 else Severity.INFO
|
||||
20
src/event_taxonomy/adapters/bus_factor.py
Normal file
20
src/event_taxonomy/adapters/bus_factor.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from typing import Any
|
||||
|
||||
from event_taxonomy.adapters._severity import map_risk_label
|
||||
from event_taxonomy.schema import NormalizedFinding
|
||||
|
||||
|
||||
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one raw bus-factor-analyzer finding into a NormalizedFinding.

    Expects the tool's keys: risk_label, bus_factor, top_author,
    top_author_pct, num_contributors; "file" is optional.
    """
    top_author = finding["top_author"]
    top_pct = finding["top_author_pct"]
    bus_factor_value = finding["bus_factor"]
    extra = {
        "top_author": top_author,
        "top_author_pct": top_pct,
        "num_contributors": finding["num_contributors"],
        "bus_factor": bus_factor_value,
    }
    return NormalizedFinding(
        tool="bus-factor-analyzer",
        category="bus-factor",
        severity=map_risk_label(finding["risk_label"]),
        message=f"Bus factor {bus_factor_value} — {top_author} owns {top_pct:.0f}%",
        file=finding.get("file"),
        metadata=extra,
    )
|
||||
22
src/event_taxonomy/adapters/dep_risk.py
Normal file
22
src/event_taxonomy/adapters/dep_risk.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from typing import Any
|
||||
|
||||
from event_taxonomy.adapters._severity import map_risk_label
|
||||
from event_taxonomy.schema import NormalizedFinding
|
||||
|
||||
|
||||
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one raw dep-risk-scanner finding into a NormalizedFinding.

    Expects "name" and "version"; risk_label defaults to "OK". A
    recommendation is attached only when there are vulnerabilities or
    the package is stale.
    """
    label = finding.get("risk_label", "OK")

    recommendation = None
    if finding.get("vuln_count") or finding.get("months_stale"):
        recommendation = (
            f"Vulnerability count: {finding.get('vuln_count', 0)}, "
            f"months stale: {finding.get('months_stale', 0)}"
        )

    # Copy through whichever scoring details the scanner provided.
    passthrough = ("ecosystem", "vuln_score", "maintenance_score", "risk_score", "vulns")
    extra = {key: finding[key] for key in passthrough if key in finding}

    return NormalizedFinding(
        tool="dep-risk-scanner",
        category="dependency-risk",
        severity=map_risk_label(label),
        message=f"{finding['name']}@{finding['version']} — {label}",
        recommendation=recommendation,
        metadata=extra,
    )
|
||||
16
src/event_taxonomy/adapters/doc_drift.py
Normal file
16
src/event_taxonomy/adapters/doc_drift.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from typing import Any
|
||||
|
||||
from event_taxonomy.adapters._severity import map_doc_severity
|
||||
from event_taxonomy.schema import NormalizedFinding
|
||||
|
||||
|
||||
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one raw doc-drift-detector finding into a NormalizedFinding.

    Expects "severity" and "message"; kind, file, lineno and symbol are
    optional.
    """
    symbol = finding.get("symbol")
    return NormalizedFinding(
        tool="doc-drift-detector",
        category=finding.get("kind", "doc-drift"),
        severity=map_doc_severity(finding["severity"]),
        message=finding["message"],
        file=finding.get("file"),
        line=finding.get("lineno"),
        # Only carry the symbol through when the detector reported one.
        metadata={"symbol": symbol} if symbol else {},
    )
|
||||
24
src/event_taxonomy/adapters/knowledge_silo.py
Normal file
24
src/event_taxonomy/adapters/knowledge_silo.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from typing import Any
|
||||
|
||||
from event_taxonomy.adapters._severity import map_risk_label
|
||||
from event_taxonomy.schema import NormalizedFinding
|
||||
|
||||
|
||||
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one raw knowledge-silo-detector finding into a NormalizedFinding.

    Expects filepath, dominant_author, dominant_commits and total_commits;
    "risk" may be a plain string or an Enum-like object with a .value.
    """
    risk = finding.get("risk", "LOW")
    # Unwrap Enum-like risk objects to their underlying label.
    risk_label = getattr(risk, "value", risk)

    dominant = finding["dominant_commits"]
    total = finding["total_commits"]
    # Guard against a zero commit count to avoid ZeroDivisionError.
    ratio = dominant / total if total else 0

    return NormalizedFinding(
        tool="knowledge-silo-detector",
        category="knowledge-silo",
        severity=map_risk_label(str(risk_label)),
        message=f"{finding['filepath']} dominated by {finding['dominant_author']} ({dominant}/{total} commits)",
        file=finding.get("filepath"),
        metadata={
            "dominant_author": finding["dominant_author"],
            "dominance_ratio": ratio,
            "other_authors": finding.get("other_authors", {}),
        },
    )
|
||||
16
src/event_taxonomy/adapters/perf_regression.py
Normal file
16
src/event_taxonomy/adapters/perf_regression.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from typing import Any
|
||||
|
||||
from event_taxonomy.adapters._severity import map_int_severity
|
||||
from event_taxonomy.schema import NormalizedFinding
|
||||
|
||||
|
||||
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one raw perf-regression-spotter finding into a NormalizedFinding.

    Expects integer "severity" (1-10) and "message"; pattern, file, line
    and snippet are optional.
    """
    snippet = finding.get("snippet")
    return NormalizedFinding(
        tool="perf-regression-spotter",
        category=finding.get("pattern", "performance"),
        severity=map_int_severity(finding["severity"]),
        message=finding["message"],
        file=finding.get("file"),
        line=finding.get("line"),
        # Only include the code snippet when the spotter captured one.
        metadata={"snippet": snippet} if snippet else {},
    )
|
||||
26
src/event_taxonomy/adapters/roadmap_entropy.py
Normal file
26
src/event_taxonomy/adapters/roadmap_entropy.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from typing import Any
|
||||
|
||||
from event_taxonomy.adapters._severity import map_entropy_risk
|
||||
from event_taxonomy.schema import NormalizedFinding
|
||||
|
||||
|
||||
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one raw roadmap-entropy finding into a NormalizedFinding.

    Expects "entropy_score"; risk_label defaults to "low". Count and
    churn statistics are passed through when present.
    """
    label = finding.get("risk_label", "low")
    tracked = (
        "base_count",
        "head_count",
        "item_count_delta",
        "description_churn",
        "priority_shuffles",
        "entropy_score",
    )
    extra = {key: finding[key] for key in tracked if key in finding}
    return NormalizedFinding(
        tool="roadmap-entropy",
        category="roadmap-entropy",
        severity=map_entropy_risk(label),
        message=f"Entropy score {finding['entropy_score']:.2f} — {label}",
        metadata=extra,
    )
|
||||
22
src/event_taxonomy/adapters/schema_evolution.py
Normal file
22
src/event_taxonomy/adapters/schema_evolution.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from typing import Any
|
||||
|
||||
from event_taxonomy.adapters._severity import map_schema_risk
|
||||
from event_taxonomy.schema import NormalizedFinding
|
||||
|
||||
|
||||
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one raw schema-evolution-advisor finding into a NormalizedFinding.

    Expects "risk_level" and "rationale". The "operation" may be either a
    dataclass-like object exposing .op_type/.args or a plain dict.
    """
    op = finding.get("operation", {})
    if hasattr(op, "op_type"):
        # Object form: attributes, with args optional.
        op_type = op.op_type
        op_args = getattr(op, "args", [])
    else:
        # Dict form: keys, with sensible defaults.
        op_type = op.get("op_type", "unknown")
        op_args = op.get("args", [])

    return NormalizedFinding(
        tool="schema-evolution-advisor",
        category=op_type,
        severity=map_schema_risk(finding["risk_level"]),
        message=finding["rationale"],
        recommendation=finding.get("recommendation"),
        metadata={"op_type": op_type, "args": op_args},
    )
|
||||
28
src/event_taxonomy/adapters/semantic_diff.py
Normal file
28
src/event_taxonomy/adapters/semantic_diff.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from typing import Any
|
||||
|
||||
from event_taxonomy.schema import NormalizedFinding, Severity
|
||||
|
||||
|
||||
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one raw semantic-diff finding into a NormalizedFinding.

    All keys are optional. Deletions are flagged MEDIUM; otherwise
    severity scales with total churn (added + removed lines).
    """
    cats = finding.get("categories", [])
    added = finding.get("added", 0)
    removed = finding.get("removed", 0)
    churn = added + removed

    if "DELETION" in cats or "DELETED_FILE" in cats:
        severity = Severity.MEDIUM
    elif churn > 100:
        severity = Severity.HIGH
    elif churn > 30:
        severity = Severity.MEDIUM
    elif churn > 0:
        severity = Severity.LOW
    else:
        severity = Severity.INFO

    # Build the fallback message lazily: the old
    # `.get("summary", f"Changed {finding['path']}")` form evaluated the
    # default eagerly, raising KeyError for findings that had a summary
    # but no path — even though path is optional elsewhere in this file.
    if "summary" in finding:
        message = finding["summary"]
    else:
        message = f"Changed {finding['path']}"

    return NormalizedFinding(
        tool="semantic-diff",
        category=",".join(cats) if cats else "change",
        severity=severity,
        message=message,
        file=finding.get("path"),
        metadata={"added": added, "removed": removed, "categories": cats},
    )
|
||||
23
src/event_taxonomy/adapters/test_flakiness.py
Normal file
23
src/event_taxonomy/adapters/test_flakiness.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from typing import Any
|
||||
|
||||
from event_taxonomy.adapters._severity import map_flakiness_rate
|
||||
from event_taxonomy.schema import NormalizedFinding
|
||||
|
||||
|
||||
def normalize(finding: dict[str, Any]) -> NormalizedFinding:
    """Convert one raw test-flakiness-analyzer finding into a NormalizedFinding.

    Expects test_id, flakiness_rate, fail_count and total_runs; classname,
    pass_count and avg_duration are optional.
    """
    rate = finding["flakiness_rate"]
    test_id = finding["test_id"]
    fails = finding["fail_count"]
    runs = finding["total_runs"]
    return NormalizedFinding(
        tool="test-flakiness-analyzer",
        category="flaky-test",
        severity=map_flakiness_rate(rate),
        message=f"{test_id} flaky at {rate:.0%} ({fails}/{runs} failures)",
        metadata={
            "test_id": test_id,
            "classname": finding.get("classname", ""),
            "total_runs": runs,
            "pass_count": finding.get("pass_count", 0),
            "fail_count": fails,
            "flakiness_rate": rate,
            "avg_duration": finding.get("avg_duration", 0),
        },
    )
|
||||
48
src/event_taxonomy/registry.py
Normal file
48
src/event_taxonomy/registry.py
Normal file
@@ -0,0 +1,48 @@
|
||||
"""Adapter registry — maps tool names to normalize functions."""
|
||||
|
||||
from typing import Any, Callable
|
||||
|
||||
from event_taxonomy.adapters import (
|
||||
bus_factor,
|
||||
dep_risk,
|
||||
doc_drift,
|
||||
knowledge_silo,
|
||||
perf_regression,
|
||||
roadmap_entropy,
|
||||
schema_evolution,
|
||||
semantic_diff,
|
||||
test_flakiness,
|
||||
)
|
||||
from event_taxonomy.schema import NormalizedFinding, ToolEvent
|
||||
|
||||
# (public tool name, adapter module) pairs; every adapter module exposes
# a normalize(finding) -> NormalizedFinding function.
_ADAPTER_MODULES = (
    ("bus-factor-analyzer", bus_factor),
    ("dep-risk-scanner", dep_risk),
    ("doc-drift-detector", doc_drift),
    ("knowledge-silo-detector", knowledge_silo),
    ("perf-regression-spotter", perf_regression),
    ("roadmap-entropy", roadmap_entropy),
    ("schema-evolution-advisor", schema_evolution),
    ("semantic-diff", semantic_diff),
    ("test-flakiness-analyzer", test_flakiness),
)

_ADAPTERS: dict[str, Callable[[dict[str, Any]], NormalizedFinding]] = {
    name: module.normalize for name, module in _ADAPTER_MODULES
}
|
||||
|
||||
|
||||
def list_tools() -> list[str]:
    """Return the supported tool names in alphabetical order."""
    return sorted(_ADAPTERS)
|
||||
|
||||
|
||||
def get_adapter(tool_name: str) -> Callable[[dict[str, Any]], NormalizedFinding]:
    """Look up the normalize function registered for *tool_name*.

    Raises KeyError with a message listing the available tools when the
    name is not registered.
    """
    try:
        return _ADAPTERS[tool_name]
    except KeyError:
        raise KeyError(
            f"Unknown tool: {tool_name}. Available: {', '.join(sorted(_ADAPTERS))}"
        ) from None
|
||||
|
||||
|
||||
def normalize_output(
    tool_name: str,
    raw_findings: list[dict[str, Any]],
    tool_version: str = "unknown",
) -> ToolEvent:
    """Normalize *raw_findings* from *tool_name* into a ToolEvent envelope.

    Raises KeyError (via get_adapter) when the tool name is unknown.
    """
    to_finding = get_adapter(tool_name)
    normalized = [to_finding(raw) for raw in raw_findings]
    return ToolEvent(tool_name=tool_name, tool_version=tool_version, findings=normalized)
|
||||
68
src/event_taxonomy/schema.py
Normal file
68
src/event_taxonomy/schema.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
import json
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
|
||||
class Severity(enum.IntEnum):
    """Canonical severity levels, ordered from most to least severe.

    IntEnum so levels compare and sort numerically:
    ``Severity.CRITICAL > Severity.HIGH`` is True.
    """

    CRITICAL = 5
    HIGH = 4
    MEDIUM = 3
    LOW = 2
    INFO = 1

    def __str__(self) -> str:
        # Render as the bare level name ("HIGH"), not "Severity.HIGH",
        # for compact log and JSON output.
        return self.name
|
||||
|
||||
|
||||
@dataclass
class NormalizedFinding:
    """Unified finding representation across all analysis tools.

    Required fields identify the producing tool, a category string, the
    canonical severity and a human-readable message; location and
    remediation fields are optional, and tool-specific extras live in
    ``metadata``.
    """

    tool: str
    category: str
    severity: Severity
    message: str
    file: str | None = None
    line: int | None = None
    recommendation: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable dict, with severity as its level name."""
        # asdict deep-converts nested structures; overriding an existing
        # key with ** keeps the original field order.
        return {**asdict(self), "severity": self.severity.name}
|
||||
|
||||
|
||||
@dataclass
class ToolEvent:
    """Envelope wrapping a tool's normalized output.

    Carries the tool's identity, a UTC ISO-8601 creation timestamp, and
    the list of normalized findings; ``summary`` derives per-severity
    counts on demand.
    """

    tool_name: str
    tool_version: str
    timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    findings: list[NormalizedFinding] = field(default_factory=list)

    @property
    def summary(self) -> dict[str, int]:
        """Per-severity finding counts plus a grand total."""
        # Seed every level with 0 so absent severities still appear.
        counts = dict.fromkeys((level.name for level in Severity), 0)
        for item in self.findings:
            counts[item.severity.name] += 1
        return {"total": len(self.findings), **counts}

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable dict including the computed summary."""
        payload: dict[str, Any] = {
            "tool_name": self.tool_name,
            "tool_version": self.tool_version,
            "timestamp": self.timestamp,
            "findings": [item.to_dict() for item in self.findings],
        }
        payload["summary"] = self.summary
        return payload

    def to_json(self, **kwargs: Any) -> str:
        """Serialize via json.dumps; keyword args pass straight through."""
        return json.dumps(self.to_dict(), **kwargs)
|
||||
Reference in New Issue
Block a user