- genome-engine.md: Script-Name, Pfade, Scope korrigiert, Mermaid-Visualisierungen - genome-extract.py: dynamisches Pattern statt hardcoded kotlin-conventions - SKILL.md: vollständige Dateiliste mit korrekten Pfaden - genome.prompt.md: Referenz auf Konzept-Dokument korrigiert
287 lines
9.1 KiB
Python
287 lines
9.1 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Genome Engine – Phase 1: Extraction
|
||
|
||
Extrahiert Mutations aus der Git-History für Copilot-Customization-Dateien.
|
||
Scannt git log für Änderungen im Genome-Scope (.github/skills, agents, prompts, instructions).
|
||
Gruppiert Diffs nach Trait und gibt strukturiertes Markdown aus.
|
||
|
||
Usage:
|
||
python .github/skills/genome/genome-extract.py --since "7 days ago"
|
||
python .github/skills/genome/genome-extract.py --since "4 days ago" --repo /path/to/repo
|
||
"""
|
||
|
||
import argparse
|
||
import os
|
||
import re
|
||
import subprocess
|
||
import sys
|
||
from collections import defaultdict
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
|
||
# --- Configuration ---

# Maximum number of diff lines kept per mutation; longer diffs are truncated.
MAX_DIFF_LINES = 80

# Dynamic pattern for additional instruction files located directly in
# .github/ (no sub-directories).
INSTRUCTIONS_PATTERN = re.compile(r"^\.github/[^/]+\.instructions\.md$")

# Entries ending in "/" are treated as directory prefixes, all other entries
# as exact file paths.
GENOME_SCOPES = [
    ".github/skills/",
    ".github/agents/",
    ".github/prompts/",
    ".github/copilot-instructions.md",
]
|
||
|
||
|
||
def run_git(*args: str, cwd: str = ".") -> str:
    """Run a git command and return its standard output as text.

    Args:
        *args: Arguments passed to ``git`` (sub-command first).
        cwd: Directory in which git is executed.

    Returns:
        The captured stdout, decoded as UTF-8 with undecodable bytes
        replaced. stdout is returned even when git exits with a non-zero
        status, so callers keep their best-effort behaviour.
    """
    result = subprocess.run(
        ["git", *args],
        cwd=cwd,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
        check=False,
    )
    if result.returncode != 0:
        # Report the failure (e.g. cwd is not a git repository) on stderr
        # instead of letting it look like "git produced no output".
        print(
            f"warning: git {' '.join(args)} exited with status "
            f"{result.returncode}: {result.stderr.strip()}",
            file=sys.stderr,
        )
    return result.stdout
|
||
|
||
|
||
def is_in_genome_scope(filepath: str) -> bool:
    """Tell whether a file path belongs to the genome scope.

    A path is in scope when it starts with one of the directory entries of
    ``GENOME_SCOPES`` (entries ending in "/"), equals one of its file
    entries, or matches ``INSTRUCTIONS_PATTERN``.
    """
    listed = any(
        filepath.startswith(entry) if entry.endswith("/") else filepath == entry
        for entry in GENOME_SCOPES
    )
    # Dynamic part: .github/*.instructions.md
    return listed or bool(INSTRUCTIONS_PATTERN.match(filepath))
|
||
|
||
|
||
def get_trait_key(filepath: str, repo_path: str) -> str | None:
|
||
"""Leitet den Trait-Key aus einem Dateipfad ab."""
|
||
|
||
# Skills: skill/<ordnername>
|
||
m = re.match(r"^\.github/skills/([^/]+)/", filepath)
|
||
if m:
|
||
return f"skill/{m.group(1)}"
|
||
|
||
# Agents: agent/<dateiname-ohne-extension>
|
||
m = re.match(r"^\.github/agents/(.+)\.agent\.md$", filepath)
|
||
if m:
|
||
return f"agent/{m.group(1)}"
|
||
|
||
# Prompts: Standalone oder Verbund
|
||
m = re.match(r"^\.github/prompts/(.+)\.prompt\.md$", filepath)
|
||
if m:
|
||
name = m.group(1)
|
||
|
||
# Verbund-Erkennung: <router>-<sub>.prompt.md → Trait des Routers
|
||
parts = name.split("-")
|
||
if len(parts) > 1:
|
||
# Versuche progressiv kürzere Präfixe als Router-Name
|
||
for i in range(len(parts) - 1, 0, -1):
|
||
candidate = "-".join(parts[:i])
|
||
router_path = Path(repo_path) / f".github/prompts/{candidate}.prompt.md"
|
||
if router_path.exists():
|
||
return f"prompt/{candidate}"
|
||
|
||
# Standalone-Prompt
|
||
return f"prompt/{name}"
|
||
|
||
# Instructions (*.instructions.md)
|
||
m = re.match(r"^\.github/(.+)\.instructions\.md$", filepath)
|
||
if m:
|
||
return f"instructions/{m.group(1)}"
|
||
|
||
# copilot-instructions.md
|
||
if filepath == ".github/copilot-instructions.md":
|
||
return "instructions/copilot-instructions"
|
||
|
||
return None
|
||
|
||
|
||
def get_mutation_type(status: str) -> str:
    """Translate a git status code into a mutation type.

    Only the first letter is inspected: ``A`` means "member-added", ``D``
    means "member-removed", anything else (including an empty status) is
    reported as "content-change".
    """
    by_first_letter = {"A": "member-added", "D": "member-removed"}
    return by_first_letter.get(status[:1], "content-change")
|
||
|
||
|
||
def extract_mutations(repo_path: str, since: str) -> dict[str, list[dict]]:
    """Extract all genome mutations from the git history.

    Lists the commits newer than ``since`` that touch the genome scope,
    inspects the files changed by each commit and records one mutation per
    in-scope file that maps to a trait.

    Args:
        repo_path: Directory in which the git commands are executed.
        since: Value handed to ``git log --since``.

    Returns:
        A mapping from trait key to a list of mutation dicts with the keys
        ``hash`` (first 8 characters), ``date``, ``author``, ``message``,
        ``file``, ``type`` and ``diff`` (truncated to ``MAX_DIFF_LINES``).
    """
    mutations: dict[str, list[dict]] = defaultdict(list)

    # ASCII unit separator (emitted by git for %x1f). Unlike "|" it does not
    # occur in ordinary author names, so the four fields split reliably.
    field_sep = "\x1f"

    log_output = run_git(
        "log",
        "--format=%H%x1f%aI%x1f%an%x1f%s",
        f"--since={since}",
        "--",
        *GENOME_SCOPES,
        # GENOME_SCOPES has no entry for the dynamic *.instructions.md files;
        # without this pathspec, commits touching only those files would
        # never be listed. ":(glob)" keeps "*" from matching "/".
        ":(glob).github/*.instructions.md",
        cwd=repo_path,
    )

    # Split on "\n" without strip(): str.strip() treats "\x1f" as whitespace
    # and would eat the separator in front of an empty subject.
    for line in log_output.split("\n"):
        parts = line.split(field_sep, 3)
        if len(parts) < 4:
            continue
        commit_hash, date, author, message = parts

        # Files changed by this commit. --root makes the initial commit
        # report its files; core.quotepath=false keeps non-ASCII paths
        # readable instead of octal-escaped.
        diff_tree_output = run_git(
            "-c", "core.quotepath=false",
            "diff-tree", "--no-commit-id", "--root", "-r", "--name-status",
            commit_hash,
            cwd=repo_path,
        )

        for diff_line in diff_tree_output.split("\n"):
            if not diff_line or not diff_line[0].isalpha():
                continue

            diff_parts = diff_line.split("\t", 2)
            status = diff_parts[0]
            filepath = diff_parts[1] if len(diff_parts) > 1 else ""

            # Rename/copy records carry "<old>\t<new>": use the target path.
            if status[0] in "RC" and len(diff_parts) >= 3:
                filepath = diff_parts[2]

            # Normalise path separators.
            filepath = filepath.replace("\\", "/")

            if not is_in_genome_scope(filepath):
                continue

            trait_key = get_trait_key(filepath, repo_path)
            if not trait_key:
                continue

            # Patch of this commit, restricted to the one file.
            diff_output = run_git(
                "show", "--format=", "--no-color", commit_hash, "--", filepath,
                cwd=repo_path,
            )

            # Keep at most MAX_DIFF_LINES and note how many were dropped.
            diff_lines = diff_output.strip().split("\n") if diff_output.strip() else []
            if len(diff_lines) > MAX_DIFF_LINES:
                truncated = len(diff_lines) - MAX_DIFF_LINES
                diff_lines = diff_lines[:MAX_DIFF_LINES] + [f"... ({truncated} weitere Zeilen)"]

            mutations[trait_key].append({
                "hash": commit_hash[:8],
                "date": date,
                "author": author,
                "message": message,
                "file": filepath,
                "type": get_mutation_type(status),
                "diff": "\n".join(diff_lines),
            })

    return mutations
|
||
|
||
|
||
def generate_markdown(mutations: dict[str, list[dict]], repo_path: str, since: str) -> str:
    """Render the extracted mutations as a Markdown report.

    Args:
        mutations: Mapping from trait key to mutation dicts; each dict is
            read via the keys ``hash``, ``date``, ``author``, ``message``,
            ``file``, ``type`` and ``diff``.
        repo_path: Repository path; only the name of the resolved directory
            appears in the report header.
        since: Time span text, repeated verbatim in the header.

    Returns:
        The report as one string. Traits are sorted by key; inside a trait
        the mutations are grouped by commit hash in first-seen order.
    """
    lines = [
        "# Raw Mutations",
        "",
        f"**Extrahiert:** {datetime.now().strftime('%Y-%m-%d %H:%M')}",
        f"**Zeitraum:** seit {since}",
        f"**Repository:** {Path(repo_path).resolve().name}",
        f"**Traits mit Mutations:** {len(mutations)}",
        "",
        "---",
        "",
    ]

    if not mutations:
        lines.append("*Keine Mutations im angegebenen Zeitraum gefunden.*")
        return "\n".join(lines)

    for trait_key in sorted(mutations):
        trait_mutations = mutations[trait_key]
        unique_files = sorted({m["file"] for m in trait_mutations})
        lines += [
            f"## Trait: `{trait_key}`",
            "",
            "| Mutations | Dateien |",
            "|-----------|---------|",
            f"| {len(trait_mutations)} | {', '.join(unique_files)} |",
            "",
        ]

        # Group by commit; dict insertion order keeps the first-seen order.
        by_commit: dict[str, list[dict]] = {}
        for m in trait_mutations:
            by_commit.setdefault(m["hash"], []).append(m)

        for commit_mutations in by_commit.values():
            first = commit_mutations[0]
            lines += [
                f"### [{first['hash']}] {first['message']}",
                "",
                f"- **Datum:** {first['date']}",
                f"- **Autor:** {first['author']}",
                "",
            ]

            for mutation in commit_mutations:
                lines += [f"#### `{mutation['type']}` – {mutation['file']}", ""]

                diff = mutation["diff"]
                if diff:
                    # The diffed files are Markdown, so a context line such
                    # as " ```" would close a plain three-backtick fence.
                    # Grow the fence until it is longer than any backtick
                    # run inside the diff.
                    fence = "```"
                    while fence in diff:
                        fence += "`"
                    lines += [f"{fence}diff", diff, fence, ""]

        lines += ["---", ""]

    return "\n".join(lines)
|
||
|
||
|
||
def main():
    """Command-line entry point.

    Parses ``--since``, ``--repo`` and ``--output``, extracts the mutations,
    writes the Markdown report to the output path and prints a short summary.
    Without ``--output`` the report is written to
    ``.github/genome/output/raw-mutations.md`` below the repository path.
    """
    parser = argparse.ArgumentParser(description="Genome Engine – Extraction")
    parser.add_argument("--since", default="7 days ago", help='Zeitspanne (z.B. "7 days ago")')
    parser.add_argument("--repo", default=".", help="Pfad zum Repository")
    parser.add_argument("--output", default="", help="Output-Pfad (default: .github/genome/output/raw-mutations.md)")
    args = parser.parse_args()

    repo_path = os.path.abspath(args.repo)
    output_path = args.output or os.path.join(repo_path, ".github/genome/output/raw-mutations.md")

    print(f"Genome Extract: Scanning commits since '{args.since}'...")

    mutations = extract_mutations(repo_path, args.since)
    markdown = generate_markdown(mutations, repo_path, args.since)

    # Write the report. For a bare file name (e.g. "--output raw.md")
    # os.path.dirname() is "" and os.makedirs("") would raise, so the
    # directory is only created when the path actually has one.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(markdown)

    total_mutations = sum(len(v) for v in mutations.values())
    print()
    print("Extraction abgeschlossen:")
    print(f" Traits: {len(mutations)}")
    print(f" Mutations: {total_mutations}")
    print(f" Output: {output_path}")
|
||
|
||
|
||
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|