diff --git a/.github/genome/genome-extract.ps1 b/.github/genome/genome-extract.ps1
deleted file mode 100644
index 92e17d3..0000000
--- a/.github/genome/genome-extract.ps1
+++ /dev/null
@@ -1,296 +0,0 @@
-<#
-.SYNOPSIS
-    Genome Engine – Phase 1: Extraction
-    Extrahiert Mutations aus der Git-History für Copilot-Customization-Dateien.
-
-.DESCRIPTION
-    Scannt git log für Änderungen im Genome-Scope (.github/skills, agents, prompts, instructions).
-    Gruppiert Diffs nach Trait und gibt strukturiertes Markdown aus.
-
-.PARAMETER Since
-    Zeitspanne für git log (z.B. "4 days ago", "2 weeks ago"). Default: "7 days ago"
-
-.PARAMETER RepoPath
-    Pfad zum Repository. Default: aktuelles Verzeichnis.
-
-.PARAMETER OutputPath
-    Pfad für die Ausgabedatei. Default: .github/genome/output/raw-mutations.md
-
-.EXAMPLE
-    .\.github\genome\genome-extract.ps1 -Since "4 days ago"
-#>
-
-param(
-    [string]$Since = "7 days ago",
-    [string]$RepoPath = ".",
-    [string]$OutputPath = ""
-)
-
-Set-StrictMode -Version Latest
-$ErrorActionPreference = "Stop"
-
-# --- Konfiguration ---
-
-$GenomeScopes = @(
-    ".github/skills/"
-    ".github/agents/"
-    ".github/prompts/"
-    ".github/copilot-instructions.md"
-    ".github/kotlin-conventions.instructions.md"
-)
-
-# --- Funktionen ---
-
-function Get-TraitKey {
-    <#
-    .SYNOPSIS
-        Leitet den Trait-Key aus einem Dateipfad ab.
-    #>
-    param([string]$FilePath)
-
-    # Skills: skill/
-    if ($FilePath -match "^\.github/skills/([^/]+)/") {
-        return "skill/$($Matches[1])"
-    }
-
-    # Agents: agent/
-    if ($FilePath -match "^\.github/agents/(.+)\.agent\.md$") {
-        return "agent/$($Matches[1])"
-    }
-
-    # Prompts: Standalone oder Verbund
-    if ($FilePath -match "^\.github/prompts/(.+)\.prompt\.md$") {
-        $name = $Matches[1]
-
-        # Prüfe ob es ein Sub-Prompt ist (enthält Bindestrich und Router existiert)
-        # Verbund-Erkennung: -.prompt.md → trait des Routers
-        # Wir suchen den längsten Präfix, der als Router existieren könnte
-        $parts = $name -split "-"
-        if ($parts.Count -gt 1) {
-            # Versuche progressiv kürzere Präfixe als Router-Name
-            for ($i = $parts.Count - 1; $i -ge 1; $i--) {
-                $candidate = ($parts[0..($i-1)] -join "-")
-                $routerPath = ".github/prompts/$candidate.prompt.md"
-                $fullRouterPath = Join-Path $RepoPath $routerPath
-                if (Test-Path $fullRouterPath) {
-                    return "prompt/$candidate"
-                }
-            }
-        }
-
-        # Standalone-Prompt
-        return "prompt/$name"
-    }
-
-    # Instructions
-    if ($FilePath -match "^\.github/(.+)\.instructions\.md$") {
-        return "instructions/$($Matches[1])"
-    }
-    if ($FilePath -match "^\.github/copilot-instructions\.md$") {
-        return "instructions/copilot-instructions"
-    }
-
-    return $null
-}
-
-function Get-MutationType {
-    <#
-    .SYNOPSIS
-        Bestimmt den Mutation-Typ aus dem Git diff-filter Status.
-    #>
-    param(
-        [string]$Status # A, M, D, R, etc.
-    )
-
-    switch -Regex ($Status) {
-        "^A" { return "member-added" }
-        "^D" { return "member-removed" }
-        default { return "content-change" }
-    }
-}
-
-function Test-InGenomeScope {
-    <#
-    .SYNOPSIS
-        Prüft ob ein Dateipfad im Genome-Scope liegt.
-    #>
-    param([string]$FilePath)
-
-    foreach ($scope in $GenomeScopes) {
-        if ($scope.EndsWith("/")) {
-            if ($FilePath.StartsWith($scope)) { return $true }
-        } else {
-            if ($FilePath -eq $scope) { return $true }
-        }
-    }
-    return $false
-}
-
-# --- Hauptlogik ---
-
-Push-Location $RepoPath
-try {
-    # Output-Pfad bestimmen
-    if (-not $OutputPath) {
-        $OutputPath = Join-Path $RepoPath ".github/genome/output/raw-mutations.md"
-    }
-
-    Write-Host "Genome Extract: Scanning commits since '$Since'..." -ForegroundColor Cyan
-
-    # Git-Log abrufen: Commits die Genome-Scope-Dateien betreffen
-    $logFormat = "--format=%H|%aI|%an|%s"
-    $commits = git log $logFormat --since="$Since" -- $GenomeScopes 2>&1
-
-    if (-not $commits -or $LASTEXITCODE -ne 0) {
-        Write-Host "Keine Commits im Genome-Scope seit '$Since' gefunden." -ForegroundColor Yellow
-        $commits = @()
-    }
-
-    # Commits parsen
-    $mutations = @{} # Key: trait → Value: Liste von Mutations
-
-    foreach ($line in $commits) {
-        if (-not $line -or $line -notmatch "\|") { continue }
-
-        $parts = $line -split "\|", 4
-        if ($parts.Count -lt 4) { continue }
-
-        $hash = $parts[0]
-        $date = $parts[1]
-        $author = $parts[2]
-        $message = $parts[3]
-
-        # Geänderte Dateien für diesen Commit abrufen
-        $diffFiles = git diff-tree --no-commit-id -r --name-status $hash 2>&1
-
-        foreach ($diffLine in $diffFiles) {
-            if (-not $diffLine -or $diffLine -notmatch "^\w") { continue }
-
-            $diffParts = $diffLine -split "\t", 3
-            $status = $diffParts[0]
-            $filePath = $diffParts[1]
-
-            # Bei Renames: Zielpfad verwenden
-            if ($status -match "^R" -and $diffParts.Count -ge 3) {
-                $filePath = $diffParts[2]
-            }
-
-            # Normalisieren (Backslash → Forward Slash)
-            $filePath = $filePath -replace "\\", "/"
-
-            # Prüfe ob im Genome-Scope
-            if (-not (Test-InGenomeScope $filePath)) { continue }
-
-            # Trait-Key ableiten
-            $traitKey = Get-TraitKey $filePath
-            if (-not $traitKey) { continue }
-
-            # Mutation-Typ bestimmen
-            $mutationType = Get-MutationType $status
-
-            # Diff für diese Datei holen
-            $diff = git show --format="" --no-color $hash -- $filePath 2>&1
-            if ($LASTEXITCODE -ne 0) {
-                # Fallback: diff-tree
-                $diff = git diff-tree -p $hash -- $filePath 2>&1
-            }
-            $diffText = ($diff | Out-String).Trim()
-
-            # Mutation speichern
-            if (-not $mutations.ContainsKey($traitKey)) {
-                $mutations[$traitKey] = @()
-            }
-
-            $mutations[$traitKey] += @{
-                Hash = $hash.Substring(0, [Math]::Min(8, $hash.Length))
-                Date = $date
-                Author = $author
-                Message = $message
-                File = $filePath
-                Type = $mutationType
-                Diff = $diffText
-            }
-        }
-    }
-
-    # --- Output generieren ---
-
-    $sb = [System.Text.StringBuilder]::new()
-    [void]$sb.AppendLine("# Raw Mutations")
-    [void]$sb.AppendLine("")
-    [void]$sb.AppendLine("**Extrahiert:** $(Get-Date -Format 'yyyy-MM-dd HH:mm')")
-    [void]$sb.AppendLine("**Zeitraum:** seit $Since")
-    [void]$sb.AppendLine("**Repository:** $(Split-Path $RepoPath -Leaf)")
-    [void]$sb.AppendLine("**Traits mit Mutations:** $($mutations.Count)")
-    [void]$sb.AppendLine("")
-    [void]$sb.AppendLine("---")
-    [void]$sb.AppendLine("")
-
-    if ($mutations.Count -eq 0) {
-        [void]$sb.AppendLine("*Keine Mutations im angegebenen Zeitraum gefunden.*")
-    } else {
-        # Sortiert nach Trait-Key ausgeben
-        foreach ($traitKey in ($mutations.Keys | Sort-Object)) {
-            $traitMutations = $mutations[$traitKey]
-
-            [void]$sb.AppendLine("## Trait: ``$traitKey``")
-            [void]$sb.AppendLine("")
-            [void]$sb.AppendLine("| Mutations | Dateien |")
-            [void]$sb.AppendLine("|-----------|---------|")
-
-            $uniqueFiles = ($traitMutations | ForEach-Object { $_.File } | Sort-Object -Unique) -join ", "
-            [void]$sb.AppendLine("| $($traitMutations.Count) | $uniqueFiles |")
-            [void]$sb.AppendLine("")
-
-            # Gruppiert nach Commit (Hash)
-            $byCommit = $traitMutations | Group-Object -Property Hash
-
-            foreach ($commitGroup in $byCommit) {
-                $first = $commitGroup.Group[0]
-                [void]$sb.AppendLine("### [$($first.Hash)] $($first.Message)")
-                [void]$sb.AppendLine("")
-                [void]$sb.AppendLine("- **Datum:** $($first.Date)")
-                [void]$sb.AppendLine("- **Autor:** $($first.Author)")
-                [void]$sb.AppendLine("")
-
-                foreach ($mutation in $commitGroup.Group) {
-                    $header = "#### " + '`' + $mutation.Type + '`' + " - " + $mutation.File
-                    [void]$sb.AppendLine($header)
-                    [void]$sb.AppendLine("")
-
-                    if ($mutation.Diff) {
-                        # Diff auf max 80 Zeilen begrenzen
-                        $diffLines = $mutation.Diff -split [Environment]::NewLine
-                        if ($diffLines.Count -gt 80) {
-                            $truncMsg = "... ($($diffLines.Count - 80) weitere Zeilen)"
-                            $diffLines = $diffLines[0..79] + @($truncMsg)
-                        }
-                        [void]$sb.AppendLine('```diff')
-                        [void]$sb.AppendLine(($diffLines -join [Environment]::NewLine))
-                        [void]$sb.AppendLine('```')
-                    }
-                    [void]$sb.AppendLine("")
-                }
-            }
-
-            [void]$sb.AppendLine("---")
-            [void]$sb.AppendLine("")
-        }
-    }
-
-    # Datei schreiben
-    $outputDir = Split-Path $OutputPath -Parent
-    if (-not (Test-Path $outputDir)) {
-        New-Item -ItemType Directory -Path $outputDir -Force | Out-Null
-    }
-
-    $sb.ToString() | Set-Content -Path $OutputPath -Encoding UTF8
-    Write-Host ""
-    Write-Host "Extraction abgeschlossen:" -ForegroundColor Green
-    Write-Host " Traits: $($mutations.Count)" -ForegroundColor White
-    Write-Host " Mutations: $(($mutations.Values | ForEach-Object { $_.Count } | Measure-Object -Sum).Sum)" -ForegroundColor White
-    Write-Host " Output: $OutputPath" -ForegroundColor White
-
-} finally {
-    Pop-Location
-}
diff --git a/.github/genome/genome-extract.py b/.github/genome/genome-extract.py
new file mode 100644
index 0000000..26ad3c7
--- /dev/null
+++ b/.github/genome/genome-extract.py
@@ -0,0 +1,312 @@
+#!/usr/bin/env python3
+"""
+Genome Engine – Phase 1: Extraction
+
+Extracts mutations from the Git history for Copilot customization files.
+Scans git log for changes in the genome scope (.github/skills, agents, prompts, instructions).
+Groups diffs by trait and emits structured Markdown.
+
+Usage:
+    python .github/genome/genome-extract.py --since "7 days ago"
+    python .github/genome/genome-extract.py --since "4 days ago" --repo /path/to/repo
+"""
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+from collections import defaultdict
+from datetime import datetime
+from pathlib import Path
+
+# --- Configuration ---
+
+GENOME_SCOPES = [
+    ".github/skills/",
+    ".github/agents/",
+    ".github/prompts/",
+    ".github/copilot-instructions.md",
+    ".github/kotlin-conventions.instructions.md",
+]
+
+MAX_DIFF_LINES = 80
+
+
+def run_git(*args: str, cwd: str = ".") -> str:
+    """Run a git command in ``cwd`` and return its stdout decoded as UTF-8.
+
+    ``core.quotePath=false`` keeps non-ASCII file names unescaped so they can
+    be compared against GENOME_SCOPES. A non-zero exit status is reported on
+    stderr instead of being dropped silently; stdout is returned either way.
+    """
+    result = subprocess.run(
+        ["git", "-c", "core.quotePath=false", *args],
+        cwd=cwd,
+        capture_output=True,
+        text=True,
+        encoding="utf-8",
+        errors="replace",
+    )
+    if result.returncode != 0:
+        print(f"Warning: git {' '.join(args)} failed: {result.stderr.strip()}", file=sys.stderr)
+    return result.stdout
+
+
+def is_in_genome_scope(filepath: str) -> bool:
+    """Return True if ``filepath`` lies inside one of the GENOME_SCOPES.
+
+    Entries ending in "/" are directory prefixes; all others must match exactly.
+    """
+    for scope in GENOME_SCOPES:
+        if scope.endswith("/"):
+            if filepath.startswith(scope):
+                return True
+        elif filepath == scope:
+            return True
+    return False
+
+
+def get_trait_key(filepath: str, repo_path: str) -> str | None:
+    """Derive the trait key for ``filepath``, or None if no rule matches.
+
+    ``repo_path`` is used to check the working tree for a router prompt that
+    a hyphenated prompt name may belong to.
+    """
+
+    # Skills: skill/<directory name>
+    m = re.match(r"^\.github/skills/([^/]+)/", filepath)
+    if m:
+        return f"skill/{m.group(1)}"
+
+    # Agents: agent/<file stem>
+    m = re.match(r"^\.github/agents/(.+)\.agent\.md$", filepath)
+    if m:
+        return f"agent/{m.group(1)}"
+
+    # Prompts: standalone or part of a router group
+    m = re.match(r"^\.github/prompts/(.+)\.prompt\.md$", filepath)
+    if m:
+        name = m.group(1)
+
+        # Group detection: a hyphenated prompt maps to the trait of its router
+        parts = name.split("-")
+        if len(parts) > 1:
+            # Try progressively shorter prefixes as the router name
+            for i in range(len(parts) - 1, 0, -1):
+                candidate = "-".join(parts[:i])
+                router_path = Path(repo_path) / f".github/prompts/{candidate}.prompt.md"
+                if router_path.exists():
+                    return f"prompt/{candidate}"
+
+        # Standalone prompt
+        return f"prompt/{name}"
+
+    # Instructions (*.instructions.md)
+    m = re.match(r"^\.github/(.+)\.instructions\.md$", filepath)
+    if m:
+        return f"instructions/{m.group(1)}"
+
+    # copilot-instructions.md
+    if filepath == ".github/copilot-instructions.md":
+        return "instructions/copilot-instructions"
+
+    return None
+
+
+def get_mutation_type(status: str) -> str:
+    """Map a git name-status code to a mutation type."""
+    if status.startswith("A"):
+        return "member-added"
+    elif status.startswith("D"):
+        return "member-removed"
+    else:
+        return "content-change"
+
+
+def extract_mutations(repo_path: str, since: str) -> dict[str, list[dict]]:
+    """Collect all mutations since ``since`` from the Git history, keyed by trait."""
+    mutations: dict[str, list[dict]] = defaultdict(list)
+
+    # Fetch the git log, restricted to the genome scope.
+    # NOTE: fields are "|"-separated; an author name containing "|" shifts them.
+    log_output = run_git(
+        "log",
+        "--format=%H|%aI|%an|%s",
+        f"--since={since}",
+        "--",
+        *GENOME_SCOPES,
+        cwd=repo_path,
+    )
+
+    if not log_output.strip():
+        return mutations
+
+    for line in log_output.strip().split("\n"):
+        if "|" not in line:
+            continue
+
+        parts = line.split("|", 3)
+        if len(parts) < 4:
+            continue
+
+        commit_hash, date, author, message = parts
+
+        # Files changed by this commit; --root makes the initial commit list
+        # its files as additions instead of producing no output.
+        diff_tree_output = run_git(
+            "diff-tree", "--root", "--no-commit-id", "-r", "--name-status", commit_hash,
+            cwd=repo_path,
+        )
+
+        for diff_line in diff_tree_output.strip().split("\n"):
+            if not diff_line or not diff_line[0].isalpha():
+                continue
+
+            diff_parts = diff_line.split("\t", 2)
+            status = diff_parts[0]
+            filepath = diff_parts[1] if len(diff_parts) > 1 else ""
+
+            # For renames, use the destination path
+            if status.startswith("R") and len(diff_parts) >= 3:
+                filepath = diff_parts[2]
+
+            # Normalize path separators
+            filepath = filepath.replace("\\", "/")
+
+            if not is_in_genome_scope(filepath):
+                continue
+
+            trait_key = get_trait_key(filepath, repo_path)
+            if not trait_key:
+                continue
+
+            mutation_type = get_mutation_type(status)
+
+            # Fetch the diff of this file in this commit
+            diff_output = run_git(
+                "show", "--format=", "--no-color", commit_hash, "--", filepath,
+                cwd=repo_path,
+            )
+
+            # Truncate long diffs
+            diff_lines = diff_output.strip().split("\n") if diff_output.strip() else []
+            if len(diff_lines) > MAX_DIFF_LINES:
+                truncated = len(diff_lines) - MAX_DIFF_LINES
+                diff_lines = diff_lines[:MAX_DIFF_LINES] + [f"... ({truncated} weitere Zeilen)"]
+
+            mutations[trait_key].append({
+                "hash": commit_hash[:8],
+                "date": date,
+                "author": author,
+                "message": message,
+                "file": filepath,
+                "type": mutation_type,
+                "diff": "\n".join(diff_lines),
+            })
+
+    return mutations
+
+
+def _diff_fence(diff: str) -> str:
+    """Return a backtick fence longer than any backtick run inside ``diff``.
+
+    Diffs of Markdown files can contain lines of three backticks, which would
+    otherwise close the surrounding fenced block early.
+    """
+    longest = max((len(run) for run in re.findall(r"`+", diff)), default=0)
+    return "`" * max(3, longest + 1)
+
+
+def generate_markdown(mutations: dict[str, list[dict]], repo_path: str, since: str) -> str:
+    """Render the collected mutations as a Markdown report."""
+    lines = []
+    lines.append("# Raw Mutations")
+    lines.append("")
+    lines.append(f"**Extrahiert:** {datetime.now().strftime('%Y-%m-%d %H:%M')}")
+    lines.append(f"**Zeitraum:** seit {since}")
+    lines.append(f"**Repository:** {Path(repo_path).resolve().name}")
+    lines.append(f"**Traits mit Mutations:** {len(mutations)}")
+    lines.append("")
+    lines.append("---")
+    lines.append("")
+
+    if not mutations:
+        lines.append("*Keine Mutations im angegebenen Zeitraum gefunden.*")
+    else:
+        for trait_key in sorted(mutations.keys()):
+            trait_mutations = mutations[trait_key]
+
+            lines.append(f"## Trait: `{trait_key}`")
+            lines.append("")
+            lines.append("| Mutations | Dateien |")
+            lines.append("|-----------|---------|")
+
+            unique_files = sorted(set(m["file"] for m in trait_mutations))
+            lines.append(f"| {len(trait_mutations)} | {', '.join(unique_files)} |")
+            lines.append("")
+
+            # Group by commit, preserving first-seen order
+            commits_seen: dict[str, list[dict]] = {}
+            for m in trait_mutations:
+                commits_seen.setdefault(m["hash"], []).append(m)
+
+            for commit_hash, commit_mutations in commits_seen.items():
+                first = commit_mutations[0]
+                lines.append(f"### [{first['hash']}] {first['message']}")
+                lines.append("")
+                lines.append(f"- **Datum:** {first['date']}")
+                lines.append(f"- **Autor:** {first['author']}")
+                lines.append("")
+
+                for mutation in commit_mutations:
+                    lines.append(f"#### `{mutation['type']}` – {mutation['file']}")
+                    lines.append("")
+
+                    if mutation["diff"]:
+                        fence = _diff_fence(mutation["diff"])
+                        lines.append(f"{fence}diff")
+                        lines.append(mutation["diff"])
+                        lines.append(fence)
+                    lines.append("")
+
+            lines.append("---")
+            lines.append("")
+
+    return "\n".join(lines)
+
+
+def main():
+    """Parse the CLI arguments, run the extraction and write the report."""
+    parser = argparse.ArgumentParser(description="Genome Engine – Extraction")
+    parser.add_argument("--since", default="7 days ago", help='Zeitspanne (z.B. "7 days ago")')
+    parser.add_argument("--repo", default=".", help="Pfad zum Repository")
+    parser.add_argument("--output", default="", help="Output-Pfad (default: .github/genome/output/raw-mutations.md)")
+    args = parser.parse_args()
+
+    repo_path = os.path.abspath(args.repo)
+    output_path = args.output or os.path.join(repo_path, ".github/genome/output/raw-mutations.md")
+
+    print(f"Genome Extract: Scanning commits since '{args.since}'...")
+
+    mutations = extract_mutations(repo_path, args.since)
+    markdown = generate_markdown(mutations, repo_path, args.since)
+
+    # Write the output; a bare file name has no directory part to create
+    # (os.makedirs("") raises FileNotFoundError).
+    output_dir = os.path.dirname(output_path)
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(markdown)
+
+    total_mutations = sum(len(v) for v in mutations.values())
+    print()
+    print("Extraction abgeschlossen:")
+    print(f" Traits: {len(mutations)}")
+    print(f" Mutations: {total_mutations}")
+    print(f" Output: {output_path}")
+
+
+if __name__ == "__main__":
+    main()