refactor(genome): rewrite extraction in Python for proper UTF-8 support
PowerShell auf Windows hat Encoding-Probleme mit Git-Output (Umlaute). Python 3 handhabt UTF-8 nativ korrekt.
This commit is contained in:
parent
24c6fac0f8
commit
10cb474906
2 changed files with 282 additions and 296 deletions
296
.github/genome/genome-extract.ps1
vendored
296
.github/genome/genome-extract.ps1
vendored
|
|
@ -1,296 +0,0 @@
|
||||||
<#
.SYNOPSIS
    Genome Engine – Phase 1: Extraction
    Extracts mutations from the Git history for Copilot customization files.

.DESCRIPTION
    Scans git log for changes in the genome scope (.github/skills, agents, prompts, instructions).
    Groups diffs by trait and emits structured Markdown.

.PARAMETER Since
    Time span for git log (e.g. "4 days ago", "2 weeks ago"). Default: "7 days ago"

.PARAMETER RepoPath
    Path to the repository. Default: current directory.

.PARAMETER OutputPath
    Path of the output file. Default: .github/genome/output/raw-mutations.md

.EXAMPLE
    .\.github\genome\genome-extract.ps1 -Since "4 days ago"
#>

param(
    [string]$Since = "7 days ago",
    [string]$RepoPath = ".",
    [string]$OutputPath = ""
)

Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

# --- Configuration ---

# Paths that make up the genome scope. They are passed to `git log` as
# pathspecs and re-checked per file by Test-InGenomeScope: entries ending in
# "/" are matched as path prefixes, all other entries as exact file paths.
$GenomeScopes = @(
    ".github/skills/"
    ".github/agents/"
    ".github/prompts/"
    ".github/copilot-instructions.md"
    ".github/kotlin-conventions.instructions.md"
)
|
|
||||||
|
|
||||||
# --- Funktionen ---
|
|
||||||
|
|
||||||
function Get-TraitKey {
    <#
    .SYNOPSIS
        Derives the trait key for a repository-relative file path.
    .DESCRIPTION
        Maps a path to "skill/<folder>", "agent/<name>", "prompt/<name>" or
        "instructions/<name>". A prompt named "<router>-<sub>" is assigned to
        the router's trait when "<router>.prompt.md" exists below $RepoPath.
        Returns $null when the path matches none of the known layouts.
    #>
    param([string]$FilePath)

    # The first matching pattern wins; every branch returns.
    switch -Regex ($FilePath) {
        # Skills: skill/<folder name>
        "^\.github/skills/([^/]+)/" {
            return "skill/$($Matches[1])"
        }

        # Agents: agent/<file name without extension>
        "^\.github/agents/(.+)\.agent\.md$" {
            return "agent/$($Matches[1])"
        }

        # Prompts: standalone, or member of a router group
        "^\.github/prompts/(.+)\.prompt\.md$" {
            $promptName = $Matches[1]
            $segments = $promptName -split "-"

            # Try the longest dash-separated prefix first, then progressively
            # shorter ones; a prefix counts as router if its prompt file exists.
            $prefixLength = $segments.Count - 1
            while ($prefixLength -ge 1) {
                $routerName = $segments[0..($prefixLength - 1)] -join "-"
                $routerFile = Join-Path $RepoPath ".github/prompts/$routerName.prompt.md"
                if (Test-Path $routerFile) {
                    return "prompt/$routerName"
                }
                $prefixLength--
            }

            # No router found: standalone prompt
            return "prompt/$promptName"
        }

        # Instructions (*.instructions.md)
        "^\.github/(.+)\.instructions\.md$" {
            return "instructions/$($Matches[1])"
        }

        "^\.github/copilot-instructions\.md$" {
            return "instructions/copilot-instructions"
        }
    }

    return $null
}
|
|
||||||
|
|
||||||
function Get-MutationType {
    <#
    .SYNOPSIS
        Maps a git name-status code to the mutation type.
    .DESCRIPTION
        Codes starting with "A" yield "member-added", codes starting with "D"
        yield "member-removed"; everything else is a "content-change".
    #>
    param(
        [string]$Status  # A, M, D, R, etc.
    )

    if ($Status -like "A*") { return "member-added" }
    if ($Status -like "D*") { return "member-removed" }
    return "content-change"
}
|
|
||||||
|
|
||||||
function Test-InGenomeScope {
    <#
    .SYNOPSIS
        Checks whether a file path lies inside the genome scope.
    .DESCRIPTION
        Entries of $GenomeScopes that end in "/" match every path starting
        with that prefix; all other entries must equal the path.
    #>
    param([string]$FilePath)

    foreach ($entry in $GenomeScopes) {
        $isDirectoryScope = $entry.EndsWith("/")

        if ($isDirectoryScope -and $FilePath.StartsWith($entry)) { return $true }
        if (-not $isDirectoryScope -and $FilePath -eq $entry) { return $true }
    }

    return $false
}
|
|
||||||
|
|
||||||
# --- Hauptlogik ---
|
|
||||||
|
|
||||||
# Resolve the repository path to an absolute path BEFORE changing location.
# Otherwise a relative -RepoPath (e.g. "..\other-repo") would be joined again
# after Push-Location and point to the wrong directory for the router lookup
# in Get-TraitKey and for the default output path; "." would also be reported
# as the repository name.
$RepoPath = (Resolve-Path -LiteralPath $RepoPath).ProviderPath

# Remember the console encoding so it can be restored when the script ends.
$previousOutputEncoding = [Console]::OutputEncoding

Push-Location $RepoPath
try {
    # PowerShell decodes the output of native commands with
    # [Console]::OutputEncoding. Git emits UTF-8, so without this setting
    # non-ASCII characters (umlauts in author names, messages, diffs) are
    # garbled on Windows.
    [Console]::OutputEncoding = [System.Text.Encoding]::UTF8

    # Determine the output path
    if (-not $OutputPath) {
        $OutputPath = Join-Path $RepoPath ".github/genome/output/raw-mutations.md"
    }

    Write-Host "Genome Extract: Scanning commits since '$Since'..." -ForegroundColor Cyan

    # Fetch the git log: commits that touch files in the genome scope
    $logFormat = "--format=%H|%aI|%an|%s"
    $commits = git log $logFormat --since="$Since" -- $GenomeScopes 2>&1

    if (-not $commits -or $LASTEXITCODE -ne 0) {
        Write-Host "Keine Commits im Genome-Scope seit '$Since' gefunden." -ForegroundColor Yellow
        $commits = @()
    }

    # Parse the commits
    $mutations = @{}  # key: trait -> value: list of mutations

    foreach ($line in $commits) {
        if (-not $line -or $line -notmatch "\|") { continue }

        # Limit to 4 fields so that "|" inside the subject stays intact
        $parts = $line -split "\|", 4
        if ($parts.Count -lt 4) { continue }

        $hash = $parts[0]
        $date = $parts[1]
        $author = $parts[2]
        $message = $parts[3]

        # Fetch the files changed by this commit
        $diffFiles = git diff-tree --no-commit-id -r --name-status $hash 2>&1

        foreach ($diffLine in $diffFiles) {
            if (-not $diffLine -or $diffLine -notmatch "^\w") { continue }

            $diffParts = $diffLine -split "\t", 3
            $status = $diffParts[0]
            $filePath = $diffParts[1]

            # For renames: use the target path
            if ($status -match "^R" -and $diffParts.Count -ge 3) {
                $filePath = $diffParts[2]
            }

            # Normalize (backslash -> forward slash)
            $filePath = $filePath -replace "\\", "/"

            # Skip files outside the genome scope
            if (-not (Test-InGenomeScope $filePath)) { continue }

            # Derive the trait key
            $traitKey = Get-TraitKey $filePath
            if (-not $traitKey) { continue }

            # Determine the mutation type
            $mutationType = Get-MutationType $status

            # Fetch the diff for this file
            $diff = git show --format="" --no-color $hash -- $filePath 2>&1
            if ($LASTEXITCODE -ne 0) {
                # Fallback: diff-tree
                $diff = git diff-tree -p $hash -- $filePath 2>&1
            }
            $diffText = ($diff | Out-String).Trim()

            # Store the mutation
            if (-not $mutations.ContainsKey($traitKey)) {
                $mutations[$traitKey] = @()
            }

            $mutations[$traitKey] += @{
                Hash    = $hash.Substring(0, [Math]::Min(8, $hash.Length))
                Date    = $date
                Author  = $author
                Message = $message
                File    = $filePath
                Type    = $mutationType
                Diff    = $diffText
            }
        }
    }

    # --- Generate output ---

    $sb = [System.Text.StringBuilder]::new()
    [void]$sb.AppendLine("# Raw Mutations")
    [void]$sb.AppendLine("")
    [void]$sb.AppendLine("**Extrahiert:** $(Get-Date -Format 'yyyy-MM-dd HH:mm')")
    [void]$sb.AppendLine("**Zeitraum:** seit $Since")
    [void]$sb.AppendLine("**Repository:** $(Split-Path $RepoPath -Leaf)")
    [void]$sb.AppendLine("**Traits mit Mutations:** $($mutations.Count)")
    [void]$sb.AppendLine("")
    [void]$sb.AppendLine("---")
    [void]$sb.AppendLine("")

    if ($mutations.Count -eq 0) {
        [void]$sb.AppendLine("*Keine Mutations im angegebenen Zeitraum gefunden.*")
    } else {
        # Emit traits sorted by trait key
        foreach ($traitKey in ($mutations.Keys | Sort-Object)) {
            $traitMutations = $mutations[$traitKey]

            [void]$sb.AppendLine("## Trait: ``$traitKey``")
            [void]$sb.AppendLine("")
            [void]$sb.AppendLine("| Mutations | Dateien |")
            [void]$sb.AppendLine("|-----------|---------|")

            $uniqueFiles = ($traitMutations | ForEach-Object { $_.File } | Sort-Object -Unique) -join ", "
            [void]$sb.AppendLine("| $($traitMutations.Count) | $uniqueFiles |")
            [void]$sb.AppendLine("")

            # Group by commit (hash). The mutations are hashtables, whose keys
            # are not real object properties, so the grouping key is computed
            # with a script block instead of "-Property Hash".
            $byCommit = $traitMutations | Group-Object -Property { $_.Hash }

            foreach ($commitGroup in $byCommit) {
                $first = $commitGroup.Group[0]
                [void]$sb.AppendLine("### [$($first.Hash)] $($first.Message)")
                [void]$sb.AppendLine("")
                [void]$sb.AppendLine("- **Datum:** $($first.Date)")
                [void]$sb.AppendLine("- **Autor:** $($first.Author)")
                [void]$sb.AppendLine("")

                foreach ($mutation in $commitGroup.Group) {
                    $header = "#### " + '`' + $mutation.Type + '`' + " - " + $mutation.File
                    [void]$sb.AppendLine($header)
                    [void]$sb.AppendLine("")

                    if ($mutation.Diff) {
                        # Limit the diff to at most 80 lines
                        $diffLines = $mutation.Diff -split [Environment]::NewLine
                        if ($diffLines.Count -gt 80) {
                            $truncMsg = "... ($($diffLines.Count - 80) weitere Zeilen)"
                            $diffLines = $diffLines[0..79] + @($truncMsg)
                        }
                        [void]$sb.AppendLine('```diff')
                        [void]$sb.AppendLine(($diffLines -join [Environment]::NewLine))
                        [void]$sb.AppendLine('```')
                    }
                    [void]$sb.AppendLine("")
                }
            }

            [void]$sb.AppendLine("---")
            [void]$sb.AppendLine("")
        }
    }

    # Write the file, creating the output directory when it is missing
    $outputDir = Split-Path $OutputPath -Parent
    if (-not (Test-Path $outputDir)) {
        New-Item -ItemType Directory -Path $outputDir -Force | Out-Null
    }

    $sb.ToString() | Set-Content -Path $OutputPath -Encoding UTF8

    Write-Host ""
    Write-Host "Extraction abgeschlossen:" -ForegroundColor Green
    Write-Host "  Traits:    $($mutations.Count)" -ForegroundColor White
    Write-Host "  Mutations: $(($mutations.Values | ForEach-Object { $_.Count } | Measure-Object -Sum).Sum)" -ForegroundColor White
    Write-Host "  Output:    $OutputPath" -ForegroundColor White

} finally {
    # Always restore the caller's console encoding and location
    [Console]::OutputEncoding = $previousOutputEncoding
    Pop-Location
}
|
|
||||||
282
.github/genome/genome-extract.py
vendored
Normal file
282
.github/genome/genome-extract.py
vendored
Normal file
|
|
@ -0,0 +1,282 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Genome Engine – Phase 1: Extraction
|
||||||
|
|
||||||
|
Extrahiert Mutations aus der Git-History für Copilot-Customization-Dateien.
|
||||||
|
Scannt git log für Änderungen im Genome-Scope (.github/skills, agents, prompts, instructions).
|
||||||
|
Gruppiert Diffs nach Trait und gibt strukturiertes Markdown aus.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python .github/genome/genome-extract.py --since "7 days ago"
|
||||||
|
python .github/genome/genome-extract.py --since "4 days ago" --repo /path/to/repo
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from collections import defaultdict
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# --- Konfiguration ---
|
||||||
|
|
||||||
|
# Paths that make up the genome scope. They are passed to `git log` as
# pathspecs and re-checked per file by is_in_genome_scope(): entries ending
# in "/" are matched as path prefixes, all other entries as exact file paths.
GENOME_SCOPES = [
    ".github/skills/",
    ".github/agents/",
    ".github/prompts/",
    ".github/copilot-instructions.md",
    ".github/kotlin-conventions.instructions.md",
]

# Maximum number of diff lines kept per mutation; longer diffs are cut off
# and a "... (N weitere Zeilen)" marker line is appended.
MAX_DIFF_LINES = 80
|
||||||
|
|
||||||
|
|
||||||
|
def run_git(*args: str, cwd: str = ".") -> str:
    """Run a git command and return its standard output decoded as UTF-8.

    Args:
        *args: Arguments handed to ``git`` (sub-command first).
        cwd: Directory in which git is executed.

    Returns:
        The captured stdout. Bytes that are not valid UTF-8 are replaced
        instead of raising. When git exits with a non-zero status the
        (usually empty) stdout is still returned, and a warning with git's
        stderr is written to ``sys.stderr`` so the failure is not silent.
    """
    # core.quotepath=false makes git print non-ASCII path names verbatim.
    # With the default setting such paths are emitted as quoted, octal-escaped
    # strings ("\303\244..."), which never match the genome scope prefixes.
    command = ["git", "-c", "core.quotepath=false", *args]
    result = subprocess.run(
        command,
        cwd=cwd,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
        check=False,
    )
    if result.returncode != 0:
        details = (result.stderr or "").strip()
        print(
            f"warning: 'git {' '.join(args)}' exited with status "
            f"{result.returncode}: {details}",
            file=sys.stderr,
        )
    return result.stdout
|
||||||
|
|
||||||
|
|
||||||
|
def is_in_genome_scope(filepath: str) -> bool:
    """Return whether *filepath* lies inside the genome scope.

    Entries of ``GENOME_SCOPES`` ending in "/" match every path that starts
    with that prefix; all other entries must equal the path exactly.
    """
    return any(
        filepath.startswith(scope) if scope.endswith("/") else filepath == scope
        for scope in GENOME_SCOPES
    )
|
||||||
|
|
||||||
|
|
||||||
|
def get_trait_key(filepath: str, repo_path: str) -> str | None:
|
||||||
|
"""Leitet den Trait-Key aus einem Dateipfad ab."""
|
||||||
|
|
||||||
|
# Skills: skill/<ordnername>
|
||||||
|
m = re.match(r"^\.github/skills/([^/]+)/", filepath)
|
||||||
|
if m:
|
||||||
|
return f"skill/{m.group(1)}"
|
||||||
|
|
||||||
|
# Agents: agent/<dateiname-ohne-extension>
|
||||||
|
m = re.match(r"^\.github/agents/(.+)\.agent\.md$", filepath)
|
||||||
|
if m:
|
||||||
|
return f"agent/{m.group(1)}"
|
||||||
|
|
||||||
|
# Prompts: Standalone oder Verbund
|
||||||
|
m = re.match(r"^\.github/prompts/(.+)\.prompt\.md$", filepath)
|
||||||
|
if m:
|
||||||
|
name = m.group(1)
|
||||||
|
|
||||||
|
# Verbund-Erkennung: <router>-<sub>.prompt.md → Trait des Routers
|
||||||
|
parts = name.split("-")
|
||||||
|
if len(parts) > 1:
|
||||||
|
# Versuche progressiv kürzere Präfixe als Router-Name
|
||||||
|
for i in range(len(parts) - 1, 0, -1):
|
||||||
|
candidate = "-".join(parts[:i])
|
||||||
|
router_path = Path(repo_path) / f".github/prompts/{candidate}.prompt.md"
|
||||||
|
if router_path.exists():
|
||||||
|
return f"prompt/{candidate}"
|
||||||
|
|
||||||
|
# Standalone-Prompt
|
||||||
|
return f"prompt/{name}"
|
||||||
|
|
||||||
|
# Instructions (*.instructions.md)
|
||||||
|
m = re.match(r"^\.github/(.+)\.instructions\.md$", filepath)
|
||||||
|
if m:
|
||||||
|
return f"instructions/{m.group(1)}"
|
||||||
|
|
||||||
|
# copilot-instructions.md
|
||||||
|
if filepath == ".github/copilot-instructions.md":
|
||||||
|
return "instructions/copilot-instructions"
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_mutation_type(status: str) -> str:
    """Map a git name-status code to the mutation type.

    Codes starting with "A" yield "member-added", codes starting with "D"
    yield "member-removed"; every other code is a "content-change".
    """
    by_first_letter = {"A": "member-added", "D": "member-removed"}
    # status[:1] is "" for an empty status, which falls through to the default
    return by_first_letter.get(status[:1], "content-change")
|
||||||
|
|
||||||
|
|
||||||
|
def _changed_files(repo_path: str, commit_hash: str) -> list[tuple[str, str]]:
    """Return ``(status, path)`` for every file changed by *commit_hash*."""
    # --root: without it diff-tree prints nothing for the initial commit, so
    # files added by the very first commit would be lost.
    output = run_git(
        "diff-tree", "--no-commit-id", "--root", "-r", "--name-status", commit_hash,
        cwd=repo_path,
    )

    changed = []
    for entry in output.split("\n"):
        # Every name-status line starts with the status letter
        if not entry or not entry[0].isalpha():
            continue

        fields = entry.split("\t", 2)
        status = fields[0]
        if status.startswith("R") and len(fields) >= 3:
            # For renames: use the target path
            path = fields[2]
        else:
            path = fields[1] if len(fields) > 1 else ""

        # Normalize to forward slashes
        changed.append((status, path.replace("\\", "/")))
    return changed


def _truncated_diff(repo_path: str, commit_hash: str, filepath: str) -> str:
    """Return the diff of *filepath* in *commit_hash*, cut to MAX_DIFF_LINES."""
    output = run_git(
        "show", "--format=", "--no-color", commit_hash, "--", filepath,
        cwd=repo_path,
    ).strip()
    if not output:
        return ""

    diff_lines = output.split("\n")
    if len(diff_lines) > MAX_DIFF_LINES:
        omitted = len(diff_lines) - MAX_DIFF_LINES
        diff_lines = diff_lines[:MAX_DIFF_LINES] + [f"... ({omitted} weitere Zeilen)"]
    return "\n".join(diff_lines)


def extract_mutations(repo_path: str, since: str) -> dict[str, list[dict]]:
    """Extract all mutations in the genome scope from the git history.

    Args:
        repo_path: Path of the repository to scan.
        since: Value for ``git log --since`` (e.g. "7 days ago").

    Returns:
        A mapping from trait key to the list of its mutations. Each mutation
        is a dict with the keys "hash" (first 8 characters), "date",
        "author", "message", "file", "type" and "diff". The mapping is empty
        when no commit touched the genome scope.
    """
    mutations: dict[str, list[dict]] = defaultdict(list)

    # Fields are separated by the ASCII unit separator (git's %x1f) instead
    # of "|": author names may contain "|", which would shift the fields.
    field_sep = "\x1f"
    log_output = run_git(
        "log",
        "--format=%H%x1f%aI%x1f%an%x1f%s",
        f"--since={since}",
        "--",
        *GENOME_SCOPES,
        cwd=repo_path,
    )

    # Deliberately no .strip() on the output: str.strip() treats "\x1f" as
    # whitespace and would eat the separator in front of an empty subject.
    for line in log_output.split("\n"):
        parts = line.split(field_sep, 3)
        if len(parts) < 4:
            continue  # blank line or unexpected output
        commit_hash, date, author, message = parts

        for status, filepath in _changed_files(repo_path, commit_hash):
            # A commit may also touch files outside the scope; skip those
            if not is_in_genome_scope(filepath):
                continue

            trait_key = get_trait_key(filepath, repo_path)
            if not trait_key:
                continue

            mutations[trait_key].append({
                "hash": commit_hash[:8],
                "date": date,
                "author": author,
                "message": message,
                "file": filepath,
                "type": get_mutation_type(status),
                "diff": _truncated_diff(repo_path, commit_hash, filepath),
            })

    return mutations
|
||||||
|
|
||||||
|
|
||||||
|
def _code_fence(text: str) -> str:
    """Return a backtick fence that no backtick run inside *text* can close."""
    longest_run = max((len(run) for run in re.findall(r"`+", text)), default=0)
    return "`" * max(3, longest_run + 1)


def generate_markdown(mutations: dict[str, list[dict]], repo_path: str, since: str) -> str:
    """Render the extracted mutations as a Markdown report.

    Args:
        mutations: Mapping from trait key to its mutations. Each mutation is
            a dict with the keys "hash", "date", "author", "message", "file",
            "type" and "diff".
        repo_path: Repository path; only its final directory name is shown.
        since: The time span that was scanned, shown verbatim in the header.

    Returns:
        The complete Markdown document as one string. Traits are sorted by
        key; within a trait the mutations are grouped by commit hash in the
        order in which the hashes first occur.
    """
    lines = [
        "# Raw Mutations",
        "",
        f"**Extrahiert:** {datetime.now().strftime('%Y-%m-%d %H:%M')}",
        f"**Zeitraum:** seit {since}",
        f"**Repository:** {Path(repo_path).resolve().name}",
        f"**Traits mit Mutations:** {len(mutations)}",
        "",
        "---",
        "",
    ]

    if not mutations:
        lines.append("*Keine Mutations im angegebenen Zeitraum gefunden.*")
        return "\n".join(lines)

    for trait_key in sorted(mutations):
        trait_mutations = mutations[trait_key]
        unique_files = sorted({m["file"] for m in trait_mutations})

        lines += [
            f"## Trait: `{trait_key}`",
            "",
            "| Mutations | Dateien |",
            "|-----------|---------|",
            f"| {len(trait_mutations)} | {', '.join(unique_files)} |",
            "",
        ]

        # Group by commit; dicts keep insertion order, so commits appear in
        # the order in which their first mutation was recorded.
        by_commit: dict[str, list[dict]] = {}
        for m in trait_mutations:
            by_commit.setdefault(m["hash"], []).append(m)

        for commit_mutations in by_commit.values():
            first = commit_mutations[0]
            lines += [
                f"### [{first['hash']}] {first['message']}",
                "",
                f"- **Datum:** {first['date']}",
                f"- **Autor:** {first['author']}",
                "",
            ]

            for mutation in commit_mutations:
                lines.append(f"#### `{mutation['type']}` – {mutation['file']}")
                lines.append("")

                diff = mutation["diff"]
                if diff:
                    # The scanned files are Markdown themselves. A context
                    # line such as " ```" (one leading space) would close a
                    # fixed three-backtick fence early, so the fence is made
                    # longer than any backtick run in the diff.
                    fence = _code_fence(diff)
                    lines += [f"{fence}diff", diff, fence]
                lines.append("")

        lines.append("---")
        lines.append("")

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: extract mutations and write the report.

    Parses ``--since``, ``--repo`` and ``--output``, scans the repository,
    writes the Markdown report as UTF-8 and prints a short summary.
    """
    parser = argparse.ArgumentParser(description="Genome Engine – Extraction")
    parser.add_argument("--since", default="7 days ago", help='Zeitspanne (z.B. "7 days ago")')
    parser.add_argument("--repo", default=".", help="Pfad zum Repository")
    parser.add_argument("--output", default="", help="Output-Pfad (default: .github/genome/output/raw-mutations.md)")
    args = parser.parse_args()

    repo_path = os.path.abspath(args.repo)
    output_path = args.output or os.path.join(repo_path, ".github/genome/output/raw-mutations.md")

    print(f"Genome Extract: Scanning commits since '{args.since}'...")

    mutations = extract_mutations(repo_path, args.since)
    markdown = generate_markdown(mutations, repo_path, args.since)

    # Create the target directory. Path.parent is "." for a bare file name
    # such as "--output raw.md"; os.makedirs(os.path.dirname(...)) would be
    # called with "" in that case and raise FileNotFoundError.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(markdown)

    total_mutations = sum(len(v) for v in mutations.values())
    print()
    print("Extraction abgeschlossen:")
    print(f"  Traits:    {len(mutations)}")
    print(f"  Mutations: {total_mutations}")
    print(f"  Output:    {output_path}")


if __name__ == "__main__":
    main()
|
||||||
Loading…
Reference in a new issue