287 lines
11 KiB
Python
287 lines
11 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
|
||
Importiert 10 freie Nachschlagewerke/Referenzwerke (Project Gutenberg) in die Bollwerk-Ressourcen.
|
||
|
||
Voraussetzungen:
|
||
- Server läuft unter SERVER_URL
|
||
- Admin-Credentials als Env-Vars: BOLLWERK_ADMIN_USER, BOLLWERK_ADMIN_PASS
|
||
|
||
Verwendung:
|
||
python import-books-references.py
|
||
python import-books-references.py --dry-run (nur Download, kein Upload)
|
||
"""
|
||
|
||
import http.client
import json
import os
import sys
import time
import urllib.error
import urllib.request
import uuid
|
||
|
||
# Base URL of the Bollwerk server (no trailing slash, because request paths
# are appended as f"{SERVER_URL}/api/...").  The production host stays the
# default; setting BOLLWERK_SERVER_URL redirects the importer to another
# instance (e.g. staging) without editing the script.  An empty value falls
# back to the default.
SERVER_URL = (os.environ.get("BOLLWERK_SERVER_URL") or "https://bollwerk.online").rstrip("/")
|
||
|
||
# --- 10 selected reference works (Project Gutenberg, public domain) ---
# Each entry carries the Gutenberg ebook id used for the download plus the
# metadata that is sent along with the upload.  Descriptions are user-facing
# German text and are kept verbatim.
BOOKS = [
    dict(
        gutenberg_id=1497,
        title="The Republic",
        author="Plato",
        description=(
            "Platons Dialog über Gerechtigkeit, den idealen Staat und die Philosophenherrschaft. "
            "Grundlagenwerk der politischen Philosophie und Gesellschaftstheorie seit über 2400 Jahren."
        ),
        tags=["nachschlagewerk"],
        language="en",
        release_date="-375",
        edition="Benjamin Jowett Translation",
    ),
    dict(
        gutenberg_id=1228,
        title="On the Origin of Species",
        author="Charles Darwin",
        description=(
            "Darwins revolutionäre Theorie der natürlichen Selektion. "
            "Das Buch, das unser Verständnis des Lebens grundlegend veränderte – Pflichtlektüre für Naturwissenschaft und Evolutionsbiologie."
        ),
        tags=["nachschlagewerk"],
        language="en",
        release_date="1859-11-24",
        edition="First Edition, 1859",
    ),
    dict(
        gutenberg_id=3300,
        title="The Wealth of Nations",
        author="Adam Smith",
        description=(
            "Das Gründungswerk der modernen Ökonomie. "
            "Smith analysiert Arbeitsteilung, freie Märkte und die 'unsichtbare Hand' – bis heute Referenz für wirtschaftliches Denken."
        ),
        tags=["nachschlagewerk", "handbuch"],
        language="en",
        release_date="1776-03-09",
        edition="First Edition",
    ),
    dict(
        gutenberg_id=147,
        title="Common Sense",
        author="Thomas Paine",
        description=(
            "Die einflussreichste politische Flugschrift der amerikanischen Revolution. "
            "Paines klare Argumente für Unabhängigkeit und Selbstbestimmung inspirierten eine ganze Nation zum Handeln."
        ),
        tags=["nachschlagewerk"],
        language="en",
        release_date="1776-01-10",
        edition="First Edition",
    ),
    dict(
        gutenberg_id=4363,
        title="Beyond Good and Evil",
        author="Friedrich Nietzsche",
        description=(
            "Nietzsches Frontalangriff auf die traditionelle Philosophie. "
            "Hinterfragt Moral, Wahrheit und den Willen zur Macht – ein Schlüsseltext der modernen Philosophie."
        ),
        tags=["nachschlagewerk"],
        language="en",
        release_date="1886-01-01",
        edition="Helen Zimmern Translation, 1906",
    ),
    dict(
        gutenberg_id=3207,
        title="Leviathan",
        author="Thomas Hobbes",
        description=(
            "Hobbes' Theorie des Gesellschaftsvertrags und der absoluten Staatsgewalt. "
            "Geschrieben im englischen Bürgerkrieg – das Standardwerk über Macht, Ordnung und den Naturzustand des Menschen."
        ),
        tags=["nachschlagewerk"],
        language="en",
        release_date="1651-04-01",
        edition="First Edition",
    ),
    dict(
        gutenberg_id=1404,
        title="The Federalist Papers",
        author="Alexander Hamilton, James Madison, John Jay",
        description=(
            "85 Essays zur Verteidigung der US-Verfassung. "
            "Das wichtigste Dokument amerikanischer Staatstheorie – Referenzwerk für Gewaltenteilung, Föderalismus und demokratische Institutionen."
        ),
        tags=["nachschlagewerk"],
        language="en",
        release_date="1788-01-01",
        edition="First Collected Edition",
    ),
    dict(
        gutenberg_id=1998,
        title="Thus Spake Zarathustra",
        author="Friedrich Nietzsche",
        description=(
            "Nietzsches philosophisches Hauptwerk in poetischer Form. "
            "Einführung des Übermenschen, des Willens zur Macht und der ewigen Wiederkehr – eine der einflussreichsten Schriften der Moderne."
        ),
        tags=["nachschlagewerk"],
        language="en",
        release_date="1885-01-01",
        edition="Thomas Common Translation, 1909",
    ),
    dict(
        gutenberg_id=61,
        title="The Communist Manifesto",
        author="Karl Marx, Friedrich Engels",
        description=(
            "Das Manifest der Kommunistischen Partei – die wohl wirkungsmächtigste politische Schrift des 19. Jahrhunderts. "
            "Analyse des Klassenkampfs und Vision einer klassenlosen Gesellschaft."
        ),
        tags=["nachschlagewerk"],
        language="en",
        release_date="1848-02-21",
        edition="Samuel Moore Translation, 1888",
    ),
    dict(
        gutenberg_id=4280,
        title="The Critique of Pure Reason",
        author="Immanuel Kant",
        description=(
            "Kants Meisterwerk über die Grenzen menschlicher Erkenntnis. "
            "Untersucht, was wir wissen können, bevor wir es erfahren – Grundlage der gesamten modernen Erkenntnistheorie."
        ),
        tags=["nachschlagewerk"],
        language="en",
        release_date="1781-01-01",
        edition="J. M. D. Meiklejohn Translation, 1855",
    ),
]
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# ANSI escape sequences used to colour console output.
RESET = "\033[0m"
GREEN = "\033[92m"
YELLOW = "\033[93m"
RED = "\033[91m"


def _emit(text):
    """Print *text* and flush immediately so progress shows up in order."""
    print(text, flush=True)


def ok(msg):
    """Print a green success line prefixed with ``[OK]``."""
    _emit(f"{GREEN}[OK] {msg}{RESET}")


def fail(msg):
    """Print a red error line prefixed with ``[!!]``."""
    _emit(f"{RED}[!!] {msg}{RESET}")


def step(msg):
    """Print a yellow step headline, preceded by an empty line."""
    _emit(f"\n{YELLOW}{msg}{RESET}")


def info(msg):
    """Print an uncoloured detail line, indented by one space."""
    _emit(f" {msg}")
|
||
|
||
|
||
def login(username: str, password: str) -> str:
    """Log in to the Bollwerk server and return the access token.

    Sends the credentials as JSON to ``/api/auth/login`` and reads the
    ``accessToken`` field of the JSON response.

    Args:
        username: Admin user name.
        password: Admin password.

    Returns:
        The value of ``accessToken`` from the login response.

    Any failure (HTTP error status, unreachable server, timeout, malformed
    response) is reported via ``fail()`` and terminates the process with
    exit code 1; the function never returns in that case.
    """
    payload = json.dumps({"username": username, "password": password}).encode()
    req = urllib.request.Request(
        f"{SERVER_URL}/api/auth/login",
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        # Explicit timeout: without it a hanging server blocks the script forever.
        with urllib.request.urlopen(req, timeout=30) as resp:
            data = json.loads(resp.read())
        return data["accessToken"]
    except urllib.error.HTTPError as e:
        # Must be handled before OSError: HTTPError is a subclass of it.
        body = e.read().decode(errors="replace") if e.fp else ""
        fail(f"Login fehlgeschlagen: {e.code} {body}")
    except OSError as e:
        # URLError (DNS failure, connection refused, TLS error) and timeouts.
        fail(f"Login fehlgeschlagen: Server nicht erreichbar ({e})")
    except (ValueError, KeyError, TypeError):
        # Response was not JSON, or JSON without an "accessToken" field.
        fail("Login fehlgeschlagen: Antwort enthält kein accessToken")
    sys.exit(1)
|
||
|
||
|
||
def download_epub(gutenberg_id: int) -> bytes:
    """Download the ePub of a Project Gutenberg ebook.

    Tries several known URL patterns in order (the lightweight no-images
    edition first) and returns the first response that looks like an ePub.

    Args:
        gutenberg_id: Numeric Project Gutenberg ebook id.

    Returns:
        The raw bytes of the ePub file.

    Raises:
        RuntimeError: If none of the candidate URLs yields a usable file.
    """
    urls = [
        f"https://www.gutenberg.org/ebooks/{gutenberg_id}.epub.noimages",
        f"https://www.gutenberg.org/ebooks/{gutenberg_id}.epub3.images",
        f"https://www.gutenberg.org/cache/epub/{gutenberg_id}/pg{gutenberg_id}.epub",
    ]
    headers = {"User-Agent": "BollwerkImporter/1.0"}
    for url in urls:
        req = urllib.request.Request(url, headers=headers)
        try:
            with urllib.request.urlopen(req, timeout=60) as resp:
                data = resp.read()
        except (OSError, http.client.HTTPException):
            # OSError covers HTTPError/URLError as well as socket timeouts and
            # connection resets raised while reading the body; HTTPException
            # covers truncated responses (IncompleteRead).  In every case the
            # next candidate URL is tried instead of aborting the import.
            continue
        # Sanity check: an ePub is a ZIP container, so it must start with the
        # ZIP local-file-header magic.  This rejects HTML error pages that are
        # served with status 200 and happen to be larger than 1000 bytes.
        if len(data) > 1000 and data.startswith(b"PK\x03\x04"):
            return data
    raise RuntimeError(f"Konnte Gutenberg #{gutenberg_id} nicht herunterladen")
|
||
|
||
|
||
def _build_multipart_body(boundary: str, metadata: dict, filename: str, file_bytes: bytes) -> bytes:
    """Assemble a multipart/form-data body with a JSON "metadata" part and a "file" part."""
    return b"".join([
        # Part 1: metadata JSON
        f"--{boundary}\r\n".encode(),
        b'Content-Disposition: form-data; name="metadata"\r\n',
        b"Content-Type: application/json\r\n\r\n",
        json.dumps(metadata).encode(),
        b"\r\n",
        # Part 2: file
        f"--{boundary}\r\n".encode(),
        f'Content-Disposition: form-data; name="file"; filename="{filename}"\r\n'.encode(),
        b"Content-Type: application/epub+zip\r\n\r\n",
        file_bytes,
        b"\r\n",
        # End boundary
        f"--{boundary}--\r\n".encode(),
    ])


def upload_resource(token: str, book: dict, file_bytes: bytes) -> dict:
    """Upload one ePub with its metadata to the admin API via multipart POST.

    A fresh GUID is generated for every call and used both in the metadata
    and as the uploaded file name (``<guid>.epub``).  ``createdAt`` and
    ``updatedAt`` are set to the current time in milliseconds.

    Args:
        token: Bearer token sent in the ``Authorization`` header.
        book: Book entry; ``title``, ``description`` and ``tags`` are
            required, ``release_date``, ``author``, ``language`` (default
            ``"en"``) and ``edition`` are optional.
        file_bytes: Raw content of the ePub file.

    Returns:
        The decoded JSON object of the server response.

    Raises:
        RuntimeError: On an HTTP error status, a network failure or timeout,
            or when the response is not a JSON object.
    """
    guid = str(uuid.uuid4())
    now = int(time.time() * 1000)

    metadata = {
        "guid": guid,
        "title": book["title"],
        "description": book["description"],
        "tags": book["tags"],
        "fileFormat": "epub",
        "mimeType": "application/epub+zip",
        "fileSize": len(file_bytes),
        "releaseDate": book.get("release_date"),
        "createdAt": now,
        "updatedAt": now,
        "author": book.get("author"),
        "language": book.get("language", "en"),
        "edition": book.get("edition"),
        "downloadUrl": "",
    }

    boundary = f"----BollwerkBoundary{uuid.uuid4().hex[:16]}"
    body = _build_multipart_body(boundary, metadata, f"{guid}.epub", file_bytes)

    req = urllib.request.Request(
        f"{SERVER_URL}/api/admin/resources",
        data=body,
        headers={
            "Authorization": f"Bearer {token}",
            "Content-Type": f"multipart/form-data; boundary={boundary}",
        },
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=120) as resp:
            result = json.loads(resp.read())
    except urllib.error.HTTPError as e:
        # Must be handled before OSError: HTTPError is a subclass of it.
        error_body = e.read().decode(errors="replace") if e.fp else ""
        raise RuntimeError(f"Upload fehlgeschlagen: {e.code} {error_body}") from e
    except OSError as e:
        # URLError and timeouts: surface them as RuntimeError so that a single
        # failed upload does not abort the caller's whole upload loop.
        raise RuntimeError(f"Upload fehlgeschlagen: {e}") from e
    except ValueError as e:
        # The response body was not valid JSON.
        raise RuntimeError(f"Upload fehlgeschlagen: ungültige Server-Antwort ({e})") from e

    if not isinstance(result, dict):
        raise RuntimeError("Upload fehlgeschlagen: ungültige Server-Antwort (kein JSON-Objekt)")
    return result
|
||
|
||
|
||
def main():
    """Run the import: log in, download all books, then upload them.

    With ``--dry-run`` on the command line the login and upload steps are
    skipped and only the downloads are performed.  Without it, the admin
    credentials are read from the environment variables
    BOLLWERK_ADMIN_USER and BOLLWERK_ADMIN_PASS; the process exits with
    code 1 if either is missing.

    A failed download or upload of a single book is reported and skipped;
    the remaining books are still processed.
    """
    dry_run = "--dry-run" in sys.argv
    # Derive the count from the data instead of hard-coding it, so the banner
    # and progress counters stay correct when BOOKS is edited.
    total = len(BOOKS)

    step(f"=== Bollwerk Bücher-Import ({total} Nachschlagewerke / Referenzwerke) ===")

    if not dry_run:
        username = os.environ.get("BOLLWERK_ADMIN_USER", "")
        password = os.environ.get("BOLLWERK_ADMIN_PASS", "")
        if not username or not password:
            fail("Setze BOLLWERK_ADMIN_USER und BOLLWERK_ADMIN_PASS als Env-Vars")
            sys.exit(1)

        step("1/3 Login als Admin...")
        token = login(username, password)
        ok(f"Eingeloggt als '{username}'")
    else:
        token = ""
        info("DRY-RUN: Kein Login, kein Upload")

    step("2/3 Bücher herunterladen...")
    downloads = []
    for i, book in enumerate(BOOKS, 1):
        info(f" [{i:2d}/{total}] {book['title']} (Gutenberg #{book['gutenberg_id']})...")
        try:
            data = download_epub(book["gutenberg_id"])
        except RuntimeError as e:
            fail(f" {e}")
        else:
            downloads.append((book, data))
            ok(f" {book['title']} – {len(data) / 1024:.0f} KB")

    if not dry_run:
        step("3/3 Upload auf Bollwerk-Server...")
        success = 0
        for i, (book, data) in enumerate(downloads, 1):
            info(f" [{i:2d}/{len(downloads)}] {book['title']}...")
            try:
                result = upload_resource(token, book, data)
            except RuntimeError as e:
                fail(f" {e}")
            else:
                # The upload succeeded even if the response lacks a guid, so
                # do not let a missing key crash the remaining uploads.
                guid = result.get("guid", "?") if isinstance(result, dict) else "?"
                ok(f" {book['title']} → guid={guid}")
                success += 1
            time.sleep(0.5)  # rate limiting

        step(f"Fertig: {success}/{len(downloads)} Bücher erfolgreich importiert.")
    else:
        step(f"DRY-RUN abgeschlossen: {len(downloads)} Bücher heruntergeladen, 0 hochgeladen.")


if __name__ == "__main__":
    main()
|