Python Scripts: add dl stories script

2025-08-21 23:47:49 +02:00 · 2025-08-21 23:47:49 +02:00 · 8b3e946dd4
commit 8b3e946dd4
parent 380e0ac5d2
1 changed files with 88 additions and 0 deletions
--- a/PythonScripts/dl-stories.py
+++ b/PythonScripts/dl-stories.py
@ -0,0 +1,88 @@
 # Downloads a single story page and saves its title, author, tags and text to
 # "<slug>.txt". The script does not check whether you have permission to copy
 # the story, so use it responsibly and only on content you are allowed to
 # download.
 import sys
 import requests
 from bs4 import BeautifulSoup
 from urllib.parse import urlparse
 def get_slug(url):
    """Return the filename slug for a story URL.

    The slug is the last non-empty segment of the URL path with a trailing
    ".html" extension removed. Query strings and fragments are ignored
    because only the parsed path is inspected.

    Args:
        url: The story URL as a string.

    Returns:
        The slug string, or "" when the URL path has no segments
        (for example "https://example.com/").
    """
    path = urlparse(url).path
    # Drop trailing slashes so "/stories/foo/" yields "foo" instead of "".
    last_segment = path.rstrip("/").rsplit("/", 1)[-1]
    # Strip ".html" only as a suffix; str.replace() would also remove an
    # occurrence in the middle of the name.
    if last_segment.endswith(".html"):
        last_segment = last_segment[: -len(".html")]
    return last_segment
 def main():
    """Download one story page and save it as "<slug>.txt".

    Expects exactly one command-line argument: the story URL. The page is
    fetched, its title (first <h1>), author (link inside <h3 id="author">),
    tags (the <p> starting with "Storycodes: ") and story text
    (<div class="storym">) are extracted, and the result is written to a
    UTF-8 text file named after the URL slug.

    Exits with status 1 on a usage error, a failed download, or a failed
    file write. Diagnostics are printed to stderr.
    """
    if len(sys.argv) != 2:
        # Report the name the script was actually invoked with.
        print(f"Usage: python {sys.argv[0]} <url>", file=sys.stderr)
        sys.exit(1)
    url = sys.argv[1]

    # Download the page; only network/HTTP failures are expected here, so
    # catch the requests base exception rather than every Exception.
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error downloading the page: {e}", file=sys.stderr)
        sys.exit(1)

    # Parse the HTML
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract title
    title_tag = soup.find("h1")
    title = title_tag.text.strip() if title_tag else "No title found"

    # Extract author
    author = "Unknown"
    author_tag = soup.find("h3", id="author")
    author_link = author_tag.find("a") if author_tag else None
    if author_link:
        author = author_link.text.strip()

    # Extract tags
    tags_text = "No tags found"
    tag_paragraph = soup.find(
        "p", string=lambda text: text and text.startswith("Storycodes: ")
    )
    if tag_paragraph:
        # The matched text always starts with "Storycodes: ", so everything
        # after the first colon is the tag list.
        tags_text = tag_paragraph.text.partition(":")[2].strip()

    # Extract slug for filename; fall back to a fixed name so a URL without
    # a usable last path segment does not produce a file called ".txt".
    slug = get_slug(url) or "story"

    # Extract story text from the div with class "storym"
    story_div = soup.find("div", class_="storym")
    if story_div:
        # Get text content without HTML tags
        story_text = story_div.get_text(separator="\n", strip=True)
    else:
        story_text = ""
        # Still write the metadata, but make the missing body visible.
        print(
            'Warning: no <div class="storym"> found; story content is empty.',
            file=sys.stderr,
        )

    # Create the filename
    filename = f"{slug}.txt"

    # Write the file
    try:
        with open(filename, "w", encoding="utf-8") as f:
            f.write(f"Title: {title}\n")
            f.write(f"Author: {author}\n")
            f.write(f"Tags: {tags_text}\n")
            f.write(f"Original Link: {url}\n\n")
            f.write("Story Content:\n")
            f.write(story_text)
    except OSError as e:
        print(f"Error writing {filename}: {e}", file=sys.stderr)
        sys.exit(1)
    print(f"Successfully saved story to {filename}")
 # Run the downloader only when this file is executed as a script, not when
 # it is imported as a module.
 if __name__ == "__main__":
    main()