Python Scripts: add dl stories script

This commit is contained in:
Aroy-Art 2025-08-21 23:47:49 +02:00
parent 380e0ac5d2
commit 8b3e946dd4
Signed by: Aroy
GPG key ID: DB9689E9391DD156

View file

@ -0,0 +1,88 @@
# WARNING: This script downloads stories without asking the site or the authors for permission.
# It is genuinely helpful, so use it responsibly and don't be a bad person!
import sys
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse
def get_slug(url):
    """Return the filename slug for a story URL.

    The slug is the last non-empty segment of the URL path with a trailing
    ``.html`` extension removed. Only the parsed path is inspected, so query
    strings and fragments never end up in the slug.

    Args:
        url: The story URL as a string.

    Returns:
        The slug string, or an empty string when the URL path has no
        segments (for example ``https://example.com``).
    """
    path = urlparse(url).path
    # Drop trailing slashes so "/stories/foo/" yields "foo" instead of "".
    slug = path.rstrip("/").rsplit("/", 1)[-1]
    # Strip only a trailing extension; str.replace would also remove a
    # ".html" that happens to occur in the middle of the name.
    if slug.endswith(".html"):
        slug = slug[: -len(".html")]
    return slug
def main():
    """Download one story page and save it as a plain-text file.

    Reads the story URL from ``sys.argv[1]``, fetches the page and extracts:

    * the title from the first ``<h1>``,
    * the author from the link inside ``<h3 id="author">``,
    * the tags from the ``<p>`` whose text starts with ``Storycodes: ``,
    * the story text from ``<div class="storym">``.

    The result is written to ``<slug>.txt`` in the current working directory,
    where the slug comes from ``get_slug(url)``.

    Exits with status 1 on wrong usage, on a download failure, or when the
    output file cannot be written.
    """
    if len(sys.argv) != 2:
        print("Usage: python download_story.py <url>", file=sys.stderr)
        sys.exit(1)
    url = sys.argv[1]

    # Download the page. Only network/HTTP failures are expected here, so
    # catch the requests base exception instead of every Exception.
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error downloading the page: {e}", file=sys.stderr)
        sys.exit(1)

    soup = BeautifulSoup(response.text, "html.parser")

    # Extract title
    title_tag = soup.find("h1")
    title = title_tag.text.strip() if title_tag else "No title found"

    # Extract author (the name is the text of the link inside the heading)
    author = "Unknown"
    author_tag = soup.find("h3", id="author")
    author_link = author_tag.find("a") if author_tag else None
    if author_link:
        author = author_link.text.strip()

    # Extract tags: everything after the first colon of the matched paragraph.
    # The match guarantees the "Storycodes: " prefix, so a colon is always present.
    tags_text = "No tags found"
    tag_paragraph = soup.find(
        "p", string=lambda text: text and text.startswith("Storycodes: ")
    )
    if tag_paragraph:
        _, _, tags = tag_paragraph.text.partition(":")
        tags_text = tags.strip()

    # Extract story text from the div with class "storym"
    story_text = ""
    story_div = soup.find("div", class_="storym")
    if story_div:
        # Get text content without HTML tags
        story_text = story_div.get_text(separator="\n", strip=True)
    else:
        # Still write the metadata, but make the missing content visible.
        print("Warning: no story content found on the page", file=sys.stderr)

    # An empty slug would produce the hidden file ".txt"; use a fixed name instead.
    slug = get_slug(url) or "story"
    filename = f"{slug}.txt"

    try:
        with open(filename, "w", encoding="utf-8") as f:
            f.write(f"Title: {title}\n")
            f.write(f"Author: {author}\n")
            f.write(f"Tags: {tags_text}\n")
            f.write(f"Original Link: {url}\n\n")
            f.write("Story Content:\n")
            f.write(story_text)
    except OSError as e:
        print(f"Error writing {filename}: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"Successfully saved story to {filename}")
# Run the downloader only when this file is executed as a script,
# so importing the module does not trigger a download.
if __name__ == "__main__":
    main()