# This script is evil and will download stories without permission. Use responsibly.
# But it's amazing and helpful to you, so don't be a bad person!
"""Download a single story page and save it as a plain-text file.

Usage:
    python download_story.py <url>

The page is fetched over HTTP, a few metadata fields (title, author, tags)
are pulled out of the HTML, and the result is written to ``<slug>.txt`` in
the current working directory, where ``<slug>`` is derived from the URL.
"""

import sys
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup

# Prefix of the paragraph that carries the tag list on a story page.
TAGS_PREFIX = "Storycodes: "
# Extension stripped from the last URL path segment to form the slug.
HTML_SUFFIX = ".html"
# File stem used when the URL has no usable last path segment.
FALLBACK_SLUG = "story"
# Seconds to wait for the server before giving up.
REQUEST_TIMEOUT = 10


def get_slug(url):
    """Return the last path segment of *url* without a trailing ``.html``.

    Trailing slashes are ignored, so ``https://host/a/b/`` yields ``"b"``.
    Only a ``.html`` *suffix* is removed; the same text elsewhere in the
    segment is kept. Returns an empty string when the URL has no path
    segment at all (e.g. ``https://host/``).
    """
    path = urlparse(url).path.rstrip("/")
    slug = path.rsplit("/", 1)[-1]
    if slug.endswith(HTML_SUFFIX):
        slug = slug[: -len(HTML_SUFFIX)]
    return slug


def _extract_title(soup):
    """Return the stripped text of the first ``<h1>``, or a placeholder."""
    title_tag = soup.find("h1")
    return title_tag.text.strip() if title_tag else "No title found"


def _extract_author(soup):
    """Return the link text inside ``<h3 id="author">``, or ``"Unknown"``."""
    author_tag = soup.find("h3", id="author")
    author_link = author_tag.find("a") if author_tag else None
    return author_link.text.strip() if author_link else "Unknown"


def _extract_tags(soup):
    """Return the text following ``Storycodes:`` in the tags paragraph.

    Falls back to ``"No tags found"`` when no ``<p>`` whose string starts
    with the prefix exists.
    """
    tag_paragraph = soup.find(
        "p", string=lambda text: text and text.startswith(TAGS_PREFIX)
    )
    if not tag_paragraph:
        return "No tags found"
    # The matched text always starts with the prefix, so the colon is present.
    _, _, tags = tag_paragraph.text.partition(":")
    return tags.strip()


def _extract_story(soup):
    """Return the text of ``<div class="storym">``, one text node per line.

    Returns an empty string when the div is missing.
    """
    story_div = soup.find("div", class_="storym")
    if not story_div:
        return ""
    return story_div.get_text(separator="\n", strip=True)


def _render(title, author, tags_text, url, story_text):
    """Build the full contents of the output text file."""
    return (
        f"Title: {title}\n"
        f"Author: {author}\n"
        f"Tags: {tags_text}\n"
        f"Original Link: {url}\n\n"
        "Story Content:\n"
        f"{story_text}"
    )


def main():
    """Fetch the URL given on the command line and save the story to disk.

    Exits with status 1 on a wrong argument count, on any download error
    (connection failure, timeout, invalid URL, HTTP error status), or when
    the output file cannot be written.
    """
    if len(sys.argv) != 2:
        print("Usage: python download_story.py <url>")
        sys.exit(1)

    url = sys.argv[1]

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error downloading the page: {e}")
        sys.exit(1)

    soup = BeautifulSoup(response.text, "html.parser")

    story_text = _extract_story(soup)
    if not story_text:
        # Still write the metadata, but do not pretend the story was found.
        print("Warning: no story content found on the page", file=sys.stderr)

    # An empty slug would produce the hidden file ".txt"; use a visible name.
    filename = f"{get_slug(url) or FALLBACK_SLUG}.txt"
    content = _render(
        _extract_title(soup),
        _extract_author(soup),
        _extract_tags(soup),
        url,
        story_text,
    )

    try:
        with open(filename, "w", encoding="utf-8") as f:
            f.write(content)
    except OSError as e:
        print(f"Error writing {filename}: {e}")
        sys.exit(1)

    print(f"Successfully saved story to {filename}")


if __name__ == "__main__":
    main()