diff --git a/archivist/utils/strings.py b/archivist/utils/strings.py index 7e861e2..408e7af 100644 --- a/archivist/utils/strings.py +++ b/archivist/utils/strings.py @@ -1,8 +1,6 @@ import math, re -from bs4 import BeautifulSoup - def get_urls(string:str = None) -> list: """ Extracts URLs from a given string using regular expressions. @@ -37,33 +35,3 @@ def convert_size(size_bytes: int) -> str: p = math.pow(1024, i) s = round(size_bytes / p, 2) return f"{s} {size_name[i]}" - - -def is_string_html(string: str) -> bool: - ''' - Check if string is HTML - - Parameters: - string (str): The string to be checked - - Returns: - bool: True if string is HTML, False otherwise - ''' - return bool(BeautifulSoup(string, "html.parser").find()) - - -def html_to_text(string): - """ - Converts an HTML string to plain text by removing all HTML tags and returning the resulting text. - - Parameters: - string (str): The HTML string to be converted. - - Returns: - str: The plain text version of the input HTML string. - """ - if is_string_html(string): - soup = BeautifulSoup(string, "html.parser") - return soup.get_text() - else: - return string