def is_string_html(string: str) -> bool:
    """
    Check if string is HTML.

    A string counts as HTML when Beautiful Soup's ``html.parser`` backend
    finds at least one tag in it.

    Parameters:
        string (str): The string to be checked. ``None`` and empty values
            are accepted and reported as not HTML.

    Returns:
        bool: True if string contains at least one HTML tag, False otherwise
    """
    # None / "" / b"" cannot contain markup; answer directly instead of
    # handing a non-markup value to the parser.
    if not string:
        return False

    # A tag always starts with "<". Text without one (plain words, URLs,
    # file names) can never produce a tag, and skipping the parser for it
    # also avoids Beautiful Soup's "markup resembles a locator" warning.
    if isinstance(string, str) and "<" not in string:
        return False

    # Imported here so the function carries its own dependency; the line is
    # only reached when real parsing is required.
    from bs4 import BeautifulSoup

    # find() with no arguments returns the first tag in the document, or
    # None when the markup holds only text.
    first_tag = BeautifulSoup(string, "html.parser").find()
    return first_tag is not None