From ef1361ec39f30caff425bb91319182c887b073f8 Mon Sep 17 00:00:00 2001 From: Aroy-Art Date: Tue, 4 Jun 2024 20:36:30 +0200 Subject: [PATCH] Add: util function to check if string is html --- archivist/utils/strings.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/archivist/utils/strings.py b/archivist/utils/strings.py index 408e7af..027b881 100644 --- a/archivist/utils/strings.py +++ b/archivist/utils/strings.py @@ -1,6 +1,8 @@ import math, re +from bs4 import BeautifulSoup + def get_urls(string:str = None) -> list: """ Extracts URLs from a given string using regular expressions. @@ -35,3 +37,17 @@ def convert_size(size_bytes: int) -> str: p = math.pow(1024, i) s = round(size_bytes / p, 2) return f"{s} {size_name[i]}" + + +def is_string_html(string: str) -> bool: + ''' + Check if string is HTML + + Parameters: + string (str): The string to be checked + + Returns: + bool: True if string is HTML, False otherwise + ''' + return bool(BeautifulSoup(string, "html.parser").find()) +