import math
import re
from typing import List, Optional

from bs4 import BeautifulSoup

# Compiled once at import time so repeated get_urls() calls reuse the pattern.
# NOTE(review): inside the final character class, `\\w` in a raw string is a
# literal backslash followed by the letter "w", not the `\w` word class.
# This is presumably unintended, but the pattern is left unchanged so that
# existing matching behaviour is preserved -- confirm before altering it.
_URL_PATTERN = re.compile(
    r'((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+'
    r'|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)'
    r'((?:\/[\+~%\/.\w_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[.\!\/\\w]*))?)',
    re.IGNORECASE,
)

# Binary (IEC) unit suffixes; the position in the tuple is the power of 1024.
_SIZE_UNITS = ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")


def get_urls(string: Optional[str] = None) -> List[str]:
    """
    Extracts URLs from a given string using regular expressions.

    The pattern also matches ``www.``-prefixed hosts and ``user@host`` style
    tokens, so e-mail-like strings are returned as well.

    Parameters:
        string (str): The input string from which URLs need to be extracted.
            ``None`` (the default) or an empty string yields an empty list.

    Returns:
        list: The matched URL strings, in order of appearance.
    """
    # The signature defaults to None; passing None to the regex engine would
    # raise TypeError, so treat "nothing to scan" as "no URLs found".
    if not string:
        return []
    # Group 1 wraps the entire pattern, so it is the full matched URL.
    return [match.group(1) for match in _URL_PATTERN.finditer(string)]


def convert_size(size_bytes: int) -> str:
    """
    Converts the given size in bytes to a human-readable format.

    Parameters:
        size_bytes (int): The size in bytes to be converted. Must not be
            negative.

    Returns:
        str: The size rounded to two decimals followed by a binary unit
        (B, KiB, MiB, GiB, ...), e.g. ``"1.5 KiB"``. Zero is returned as
        ``"0B"``. Sizes beyond the largest unit are expressed in YiB.

    Raises:
        ValueError: If ``size_bytes`` is negative.
    """
    if size_bytes == 0:
        return "0B"
    if size_bytes < 0:
        raise ValueError(f"size_bytes must be non-negative, got {size_bytes!r}")

    # 1024 == 2**10, so the unit index is floor(log2(size) / 10). log2 is
    # exact for powers of two, unlike the ratio computed by log(size, 1024).
    exponent = int(math.log2(size_bytes) // 10)
    # Clamp: values below 1 would yield a negative index (wrapping around to
    # "YiB"), and values of 1024**9 or more would run past the last unit.
    index = min(max(exponent, 0), len(_SIZE_UNITS) - 1)
    value = round(size_bytes / 1024 ** index, 2)
    return f"{value} {_SIZE_UNITS[index]}"


def is_string_html(string: str) -> bool:
    """
    Check if string is HTML.

    The string is parsed with BeautifulSoup's ``html.parser`` and counts as
    HTML when the parse tree contains at least one tag.

    Parameters:
        string (str): The string to be checked.

    Returns:
        bool: True if string is HTML, False otherwise.
    """
    first_tag = BeautifulSoup(string, "html.parser").find()
    return bool(first_tag)


def html_to_text(string: str) -> str:
    """
    Converts an HTML string to plain text by removing all HTML tags.

    Parameters:
        string (str): The HTML string to be converted.

    Returns:
        str: The text content of the parsed markup, or the input unchanged
        when it contains no HTML tags.
    """
    # Parse once and reuse the tree for both the "is it HTML?" check and the
    # text extraction, instead of parsing the same input twice.
    soup = BeautifulSoup(string, "html.parser")
    if not soup.find():
        return string
    return soup.get_text()