69 lines
2 KiB
Python
69 lines
2 KiB
Python
|
|
import math, re
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Compiled once at import time. Matches scheme-prefixed URLs, bare "www."
# hosts and user@host forms, each followed by an optional
# path / query / fragment tail.
_URL_REGEX = re.compile(
    r'((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+'
    # "www\." needs the escaped dot, otherwise any character is accepted
    # after "www" (e.g. "wwwXhost").
    r'|(?:www\.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)'
    # The fragment class must contain "\w" (word characters); with "\\w" it
    # only matched a literal backslash and the letter "w", so fragments such
    # as "#section" were cut off right after the "#".
    r'((?:\/[\+~%\/.\w_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[.\!\/\\\w]*))?)',
    re.IGNORECASE)


def get_urls(string: str = None) -> list:
    """
    Extracts URLs from a given string using regular expressions.

    Parameters:
        string (str): The input string from which URLs need to be extracted.
            May be None or empty, in which case no URLs are found.

    Returns:
        list: A list of URLs (as strings) extracted from the input string,
            in order of appearance. Empty if nothing matched.
    """
    # The default argument is None; passing that to the regex engine would
    # raise TypeError, so treat "no input" as "no URLs".
    if not string:
        return []
    # group(0) is the full match, identical to the outermost capture group.
    return [match.group(0) for match in _URL_REGEX.finditer(string)]
|
|
|
|
|
|
def convert_size(size_bytes: int) -> str:
    """
    A function that converts the given size in bytes to a human-readable format.

    Parameters:
        size_bytes (int): A non-negative number representing the size in
            bytes to be converted.

    Returns:
        A string representing the converted size with the appropriate binary
        unit (B, KiB, MiB, GiB, etc.), rounded to two decimals, e.g.
        "1.5 KiB". Zero is returned as "0B". Sizes beyond the largest unit
        are expressed in YiB.

    Raises:
        ValueError: If size_bytes is negative.
    """
    if size_bytes == 0:
        return "0B"
    if size_bytes < 0:
        raise ValueError("size_bytes must be non-negative")

    size_name = ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")

    # Repeated division instead of math.log: floating-point logarithms can
    # land just below an integer at exact powers of 1024, and they produce a
    # negative index for values below 1. Stopping at the last unit also
    # prevents indexing past the end of size_name for very large sizes.
    value = float(size_bytes)
    unit_index = 0
    last_index = len(size_name) - 1
    while value >= 1024 and unit_index < last_index:
        value /= 1024
        unit_index += 1

    return f"{round(value, 2)} {size_name[unit_index]}"
|
|
|
|
|
|
def is_string_html(string: str) -> bool:
    """
    Tell whether a string contains HTML markup.

    Parameters:
        string (str): The string to be checked

    Returns:
        bool: True if string is HTML, False otherwise
    """
    # Parse with the built-in parser and look for the first element; the
    # truthiness of that lookup result is the answer.
    parsed = BeautifulSoup(string, "html.parser")
    first_element = parsed.find()
    return bool(first_element)
|
|
|
|
|
|
def html_to_text(string):
    """
    Return the plain-text content of an HTML string.

    Parameters:
        string (str): The HTML string to be converted.

    Returns:
        str: The text extracted from the markup when the input is detected
            as HTML; otherwise the input is returned unchanged.
    """
    # Non-HTML input is passed straight through.
    if not is_string_html(string):
        return string
    return BeautifulSoup(string, "html.parser").get_text()
|