Compare commits

...

2 commits

View file

@ -1,6 +1,8 @@
import math, re import math, re
from bs4 import BeautifulSoup
def get_urls(string:str = None) -> list: def get_urls(string:str = None) -> list:
""" """
Extracts URLs from a given string using regular expressions. Extracts URLs from a given string using regular expressions.
@ -35,3 +37,33 @@ def convert_size(size_bytes: int) -> str:
p = math.pow(1024, i) p = math.pow(1024, i)
s = round(size_bytes / p, 2) s = round(size_bytes / p, 2)
return f"{s} {size_name[i]}" return f"{s} {size_name[i]}"
def is_string_html(string: str) -> bool:
    """
    Tell whether a string contains any HTML markup.

    The string is parsed with BeautifulSoup's "html.parser" backend and
    is considered HTML when the parse tree contains at least one tag.

    Parameters:
        string (str): The text to inspect.

    Returns:
        bool: True when at least one tag was found, False otherwise.
    """
    parsed = BeautifulSoup(string, "html.parser")
    # find() with no arguments yields the first tag in the tree, or None
    # when the input held no tags at all.
    first_tag = parsed.find()
    return bool(first_tag)
def html_to_text(string: str) -> str:
    """
    Convert an HTML string to plain text.

    The input is parsed once with BeautifulSoup's "html.parser" backend.
    When the parse tree contains at least one tag, the text content of the
    document is returned; otherwise the input is handed back unchanged.

    Parameters:
        string (str): The (possibly HTML) string to be converted.

    Returns:
        str: The text extracted from the markup, or the original string
        when no HTML tag was found in it.
    """
    # Parse a single time and reuse the tree for both the "is this HTML?"
    # check and the text extraction, instead of parsing the input twice.
    soup = BeautifulSoup(string, "html.parser")

    # find() with no arguments returns the first tag or None; no tag means
    # the input is plain text and must be returned untouched.
    if soup.find() is None:
        return string

    return soup.get_text()