Compare commits
No commits in common. "39d7f64ccf064dbbe7d744a499c10d884622e0ed" and "4af5fdf9e0c2ce8a812fe747ae7fa694c70dc8de" have entirely different histories.
39d7f64ccf
...
4af5fdf9e0
1 changed file with 0 additions and 32 deletions
|
@@ -1,8 +1,6 @@
|
|||
|
||||
import math, re
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
def get_urls(string:str = None) -> list:
|
||||
"""
|
||||
Extracts URLs from a given string using regular expressions.
|
||||
|
@@ -37,33 +35,3 @@ def convert_size(size_bytes: int) -> str:
|
|||
p = math.pow(1024, i)
|
||||
s = round(size_bytes / p, 2)
|
||||
return f"{s} {size_name[i]}"
|
||||
|
||||
|
||||
def is_string_html(string: str) -> bool:
    """
    Determine whether a string contains HTML markup.

    Parameters:
    string (str): The text to inspect.

    Returns:
    bool: True if the parser finds at least one tag in the text, False otherwise.
    """
    parsed = BeautifulSoup(string, "html.parser")
    # find() with no filter yields the first tag in the tree, or None when
    # the input parsed to plain text only.
    first_tag = parsed.find()
    return bool(first_tag)
|
||||
|
||||
|
||||
def html_to_text(string: str) -> str:
    """
    Convert an HTML string to plain text by stripping all HTML tags.

    Input that contains no tags is returned unchanged (the very same
    string, not a re-serialised copy).

    Parameters:
    string (str): The HTML string to be converted.

    Returns:
    str: The plain text version of the input HTML string, or the input
    itself when it contains no HTML tags.
    """
    # Parse once and reuse the tree for both the "is this HTML?" check and
    # the text extraction, instead of parsing the same input twice.
    soup = BeautifulSoup(string, "html.parser")

    # find() with no filter returns the first tag, or None for plain text.
    if not soup.find():
        return string

    return soup.get_text()
|
||||
|
|
Loading…
Reference in a new issue