Compare commits
2 commits
4af5fdf9e0
...
39d7f64ccf
Author | SHA1 | Date | |
---|---|---|---|
39d7f64ccf | |||
ef1361ec39 |
1 changed files with 32 additions and 0 deletions
|
@@ -1,6 +1,8 @@
|
||||||
|
|
||||||
import math, re
|
import math, re
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
def get_urls(string:str = None) -> list:
|
def get_urls(string:str = None) -> list:
|
||||||
"""
|
"""
|
||||||
Extracts URLs from a given string using regular expressions.
|
Extracts URLs from a given string using regular expressions.
|
||||||
|
@@ -35,3 +37,33 @@ def convert_size(size_bytes: int) -> str:
|
||||||
p = math.pow(1024, i)
|
p = math.pow(1024, i)
|
||||||
s = round(size_bytes / p, 2)
|
s = round(size_bytes / p, 2)
|
||||||
return f"{s} {size_name[i]}"
|
return f"{s} {size_name[i]}"
|
||||||
|
|
||||||
|
|
||||||
|
def is_string_html(string: str) -> bool:
    """
    Check whether a string contains HTML markup.

    The string is parsed with BeautifulSoup's "html.parser" backend and is
    treated as HTML when the parser's ``find()`` returns a result, i.e. at
    least one tag was located.

    Parameters:
        string (str): The string to be checked.

    Returns:
        bool: True if a tag was found, False otherwise. Empty or None
        input is reported as False without invoking the parser.
    """
    # Nothing to parse: short-circuit so empty/None input never reaches
    # the parser.
    if not string:
        return False
    return bool(BeautifulSoup(string, "html.parser").find())
|
||||||
|
|
||||||
|
|
||||||
|
def html_to_text(string: str) -> str:
    """
    Convert an HTML string to plain text.

    The input is parsed once with BeautifulSoup's "html.parser" backend.
    If the parser finds at least one tag, the text content of the parsed
    document is returned; otherwise the input is returned unchanged.

    Parameters:
        string (str): The HTML string to be converted.

    Returns:
        str: The text extracted from the markup, or the original input
        when it contains no tags (or is empty).
    """
    # Empty input has no markup to strip; hand it back untouched.
    if not string:
        return string

    # Parse a single time and reuse the tree for both the "is this HTML?"
    # check and the text extraction, instead of parsing twice.
    soup = BeautifulSoup(string, "html.parser")

    # Same criterion is_string_html() applies: no tag found means the
    # input is not HTML, so it is returned as-is.
    if not soup.find():
        return string
    return soup.get_text()
|
||||||
|
|
Loading…
Reference in a new issue