Add: function to convert HTML to text
This commit is contained in:
parent
ef1361ec39
commit
39d7f64ccf
1 changed files with 16 additions and 0 deletions
|
@ -51,3 +51,19 @@ def is_string_html(string: str) -> bool:
|
||||||
'''
|
'''
|
||||||
return bool(BeautifulSoup(string, "html.parser").find())
|
return bool(BeautifulSoup(string, "html.parser").find())
|
||||||
|
|
||||||
|
|
||||||
|
def html_to_text(string: str) -> str:
    """
    Convert an HTML string to plain text.

    The input is parsed once with BeautifulSoup's "html.parser". If the parse
    yields at least one tag, the text content of the document is returned
    with the tags removed; otherwise the input is returned unchanged.

    Parameters:
        string (str): The string to convert; may or may not contain HTML.

    Returns:
        str: The text extracted with ``get_text()`` when tags are present,
            otherwise the original string.
    """
    soup = BeautifulSoup(string, "html.parser")
    # Same criterion as is_string_html (at least one tag was found), but it
    # reuses this parse instead of parsing the input a second time.
    if soup.find() is None:
        return string
    return soup.get_text()
|
||||||
|
|
Loading…
Reference in a new issue