Compare commits
2 commits
c0f847aefb
...
c3b4e8300e
Author | SHA1 | Date | |
---|---|---|---|
c3b4e8300e | |||
b631c100b1 |
1 changed files with 21 additions and 5 deletions
|
@ -7,6 +7,23 @@ from bs4 import BeautifulSoup
|
|||
|
||||
register = template.Library()
|
||||
|
||||
|
||||
def is_html(string):
    '''
    Check if string contains HTML markup.

    The string is parsed with BeautifulSoup's "html.parser" and counts as
    HTML when the resulting tree holds at least one tag.

    Comparing the input with its re-serialised parse tree is not a reliable
    test: well-formed markup such as "<div>hello</div>" serialises back
    unchanged (and would be reported as plain text), while plain text
    containing a bare "&" is re-serialised as "&amp;" (and would be reported
    as HTML).

    Returns True when at least one tag is found, False otherwise (including
    for empty or None input).
    '''
    # Nothing to parse: empty or missing input is never HTML
    if not string:
        return False
    soup = BeautifulSoup(string, "html.parser")
    # find(True) returns the first tag of any name, or None when the input
    # holds only text, comments or entities
    return soup.find(True) is not None
|
||||
|
||||
|
||||
@register.filter
|
||||
def is_image(file_url):
|
||||
image_extensions = ['.png', '.jpg', '.jpeg', '.gif', '.webp']
|
||||
|
@ -26,14 +43,13 @@ def is_pdf(file_url):
|
|||
|
||||
@register.filter
|
||||
def descriptionHtml2Text(description):
    '''
    Convert an HTML description to plain text.

    When is_html() reports markup in the description, the markup is
    sanitised with nh3.clean() and reduced to its text content with
    BeautifulSoup. Any other description is returned unchanged.
    '''
    # Empty or missing descriptions have nothing to convert
    if not description:
        return description
    if is_html(description):
        # Sanitise first so only cleaned markup is parsed for its text
        clean_html = nh3.clean(description)
        soup = BeautifulSoup(clean_html, "html.parser")
        return soup.get_text()
    return description
|
||||
|
||||
|
||||
@register.filter
|
||||
|
|
Loading…
Reference in a new issue