Fix: make the descriptionHtml2Text function work when the string does not start with "<div"

Add: function to check whether a string is HTML
2023-09-27 19:10:14 +02:00 · 2023-09-27 19:08:27 +02:00
1 changed files with 21 additions and 5 deletions
--- a/archivist/sites/furaffinity/templatetags/custom_filters.py
+++ b/archivist/sites/furaffinity/templatetags/custom_filters.py
@ -7,6 +7,23 @@ from bs4 import BeautifulSoup

 register = template.Library()

+
+def is_html(string):
+    '''
+    Return True if ``string`` contains at least one HTML tag, else False.
+
+    Comparing the input with BeautifulSoup's re-serialised output is not a
+    reliable test: well-formed markup such as "<div>hi</div>" round-trips
+    unchanged (and would be reported as "not HTML"), while plain text that
+    contains "&" or "<" is entity-escaped on output (and would be reported
+    as "HTML"). Looking for a parsed tag avoids both problems.
+    '''
+    soup = BeautifulSoup(string, "html.parser")
+    # find(True) returns the first Tag in the parsed tree, or None when the
+    # input produced only text nodes (i.e. it contained no markup).
+    return soup.find(True) is not None
+
+
@register.filter
 def is_image(file_url):
    image_extensions = ['.png', '.jpg', '.jpeg', '.gif', '.webp']
@ -26,14 +43,13 @@ def is_pdf(file_url):

@register.filter
 def descriptionHtml2Text(description):
+    # Template filter: when is_html() reports the description as HTML, pass it
+    # through nh3.clean(), parse the result and return only its text content;
+    # otherwise return the description unchanged.
-    if description.startswith("<div"):
+    if is_html(description):
        cleanHtml = nh3.clean(description)
        soup = BeautifulSoup(cleanHtml, "html.parser")
-        descriptionText = soup.get_text()
+        text = soup.get_text()
+        return text
    else:
-        descriptionText = description
-    
-    return descriptionText
+        return description


@register.filter
Author	SHA1	Message	Date
Aroy-Art	c3b4e8300e	Fix: descriptionHtml2Text function to work if string not starting with "<div>"	2023-09-27 19:10:14 +02:00
Aroy-Art	b631c100b1	Add: function to check if string is html	2023-09-27 19:08:27 +02:00