From ef1361ec39f30caff425bb91319182c887b073f8 Mon Sep 17 00:00:00 2001
From: Aroy-Art <Aroy-Art@pm.me>
Date: Tue, 4 Jun 2024 20:36:30 +0200
Subject: [PATCH 1/2] Add: util function to check if string is HTML

---
 archivist/utils/strings.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/archivist/utils/strings.py b/archivist/utils/strings.py
index 408e7af..027b881 100644
--- a/archivist/utils/strings.py
+++ b/archivist/utils/strings.py
@@ -1,6 +1,8 @@
 
 import math, re
 
+from bs4 import BeautifulSoup
+
 def get_urls(string:str = None) -> list:
     """
     Extracts URLs from a given string using regular expressions.
@@ -35,3 +37,17 @@ def convert_size(size_bytes: int) -> str:
         p = math.pow(1024, i)
         s = round(size_bytes / p, 2)
         return f"{s} {size_name[i]}"
+
+
+def is_string_html(string: str) -> bool:
+    '''
+    Check whether a string contains HTML markup.
+
+    Parameters:
+        string (str): The string to be checked
+
+    Returns:
+        bool: True if the parser finds at least one tag, False otherwise
+    '''
+    return bool(BeautifulSoup(string, "html.parser").find())
+

From 39d7f64ccf064dbbe7d744a499c10d884622e0ed Mon Sep 17 00:00:00 2001
From: Aroy-Art <Aroy-Art@pm.me>
Date: Tue, 4 Jun 2024 21:00:44 +0200
Subject: [PATCH 2/2] Add: function to convert HTML to text

---
 archivist/utils/strings.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/archivist/utils/strings.py b/archivist/utils/strings.py
index 027b881..7e861e2 100644
--- a/archivist/utils/strings.py
+++ b/archivist/utils/strings.py
@@ -51,3 +51,19 @@ def is_string_html(string: str) -> bool:
     '''
     return bool(BeautifulSoup(string, "html.parser").find())
 
+
+def html_to_text(string: str) -> str:
+    """
+    Convert an HTML string to plain text by stripping all HTML tags.
+
+    Parameters:
+        string (str): The string to convert; it may or may not contain HTML.
+
+    Returns:
+        str: The text content of the markup, or the input unchanged when
+            the parser finds no tag in it (same test as is_string_html).
+    """
+    # Parse once and reuse the tree for both the tag check and extraction;
+    # find() returns None when the input contains no tag at all.
+    soup = BeautifulSoup(string, "html.parser")
+    return soup.get_text() if soup.find() is not None else string