Gallery-Archivist/archivist/utils/strings.py



import re

# Compiled once at import time so repeated calls do not rebuild the pattern.
# The pattern is split into adjacent raw-string pieces purely for readability;
# Python concatenates them into a single expression.
_URL_PATTERN = re.compile(
    r'('
    # Host part, form 1: "scheme:" with optional "//" and optional
    # "userinfo@", followed by the host name.
    r'(([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+'
    # Host part, form 2: no scheme, but a literal "www." prefix or a
    # "userinfo@" prefix (the dot is escaped so "wwwx..." is not accepted).
    r'|(?:www\.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)'
    # Optional tail: path, "?" + query string, "#" + fragment. The fragment
    # class uses \w so that ordinary word characters are kept after "#".
    r'((?:\/[\+~%\/.\w_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[.\!\/\w]*))?'
    r')',
    re.IGNORECASE,
)


def get_urls(string: str = None) -> list:
    """
    Extracts URLs from a given string using regular expressions.

    Recognised forms are "scheme:[//][userinfo@]host", "www.host" and
    "userinfo@host", each optionally followed by a path, a query string
    and a fragment. Matches are returned in the order they appear.

    Parameters:
    string (str): The input string from which URLs need to be extracted.
        None (the default) and the empty string are both accepted.

    Returns:
    list: A list of URLs extracted from the input string. Empty when the
        input is None, empty, or contains no URL.
    """
    # The default argument is None, which the regex engine cannot scan;
    # treat it (and "") as "nothing to search" instead of raising TypeError.
    if not string:
        return []

    # Group 0 is the whole match, identical to the outermost capture group.
    return [match.group(0) for match in _URL_PATTERN.finditer(string)]
Add: util func to get all urls in a string 2024-03-14 19:46:19 +01:00

			`import re`

Update: Docs string to include parameters & return 2024-03-18 12:40:43 +01:00			`def get_urls(string:str = None) -> list:`
			`"""`
			`Extracts URLs from a given string using regular expressions.`

			`Parameters:`
			`string (str): The input string from which URLs need to be extracted.`

			`Returns:`
			`list: A list of URLs extracted from the input string.`
			`"""`
Add: util func to get all urls in a string 2024-03-14 19:46:19 +01:00			`regex = re.compile(`
			`r'((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+\|(?:www.\|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[.\!\/\\w]*))?)',`
			`re.IGNORECASE)`

			`return [x[0] for x in re.findall(regex, string)]`