Gallery-Archivist/archivist/utils/strings.py

22 lines
579 B
Python
Raw Normal View History

import re
# Compiled once at import time so repeated calls reuse the same pattern object.
# Capture groups: 1 = whole match, 2 = scheme/userinfo/host part, 3 = scheme,
# 4 = path/query/fragment tail.
# NOTE(review): the dot in ``www.`` is unescaped, so it matches any character
# (e.g. "wwwX"); confirm whether ``www\.`` was intended.
# NOTE(review): ``\\w`` in the last character class is, inside a raw string,
# a literal backslash plus the letter "w" -- not the ``\w`` word class -- so
# fragments such as "#section" are cut off after "#". Left unchanged to keep
# the current matching behaviour; confirm before altering.
_URL_REGEX = re.compile(
    r'((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[.\!\/\\w]*))?)',
    re.IGNORECASE)


def get_urls(string: str = None) -> list:
    """
    Extract URL-like substrings from a string using a regular expression.

    The pattern matches text that starts with a scheme (3-9 letters followed
    by a colon, e.g. ``https:``), with ``www`` plus one character, or with a
    ``user@`` prefix, so e-mail-style ``user@host`` text is returned as well.

    Parameters:
        string (str): The input string from which URLs need to be extracted.
            ``None`` (the default) or an empty string yields an empty list.

    Returns:
        list: The matched substrings, in the order they appear in the input.
    """
    # The default argument is None, which re cannot scan; treat "no input"
    # as "no URLs" instead of raising TypeError.
    if not string:
        return []

    # findall() returns one tuple per match because the pattern has capture
    # groups; index 0 is the outermost group, i.e. the full matched text.
    return [groups[0] for groups in _URL_REGEX.findall(string)]