2024-03-14 19:46:19 +01:00
|
|
|
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
2024-03-18 12:40:43 +01:00
|
|
|
# Compiled once at import time; the pattern is split into commented pieces
# purely for readability (adjacent raw strings are concatenated).
_URL_PATTERN = re.compile(
    # Outer group: the complete URL.
    r'('
    # Either "scheme:[//][userinfo@]host" (scheme is 3-9 letters) ...
    r'(([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+'
    # ... or a scheme-less "www.host" / "userinfo@host".
    # The dot after "www" is escaped so that e.g. "wwwxyz" is not a URL.
    r'|(?:www\.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)'
    # Optional path, query string and fragment.  The fragment class uses
    # \w (word characters); a doubled backslash here would only match a
    # literal backslash and the letter "w", truncating "#section" to "#".
    r'((?:\/[\+~%\/.\w_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[.\!\/\w]*))?'
    r')',
    re.IGNORECASE,
)


def get_urls(string: str = None) -> list:
    """
    Extract URLs from a given string using a regular expression.

    Recognised forms are ``scheme:[//][userinfo@]host``, ``www.host`` and
    ``userinfo@host``, each optionally followed by a path, a query string
    and a fragment. Matching is case-insensitive.

    Parameters:
        string (str): The input string from which URLs need to be
            extracted. ``None`` (the default) and the empty string are
            treated as "no input".

    Returns:
        list: The matched URLs, in order of appearance. Empty when the
            input is ``None``/empty or contains no URL.
    """
    # The default argument is None; passing that to the regex engine would
    # raise TypeError, so treat missing/empty input as "no URLs".
    if not string:
        return []

    # group(0) is the whole match, which is what the outermost capturing
    # group of the pattern spans.
    return [match.group(0) for match in _URL_PATTERN.finditer(string)]
|
|
|
|
|
|
|
|
|