Add: util func to get all urls in a string
This commit is contained in:
parent
7556740579
commit
6fdc478b62
1 changed files with 15 additions and 0 deletions
15
archivist/utils/strings.py
Normal file
15
archivist/utils/strings.py
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Compiled once at import time; the pattern never changes between calls.
# Alternatives, in order: "scheme:[//][user@]host", then "www.host" or
# "user@host"; followed by an optional path, query string and fragment.
_URL_PATTERN = re.compile(
    r'((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www\.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[.\!\/\\\w]*))?)',
    re.IGNORECASE)


def get_urls(string):
    '''
    Return every URL-like substring found in ``string``.

    A match is one of:
      * a scheme of 3 to 9 letters followed by ":" (and optionally "//"),
        an optional "user@" part and a host name;
      * a host name prefixed with a literal "www.";
      * a "user@host" token (so e-mail-like addresses are returned too).
    Each may be followed by a path, a query string and a fragment.
    Matching is case-insensitive.

    :param string: the text to scan.
    :return: list of matched substrings in order of appearance; an empty
        list when nothing matches.
    '''
    # Group 1 of the pattern spans the whole expression, so the full match
    # text (group 0) is identical to it.
    return [match.group(0) for match in _URL_PATTERN.finditer(string)]
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue