(?<=href=\").+?(?=\")|(?<=href=\').+?(?=\')
http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+
"^((http://)|(https://))?([a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,6}(/)"
# IPython session: pull a URL out of a string with re.findall.
# NOTE(review): the trailing `[.com|.cn]` is a character class that matches
# exactly ONE of the characters `.`, `c`, `o`, `m`, `|`, `n`; it is not an
# alternation of the suffixes ".com" / ".cn". The greedy `[^\s]*` backtracks
# until the last character is in that class, which is why the final "/" is
# missing from Out[4]. An alternation would be written `(?:\.com|\.cn)`.
# NOTE(review): the name `str` shadows the built-in for the rest of the session.
In [1]: import re
In [2]: str = "http://www.baidu.com/"
In [3]: regular = re.compile(r'[a-zA-Z]+://[^\s]*[.com|.cn]')
In [4]: re.findall(regular, str)
Out[4]: ['http://www.baidu.com']
很好用，感谢作者！
https://www.cnblogs.com/pzk7788/p/10498128.html
import re
# Print every ".apk" name found in test.txt, one per line.
# The context manager closes the file even if read() raises.
with open('test.txt') as f:
    F = f.read()
# A (possibly empty) run of characters that are not a dot, space, CR, LF,
# form feed or tab, followed by a literal ".apk" -- i.e. only the last
# dot-free segment before the extension is captured.
r = r'[^\.\x20\r\n\f\t]*\.apk'
s = re.findall(r, F)
# A parenthesised single argument prints identically under the Python 2
# print statement and the Python 3 print() function.
print('\n'.join(s))