1. 提取电话
# Phone-number extraction demo: four alternative regex strategies are run over
# the same noisy sample text and each result list is printed for comparison.
import re

# Sample corpus of phone-like fragments in many formats, mixed with labels,
# punctuation and other noise. Kept on one line exactly as provided.
text = ''' "(+91)966-352-2720", "☎ +91 9663522720" 9663522720 +79082343434 8(912)2342554, +7 982 342 sdfdsf 34 34 fsf 8-923-132-34-23 +7 982 342 34 34! sdfsd 13045542316 01727 830357 Tlf. 788 96 888 8553664787;3 '(866) 997-3523(615) 488-4607' +TollFree:1-888-412-3160 914-345-0800 1 '+?911235410HorariodeLV:09:0018:00' "Telephone:4697021130" CALL US: +91- 421- 435 00 02, 224 84 72 "+57x018000423 670" "416-998-8438 | 416-630-3053" "Whatsapp + 44 75 88755 173" (852)95788980 | Tel: (852) 23428887" phone: +974 4411-0147 | : +974 5512-3374 fax: 517/253-7366 ¡llámanos! (55) 71 00 35 00 +dubai:043804009|abudhabi:024146688 monfri:9:00am12:30pm/2:00pm5:30pm(cet)33983400075 https:wa-me/15551234567 216-812-1615 ext 115 wsp +56 9 5713 2204 call us +91 96888 78333 +contact: +91 70953 23456 15551234569/15551234567 port perry - 289-485-brew (2739) newmarket - 289-803-9111 (518) 858u200b-u200b9589 +971 58505 8980 waes footwear, u2028suite 2, duke st chambers, bridge street, tq7 1hx, united kingdom (+44) 020 3287 7957 contact-alexy-metals-216-410-8661 toll free +1 (888) 815-6150 call us: +91- 421- 435 00 02, 224 84 72 +61 405 385 704 (0044) (0)1782 611 599 0123-456-7890 55 6930 9729 +1 (800) 123 456 789 '''

# Strategy 0: four alternatives, tried in order at each position:
#   1. optional prefix, then digits grouped 3-3-2-2 with optional separators
#   2. digits grouped 3-3-(4..6), first group optionally parenthesised
#   3. optional prefix (may contain "/"), then a run of 8+ digits/hyphens
#   4. optional "+"/digit/space prefix, then two runs of 5+ digits (the first
#      run may contain spaces)
# Raw strings keep the regex escapes (\d, \(, \+ ...) away from Python's own
# string-escape processing; the pattern text itself is unchanged.
numbers0 = re.findall(
    r"[\(\+\)]*[\+\d\)]*[\( -]?\d{3}[\) -]?\d{3}[ -]?\d{2}[ -]?\d{2}"
    r"|[\(]?[0-9]{3}[\)]?[- \.]?[0-9]{3}[- \.]?[0-9]{4,6}"
    r"|[\(\+\)/\d+]* *[\-\d]{8,}"
    r"|[\+\d ]*[\d ]{5,} ?[\d]{5,}",
    text,
)
# Drop a stray leading ")" or "/" picked up from the preceding token, and trim
# surrounding whitespace in both branches so every entry is normalised alike.
numbers0 = [
    hit[1:].strip() if hit.startswith((")", "/")) else hit.strip()
    for hit in numbers0
]
print('numbers0:', numbers0)

# Strategy 1: optional "(", "+", ")" prefix and non-zero digits, an optional
# space, 8+ characters drawn from digits, ".", "-", "(" and ")", an optional
# space and one closing digit.
numbers1 = re.findall(r"[\(\+\)]*[1-9]* ?[0-9.\-\(\)]{8,} ?[0-9]", text)
print('numbers1:', numbers1)

# Strategy 2: optional prefix of parentheses, "+" and digits, an optional
# space, then digits grouped 3-3-(4..6) with optional "-", " " or "." separators.
numbers2 = re.findall(
    r"[\(\+\d\)]* ?[\(]?[0-9]{3}[\)]?[- \.]?[0-9]{3}[- \.]?[0-9]{4,6}",
    text,
)
print('numbers2:', numbers2)

# Strategy 3: a loose run of "+", digits, parentheses, spaces and hyphens;
# only candidates containing more than 7 digits are kept.
numbers3 = []
for hit in re.findall(r"[\+\d\(\) ]+[\d -]{2,} ?[\d-]{2,}", text):
    if len(re.sub(r"\D", "", hit)) <= 7:
        continue
    candidate = hit.strip()
    # Strip BEFORE slicing: a match may begin with whitespace, and slicing the
    # raw match would then remove that space instead of the junk character
    # (e.g. " - 289-803-9111" must become "289-803-9111").
    if candidate[0] not in "+(" and not candidate[0].isdigit():
        candidate = candidate[1:].strip()
    numbers3.append(candidate)
print('numbers3:', numbers3)
2. 提取邮箱
# Email extraction demo: pull well-formed addresses out of a sample string.
import re

# Sample input: four well-formed addresses followed by malformed candidates
# (missing domain, missing local part, missing TLD, doubled "@").
emails = """ geon@ihateregex.io test@gmail.com mail@test.org mail@testing.com hello@ @test email@gmail theproblem@test@gmail.com """

# local-part "@" domain "." TLD (2-15 letters).
# The lookbehind/lookahead require the candidate to stand on its own: without
# them an unanchored search returns the fragment "test@gmail.com" from the
# malformed "theproblem@test@gmail.com", or cuts an over-long TLD short.
EMAIL_PATTERN = re.compile(
    r"(?<![\w.%+@-])"
    r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,15}"
    r"(?![\w@])"
)

emails_0 = EMAIL_PATTERN.findall(emails)
print('emails_0:', emails_0)