import re
# Sample text with phone numbers embedded in longer runs of characters.
# Bound to `text` (not `str`) so the builtin `str` stays usable in this file.
text = "15838477645dfdfdf15887988765dfdf1157990087651fd157385367891fdf15826789876qqq15838545678a"

# Method 1: the number must be at the start of the text or preceded by a
# non-digit, and must be at the end of the text or followed by a non-digit.
# NOTE: the boundary characters are consumed by the match, so two numbers
# separated by exactly one non-digit character are not both found.
pattern1 = r"(?:^|[^\d])(1\d{10})(?:$|[^\d])"
phone_list1 = re.compile(pattern1).findall(text)

# Method 2: zero-width negative lookbehind/lookahead assert that no digit
# appears directly before or after the number, without consuming anything.
pattern2 = r"(?<!\d)(1\d{10})(?!\d)"
phone_list2 = re.compile(pattern2).findall(text)

print('phone_list1:', phone_list1)
print('phone_list2:', phone_list2)
# Part 2: extraction example 2
import requests
import re
from bs4 import BeautifulSoup
def down(url="http://www.haomahao.com/", out_path="手机号.txt", timeout=10):
    """Download a web page and save the phone-number-like strings found in it.

    Args:
        url: Page to fetch.
        out_path: File the matches are written to, one per line (UTF-8).
            An existing file is overwritten.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.
    """
    response = requests.get(url, timeout=timeout)  # fetch the page
    # Use the encoding detected from the content to avoid garbled text.
    response.encoding = response.apparent_encoding
    # Strip the markup and keep only the visible text of the page.
    page_text = BeautifulSoup(response.text, "html.parser").getText()
    # "1", then one of 3/5/7/8/9, then nine more digits.
    # NOTE: the pattern is not anchored, so it also matches inside longer
    # digit runs.
    pattern = r"1[35789]\d{9}"
    numbers = re.findall(pattern, page_text)
    # `with` closes the file even if the write fails.
    with open(out_path, mode="w", encoding="utf-8") as f:
        f.write("\n".join(numbers))


if __name__ == "__main__":
    down()
# Part 3: extraction example 3
import re
# Match phone numbers with a regular expression.
def judge_phone_number(account):
    """Return every 11-digit mobile number found in *account*.

    Accepted prefixes are 13x, 145/147, 15x, 166, 173/176/177 and 18x.
    Inside a character class ``|`` is a literal character, so the allowed
    digits are listed directly (``[57]``, ``[367]``).

    Args:
        account: Text to search.

    Returns:
        A list of the matched numbers, in order of appearance; empty when
        nothing matches.
    """
    return re.findall(
        r'(13\d{9}|14[57]\d{8}|15\d{9}|166\d{8}|17[367]\d{8}|18\d{9})',
        account,
    )


if __name__ == '__main__':
    s = judge_phone_number('asdasdasdasd15319547777sdfs')
    print(s)
#############################################################################
import re
# Match phone numbers with a regular expression.
def judge_phone_number(account):
    """Return the text that runs from a ``1`` up to the literal ``sdfs``.

    The lazy ``.*?`` makes each capture stop at the first ``sdfs`` that
    follows; the ``sdfs`` marker itself is not part of the result.

    Args:
        account: Text to search.

    Returns:
        A list of captured strings; empty when nothing matches.
    """
    return re.findall('(1.*?)sdfs', account)


if __name__ == '__main__':
    s = judge_phone_number('asdasdasdasd15319547777sdfs')
    print(s)
###########################################################################
import re
# Match phone numbers with a regular expression.
def judge_phone_number(account):
    """Return the text that runs from a ``1`` through the next ``7777``.

    The lazy ``.*?`` makes each capture end at the first ``7777`` that
    follows the starting ``1``.

    Args:
        account: Text to search.

    Returns:
        A list of captured strings; empty when nothing matches.
    """
    return re.findall('(1.*?7777)', account)


if __name__ == '__main__':
    s = judge_phone_number('asdasdasdasd15319547777sdfs')
    print(s)
# Part 4: extraction example 4
# -*- coding:utf-8-*-
import re
# Validate that a whole string is a mobile number: an accepted 3-digit
# prefix followed by exactly eight more digits.
# Inside a character class '|' is a literal character, so the allowed digits
# are listed directly ([57], [367]).
phone_pat = re.compile(r'^(13\d|14[57]|15\d|166|17[367]|18\d)\d{8}$')

if __name__ == '__main__':
    # Interactive loop: keep asking until the input stream ends (Ctrl-D)
    # or the user interrupts (Ctrl-C).
    while True:
        try:
            phone = input('请输入您的手机号:')
        except (EOFError, KeyboardInterrupt):
            break
        if phone_pat.search(phone):
            print('正常手机号')
        else:
            print('不是手机号')
# Part 5: extraction example 5
import re
def main():
    """Prompt for a phone number and print whether it is valid.

    A valid number is ``1``, then one of 3/5/6/7/8, then nine more digits.
    """
    tel = input("请输入手机号:")
    # The pattern is anchored with ^ and $ because an unanchored
    # r"1[35678]\d{9}" also accepts inputs longer than 11 digits.
    ret = re.match(r"^1[35678]\d{9}$", tel)
    if ret:
        print("匹配成功")
    else:
        print("匹配失败")


if __name__ == "__main__":
    main()
#########################################################
import re
def main():
    """Read a phone number from standard input and print the match result.

    The whole input must be ``1``, one of 3/5/6/7/8, and nine more digits;
    the ^ and $ anchors reject anything longer.
    """
    tel = input("请输入手机号:")
    ret = re.match(r"^1[35678]\d{9}$", tel)
    print("匹配成功" if ret else "匹配失败")


if __name__ == "__main__":
    main()
##################################################################
import re
def main(ori_str):
    """Print every AA, AAA, AABB, ABAB and AAAA character run in *ori_str*.

    Each shape is searched independently with ``re.finditer`` (so matches of
    one shape never overlap each other), and every match object is printed
    after the shape's label.

    Args:
        ori_str: Text to scan, e.g. a phone number.
    """
    shapes = (
        ('AA=', r'(.)\1'),
        ('AAA=', r'(.)\1{2}'),
        ('AABB=', r'(.)\1(.)\2'),
        # Two characters followed by the same two characters again.
        ('ABAB=', r'(.)(.)\1\2'),
        ('AAAA=', r'(.)\1{3}'),
    )
    for label, pattern in shapes:
        for match in re.finditer(pattern, ori_str):
            print(label, match)


if __name__ == '__main__':
    main('17733337474')
######################################################################
from re import findall
# Sample list of four-character words, separated by "、".
text = '''行尸走肉、金蝉脱壳、百里挑一、金玉满堂、
背水一战、霸王别姬、天上人间、不吐不快、海阔天空、
情非得已、满腹经纶、兵临城下、春暖花开、插翅难逃、
黄道吉日、天下无双、偷天换日、两小无猜、卧虎藏龙、
珠光宝气、簪缨世族、花花公子、绘声绘影、国色天香、
相亲相爱、八仙过海、金玉良缘、掌上明珠、皆大欢喜、
浩浩荡荡、平平安安、秀秀气气、斯斯文文、高高兴兴'''
print("全部词语: ", text)

# Two alternatives: ABAC (first and third characters equal) or AABB.
pattern = r'(((.).\3.)|((.)\5(.)\6))'
# findall returns one tuple of groups per match; index 0 is the outermost
# group, i.e. the whole matched word.
print("匹配ABAC和AABB的词语:", [groups[0] for groups in findall(pattern, text)])
# Part 6: extraction example 6
import pandas as pd
import csv
import re
class PhoneMatch(object):
    """Select rows of a contact CSV by phone-number shape and by name match.

    The CSV is expected to provide the columns ``联系方式名``, ``法人名`` and
    ``电话``, which are the ones read by the methods below.
    """

    # One compiled pattern per digit shape that makes a number qualify.
    _SHAPE_PATTERNS = (
        re.compile(r"(\d)\1\1(?!\1)\d"),  # AAAB
        re.compile(r'(.)\1(.)\2'),  # AABB
        re.compile(r'(\w\w)\1+'),  # ABAB
        re.compile(r"(\w)\1{3}"),  # AAAA
        # ABCD: three ascending steps of one, then the final digit.
        re.compile(r"(?:0(?=1)|1(?=2)|2(?=3)|3(?=4)|4(?=5)|5(?=6)|6(?=7)|7(?=8)|8(?=9)){3}\d"),
        # DCBA: three descending steps of one, then the final digit.
        re.compile(r"(?:9(?=8)|8(?=7)|7(?=6)|6(?=5)|5(?=4)|4(?=3)|3(?=2)|2(?=1)|1(?=0)){3}\d"),
    )

    def __init__(self, file_path) -> None:
        """Remember the path of the CSV file that ``run`` will read."""
        self.file_path = file_path

    def look_name(self, data):
        """Return the rows whose ``联系方式名`` equals their ``法人名``."""
        return data.loc[data['联系方式名'] == data['法人名']]

    def phone_number_match(self, phone) -> bool:
        """Tell whether *phone* contains one of the recognised digit shapes.

        Args:
            phone: The number to test; it is converted with ``str`` first,
                so integers read from a CSV are accepted.

        Returns:
            True if any of the AAAB, AABB, ABAB, AAAA, ABCD or DCBA shapes
            occurs anywhere in the number, otherwise False.
        """
        digits = str(phone)
        return any(p.search(digits) for p in self._SHAPE_PATTERNS)

    def run(self):
        """Read the CSV and return the selected rows as one DataFrame.

        The result is the rows whose ``电话`` passes ``phone_number_match``,
        followed by the rows returned by ``look_name``. A row that satisfies
        both conditions therefore appears twice.
        """
        data = pd.read_csv(self.file_path)
        same_name_rows = self.look_name(data)
        # Index labels of the rows whose phone number has no recognised shape.
        drop_index = [
            i for i, phone in data["电话"].items()
            if not self.phone_number_match(phone)
        ]
        n_data = data.drop(drop_index)
        return pd.concat([n_data, same_name_rows])
if __name__ == "__main__":
    # Filter ./tell.csv, report how many rows were selected, and write them
    # to ./tellss.csv without the index column.
    ph = PhoneMatch("./tell.csv")
    g = ph.run()
    print(len(g))
    g.to_csv('./tellss.csv', index=False)