import re
# Literal match: re.search returns a Match for the first occurrence of the
# pattern, or None when the pattern does not occur.
a = "湖南湖北广东广西"
pat = "湖北"
result = re.search(pat, a)
print(result)

# \d matches a single digit, so "1" followed by ten \d picks out an
# 11-character number.  The pattern is a raw string so that "\d" reaches the
# regex engine unchanged instead of being an invalid string escape.
b = "136892763900"
pat2 = r"1\d\d\d\d\d\d\d\d\d\d"
print(re.search(pat2, b))

# \W matches one non-word character and \w one word character.
c = "@@@@@@@@@@##@!_tdyuhdihdiw"
pat3 = r"\W\w\w"
print(re.search(pat3, c))
# Character classes: every [...] matches exactly one character from its set.
d = "!@#$@#@##$张三%$^%$%#@$boy#@%##$%$$@#@#23@#@#@#@##$%$%$"
pat1, pat2, pat3 = (
    r"[\u4e00-\u9fa5][\u4e00-\u9fa5]",  # two characters in U+4E00..U+9FA5
    r"[a-z][a-z][a-z]",  # three lowercase ASCII letters
    r"[0-9][0-9]",  # two ASCII digits
)
# Run each pattern against the same text and keep the first match of each.
result1, result2, result3 = [
    re.search(pattern, d) for pattern in (pat1, pat2, pat3)
]
print(result1, result2, result3)
# Mobile-number shape: "1", then one of 3/5/7/8, then nine more digits.
# Raw string so "\d" is passed to the regex engine rather than being treated
# as a (invalid) string escape.
b = "18689276390"
pat2 = r"1[3578]\d\d\d\d\d\d\d\d\d"
phone_match = re.search(pat2, b)
print(phone_match)

# "py", then exactly one character out of a/b/c/d/t, then "hon".
c = "nsiwsoiwpythonjsoksosj"
pat3 = r"py[abcdt]hon"
# Search c with pat3 here (not the phone pattern a second time), so the
# pattern defined just above is actually exercised.
python_match = re.search(pat3, c)
print(python_match)
# Anchors and quantifiers.  Only pat5 is searched below; the other patterns
# are kept so they can be swapped into the search call when experimenting.
d = "135738484941519874888813774748687"
pat1 = r"..."  # any three characters (no newlines)
# "^" anchors at the start of the string.  Raw string so "\d" is not an
# invalid string escape.
pat2 = r"^135\d\d\d\d\d\d\d\d"
pat3 = r".*8687$"  # "$" anchors at the end of the string
pat4 = r"8*"  # zero or more "8": can match the empty string
pat5 = r"8+"  # one or more "8"
print(re.search(pat5, d))
# {m,n}: at least m and at most n repetitions of the preceding element.
a = "234ded65de45667888991jisw"
pat1 = r"\d{8,10}"
digit_run = re.search(pat1, a)
print(digit_run)

# "|" alternation: a mobile-style number OR a "ddd-ddddddd" landline number.
# Only `a` is searched; `b` is a second sample input for the same pattern.
b = "027-1234567"
a = "13699998888"
pat1 = r"1[3578]\d{9}|\d{3}-\d{7}"
mobile_or_landline = re.search(pat1, a)
print(mobile_or_landline)
# Capturing groups: group(3) is the text matched by the third parenthesised
# sub-pattern.
a = "jiwdjeodjo@$#python%$$^^&*&^%$java#@!!!!!!!!!!!!!!13688889999!!!!!!!!!!!!!!!!!#@#$#$"
pat = r"(python).{0,}(java).{0,}(1[3578]\d{9})"
grouped = re.search(pat, a)
print(grouped.group(3))

# With a single capturing group, findall returns just the captured text.
a = "jiwdjeodjo@$#python%$$^^&*&^%$java#@!!!!!!!!!!!!!!aaa我要自学网bbb!!!!!!!!!!!!!!!!!#@#$#$"
pat = r"aaa(.*?)bbb"
captured = re.findall(pat, a)
print(captured)

# Greedy ".*" runs to the LAST "</div>" in the text.
strr = 'aa<div>test1</div>bb<div>test2</div>cc'
pat1 = r"<div>.*</div>"
greedy = re.search(pat1, strr)
print(greedy)

# Lazy ".*?" stops at the NEAREST "</div>", so each element matches separately.
strr = 'aa<div>test1</div>bb<div>test2</div>cc'
pat1 = r"<div>.*?</div>"
lazy = re.findall(pat1, strr)
print(lazy)
import re
# re.I makes the compiled pattern case-insensitive, so "Python" matches
# "PYTHON".
strr = "PYTHON666Java"
pat = re.compile(r"Python", flags=re.I)
case_insensitive_hit = pat.search(strr)
print(case_insensitive_hit)
import re
# search() reports only the first occurrence; group() returns its text.
strr = "javapythonjavahtmlpythonjs"
pat = re.compile(r"python")
first_hit = pat.search(strr)
print(first_hit.group())
import re
# The backslash at the end of each line continues the string literal, so
# strr is one long line with no newline characters in it.
strr="hello--------hello-----------\
---------hello-----------------\
---------hello--hello----------------\
----------hello---------hello----hello----------"
pat = re.compile(r"hello")
# finditer yields one Match object per non-overlapping occurrence; collect
# the matched text of each one.
list1 = [match.group() for match in pat.finditer(strr)]
print(list1)
import re
# Sample inputs for the two demos below.
strr1 = "张三,,,李四,,,,,,,,,王五,,,,,,,,赵六"
strr2 = "hello 123,hello 456!"

# split(): cut the text wherever a run of one or more commas occurs.
pat1 = re.compile(r",+")
result1 = pat1.split(strr1)

# sub(): replace every run of digits with "666".
pat2 = re.compile(r"\d+")
result2 = pat2.sub("666", strr2)
print(result2)
import re
import requests
# Browser-like User-Agent header sent with the request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/54.0.2840.99 Safari/537.36"
    ),
}
# A timeout keeps the script from hanging forever if the server never
# answers; requests applies no timeout by default.
response = requests.get(
    "http://changyongdianhuahaoma.51240.com/",
    headers=headers,
    timeout=10,
).text

# Both patterns walk one <tr bgcolor="#EFF7F0"> row: pat1 captures the first
# <td> cell and pat2 the second.  [\s\S]*? is used between tags because,
# unlike ".", it also matches newlines.
pat1 = r'<tr bgcolor="#EFF7F0">[\s\S]*?<td>(.*?)</td>[\s\S]*?<td>[\s\S]*?</td>[\s\S]*?</tr>'
pat2 = r'<tr bgcolor="#EFF7F0">[\s\S]*?<td>[\s\S]*?</td>[\s\S]*?<td>(.*?)</td>[\s\S]*?</tr>'
pattern1 = re.compile(pat1)
pattern2 = re.compile(pat2)
data1 = pattern1.findall(response)
data2 = pattern2.findall(response)

# Join each first-cell text with its second-cell text.  zip stops at the
# shorter list, so a row where only one pattern matched cannot cause an
# IndexError.
resultlist = [first_cell + second_cell for first_cell, second_cell in zip(data1, data2)]
print(resultlist)
import urllib.request
import re
import ssl
# NOTE(security): this replaces the default HTTPS context factory with one
# that does NOT verify certificates, for every urllib HTTPS request made by
# this process.  Kept to preserve existing behaviour; remove it if the host's
# certificate chain can be verified normally.
ssl._create_default_https_context = ssl._create_unverified_context
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/54.0.2840.99 Safari/537.36"
    ),
}

page_num = int(input("请问要爬取第几页呢:"))
if page_num < 1:
    # A page below 1 would produce a negative "start" offset in the URL.
    raise ValueError("页码必须大于等于 1")
# Each page holds 20 entries, so page N starts at offset (N - 1) * 20.
page = str((page_num - 1) * 20)
url = "https://movie.douban.com/j/chart/top_list?type=11&interval_id=100%3A90&action=&start=" + page + "&limit=20"
req = urllib.request.Request(url, headers=headers)
# The with-block closes the HTTP response once the body has been read; the
# timeout prevents an unresponsive server from hanging the script.
with urllib.request.urlopen(req, timeout=10) as resp:
    data = resp.read().decode()

# Pull the first element of each "rating" pair and every "title" value out of
# the response text.
pat1 = r'"rating":\["(.*?)","\d+"\]'
pat2 = r'"title":"(.*?)"'
pattern1 = re.compile(pat1, re.I)
pattern2 = re.compile(pat2, re.I)
data1 = pattern1.findall(data)
data2 = pattern2.findall(data)

# Pair ratings with titles position by position.  zip stops at the shorter
# list, so a missing title cannot cause an IndexError.  The printed rank is
# the 1-based position within the fetched page.
for x, (rating, title) in enumerate(zip(data1, data2)):
    print("排名:", x + 1, "电影名:", title, "豆瓣评分:", rating)