# 2. Resident ID number: 18 characters, e.g. 131123199607080517.
#    The first 17 characters are digits; the last one may be a digit,
#    a lowercase x, or an uppercase X.
import re

# Raw string so that "\d" reaches the regex engine untouched.
ID_PATTERN = re.compile(r'\d{17}[\dxX]')

s = '131123199607080517'
# fullmatch() requires the whole string to be consumed. A bare match() only
# anchors the start, so a 19-character string would be accepted on the
# strength of its first 18 characters; fullmatch() also rejects a trailing
# newline, which a "$" anchor would let through.
r = ID_PATTERN.fullmatch(s)
# Guard against None so a non-matching value prints None instead of raising.
print(r.group() if r else None)
# Output:
# 131123199607080517
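# 3. Scraping Feng Timo (冯提莫) pictures from Duitang (堆糖).
#    Open question from the author: why does it not download all 40 images?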
import os
import re

import requests
# Download every image URL found in one Duitang search response and save
# the files as 冯提莫0/冯提莫<N>.jpg, where N is the 1-based position of the
# URL in the response.

# Browser-like headers for the search request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3775.400 QQBrowser/10.6.4208.400',
    'Cookie': 'js=1; sessionid=a5b2cb66-096a-48cb-a89b-39c026247c07; js=1',
}

# 1. Target URL: search endpoint, keyword 冯提莫 (URL-encoded), start=0, limit=40.
url = 'https://www.duitang.com/napi/blog/list/by_search/?kw=%E5%86%AF%E6%8F%90%E8%8E%AB&start=0&limit=40'

# 2. Fetch the response body as text. The headers are actually sent now (the
#    original built them but never passed them), and a timeout keeps a stalled
#    connection from hanging the script forever.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
req = response.content.decode('utf-8')

# 3. Pull out every value of a "path" key from the raw response text.
# NOTE(review): the author saw fewer than 40 images saved. Besides the bugs
# fixed in this block, the server may return fewer items than `limit` asks
# for -- confirm by checking len(result).
result = re.findall(r'"path":"(.*?)"', req)

# 4. Save the images.
# Create the output directory up front; open() would otherwise fail with
# FileNotFoundError on the first image.
os.makedirs('冯提莫0', exist_ok=True)

# Image URLs may live on a different host than the search API, so only the
# User-Agent is forwarded to them; the session cookie stays with the API host.
image_headers = {'User-Agent': headers['User-Agent']}

for a, i in enumerate(result, start=1):
    # File name is derived from the position of the URL in the result list.
    name = f'冯提莫{a}.jpg'
    try:
        s = requests.get(i, headers=image_headers, timeout=10)
        s.raise_for_status()
    except requests.RequestException as e:
        # Skip this image. The original fell through to f.write(s.content),
        # which raised NameError on a first-iteration failure or silently
        # rewrote the previous image's bytes under the new name.
        print(e)
        continue
    # Open the file only after a successful download so that failures do not
    # leave empty .jpg files behind.
    with open('冯提莫0/' + name, 'wb') as f:
        f.write(s.content)
    print(f'正在爬取第{a}张图片')