# Pool of browser User-Agent strings. One entry is picked at random each time
# the script runs and is sent as the User-Agent request header.
# NOTE(review): agent1 and agent2 have no closing ')' -- confirm whether that
# is intentional before changing the strings.
agent1 = (
    'Mozilla/4.0 (compatible; MSIE 7.0; '
    'Windows NT 5.1; 360SE'
)
agent2 = (
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; '
    'SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; '
    'SE 2.X MetaSr 1.0'
)
agent3 = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/73.0.3683.103 Safari/537.36'
)
agent_list = [
    agent1,
    agent2,
    agent3,
]
agent = random.choice(agent_list)
headers = dict([('User-Agent', agent)])
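# Set up the outer loop: the range of pages to crawl is configurable, and the page number is also substituted into main_url to move from page to page, so it serves both purposes.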
# Ask the user for the inclusive range of listing pages to crawl.
start_page = int(input("请输入爬取起始页码:"))
end_page = int(input("请输入爬取终止页码:"))

for p in range(start_page, end_page + 1):
    # Listing-page URL. (Sub-page URLs sometimes differ slightly from the
    # listing-page URL; usually they are the same with a little appended.)
    main_url = r'http://desk.zol.com.cn/4' + '/{}.html'.format(p)

    # NOTE(review): the code that fetches main_url and builds
    # big_jpg_url_list / path1 is not visible in this part of the file, and
    # the original indentation was lost. The download loop is assumed to run
    # once per listing page -- confirm the intended nesting.
    for m, jpg_url in enumerate(big_jpg_url_list):
        # .content is the response body as bytes, which is what the
        # binary-mode write below needs.
        file = requests.get(jpg_url, headers=headers).content
        # Short pause between image requests.
        time.sleep(0.5)

        # Build the target file path under path1 (the file name itself is
        # not part of the second-level directory). Images are numbered from 1.
        # The backslash is escaped explicitly; the resulting string is the
        # same as before, without relying on an unrecognized escape sequence.
        path2 = path1 + '\\图片{}.jpg'.format(str(m + 1))
        with open(path2, 'wb') as fh:
            fh.write(file)