Python Usage (2) ----- Web Crawler

Reference: Python3网络爬虫快速入门实战解析_Jack-Cui-CSDN博客_python爬虫实例教程 (Jack-Cui's Python 3 web crawler quick-start tutorial on CSDN)

# -*- coding:UTF-8 -*-
# Download images from the web and save them to local disk
import requests
import json
import re
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter

if __name__ == '__main__':
	params = {}
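	# Request headers for fetching the chapter pages on www.kanmanhua.me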
	headers={
	"Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
	"Accept-Encoding":"gzip, deflate",
	"Accept-Language":"zh-CN,zh;q=0.9",
	"Connection":"keep-alive",
	"Cookie":"__guid=45238818.1145742547842433700.1540392751906.5432; UM_distinctid=166a69146f6ed-0ae6a89a710775-3c604504-100200-166a69146fa61; CNZZDATA1262204355=1316053633-1540392329-%7C1540649295; Hm_lvt_a476a5ad5ad147e6b24a01c84b92ebc5=1540392831,1540401440,1540653238; Hm_lpvt_a476a5ad5ad147e6b24a01c84b92ebc5=1540653755; monitor_count=15",
	"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
	"Referer":"http://www.kanmanhua.me/manhua-67231/277554_123.html",
	"Host":"www.kanmanhua.me"
	}
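	# Request headers for fetching the image files themselves; note the aliyuncs OSS Host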
	headers2={
	"Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
	"Accept-Encoding":"gzip, deflate",
	"Accept-Language":"zh-CN,zh;q=0.9",
	"Connection":"keep-alive",
	"Cookie":"__guid=45238818.1145742547842433700.1540392751906.5432; UM_distinctid=166a69146f6ed-0ae6a89a710775-3c604504-100200-166a69146fa61; CNZZDATA1262204355=1316053633-1540392329-%7C1540649295; Hm_lvt_a476a5ad5ad147e6b24a01c84b92ebc5=1540392831,1540401440,1540653238; Hm_lpvt_a476a5ad5ad147e6b24a01c84b92ebc5=1540653755; monitor_count=15",
	"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
	"Host":"manhua-me.oss-cn-hongkong.aliyuncs.com"
	}
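	# Empty placeholders; data, cookies and proxies are never used below (params is passed but stays empty)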
	data={}
	cookies={}
	proxies = {}



	# Parse JSON data (commented-out examples kept from an earlier experiment; r and imgheaders are not defined in this script)
	# result3 = json.loads(r.text)
	# result4 = json.dumps(result3,indent=4,ensure_ascii=False)
	# Read a saved HTML file and parse it with BeautifulSoup
	# with open('E:/program/Python/learn/carton/sample.html', 'r',encoding='utf-8') as f:
	# 	content = f.read()
	# 	bf = BeautifulSoup(content)
	# 	texts = bf.find_all('img', class_ = 'img-responsive')
	# 	print(texts[1].get('data-original'))
	# Regex: extract the image file name from each thumbURL and download the image
	# for item in result3['data']:
	# 	if len(item):
	# 		print(item['thumbURL'])
	# 		fileName = re.search('[0-9]*[^\,]*jpg',item['thumbURL']).group()
	# 		imgr = requests.get(item['thumbURL'],headers=imgheaders)
	# 		print(imgr.status_code)


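	# Overall flow: for each page of the chapter, fetch the HTML, take the second
	# <img class="img-responsive"> tag, read its data-original attribute to get the
	# real image URL, then download that image from the OSS host and save it under
	# F:/img2/<catalog>/. The HTTPAdapter mounts below make the session retry
	# failed connection attempts up to 3 times.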
	s = requests.Session()
	s.mount('http://', HTTPAdapter(max_retries=3))
	s.mount('https://', HTTPAdapter(max_retries=3))

	count = 268        # last page number of this chapter; the loop below fetches pages 101-268
	url = "http://www.kanmanhua.me/manhua-67231/277554_"  # page URL prefix; the page number and ".html" are appended
	catalog = "ZERO"   # sub-folder of F:/img2/ where the images are written

	for i in range(100,count):
		print(i)
		# Fetch the chapter page and locate the real image URL
		response = s.get(url+str(i+1)+'.html',params=params,headers=headers,timeout=3)
		bf = BeautifulSoup(response.text, 'html.parser')
		imgEles = bf.find_all('img', class_ = 'img-responsive')
		# The second img.img-responsive tag carries the image URL in its data-original attribute
		needHref = imgEles[1].get('data-original')
		# Download the image from the OSS host and write it to disk in 1 KB chunks
		response2 = s.get(needHref,params=params,headers=headers2,timeout=3)
		with open("F:/img2/"+catalog+"/img"+str(i+1)+".jpg", 'wb') as f:
			for chunk in response2.iter_content(chunk_size = 1024):
				if chunk:
					f.write(chunk)
					f.flush()
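
The script above assumes the folder F:/img2/ZERO/ already exists and buffers each image fully in memory before writing it. Below is a minimal sketch of a more defensive download step, using os.makedirs and requests' stream=True; the helper name download_image is illustrative, not part of the original script.

import os
import requests

def download_image(session, url, path, headers=None):
	# Create the target folder if it does not exist yet
	os.makedirs(os.path.dirname(path), exist_ok=True)
	# stream=True lets iter_content read the body in chunks instead of all at once
	r = session.get(url, headers=headers, timeout=3, stream=True)
	r.raise_for_status()  # fail loudly on 4xx/5xx instead of saving an error page as a .jpg
	with open(path, 'wb') as f:
		for chunk in r.iter_content(chunk_size=1024):
			if chunk:
				f.write(chunk)
	return path

Inside the loop this would replace the with open(...) block, e.g. download_image(s, needHref, "F:/img2/"+catalog+"/img"+str(i+1)+".jpg", headers=headers2).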