Douban User Album Crawler

A small script that pages through a Douban photo album, extracts the photo URLs with pyquery, and downloads each image into a local folder through a randomly chosen HTTP proxy.

# coding: utf-8
import os
import random

import requests
from pyquery import PyQuery as pq

def gethtml(url):
    # Fetch a URL through the chosen proxy and return the raw response body.
    try:
        page = requests.get(url, headers=headers, proxies=proxiess, timeout=10)
        return page.content
    except requests.RequestException:
        print("Error fetching " + url)
        return None

def getimgsrc(code):
    # Parse an album page and collect the src attribute of every photo thumbnail.
    if not code:
        return []
    doc = pq(code)
    items = doc(".photolst .photo_wrap a img").items()
    imgsrc = []
    for item in items:
        src = item.attr("src")
        if src:
            imgsrc.append(src)
    return imgsrc

def get_img(imglist):
    # Download every image in imglist into `path`, numbering the files sequentially.
    global pagenum
    for imgs in imglist:
        img = gethtml(imgs)
        if img is None:
            continue
        with open(os.path.join(path, "%s.jpg" % pagenum), 'wb') as fb:
            fb.write(img)
            print("Downloading image %d" % pagenum)
        pagenum += 1

# Browser-like request headers sent with every request.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
    'Cache-Control': 'no-cache',
    'Upgrade-Insecure-Requests': '1',   # header values must be strings
}
proxies = [
"http://119.101.116.253:9999",
"http://119.101.112.97:9999",
"http://111.177.178.139:9999",
"http://111.177.171.78:9999",
"http://111.181.32.45:9999",
"http://115.203.99.9:9999",
"http://119.101.115.194:9999",
"http://110.52.234.64:9999",
"http://111.177.166.126:9999",
"http://119.39.238.55:9999",
]
# Pick one proxy at random; requests expects the proxies argument to be a dict
# mapping scheme to proxy URL.
proxy = random.choice(proxies)
proxiess = {"http": proxy, "https": proxy}
print(proxiess)

path = r"F:\douban"   # download folder (raw string keeps the backslash literal)
os.makedirs(path, exist_ok=True)
pagenum = 0           # running image counter, also used as the ?start= offset

try:
    while 1:
        # The ?start= offset is the index of the first photo on a page, so the
        # running image counter doubles as the offset of the next page.
        url = "https://www.douban.com/photos/album/127882234/?start=" + str(pagenum)
        htmlcode = gethtml(url)
        imglist = getimgsrc(htmlcode)
        if not imglist:   # empty page: the whole album has been fetched
            break
        get_img(imglist)
except KeyboardInterrupt:
    pass
print("Done")
        

 
 

Reposted from: https://my.oschina.net/u/4069811/blog/3003607
