Douban User Album Crawler

A small script that pages through a Douban photo album, extracts the photo URLs with pyquery, and downloads each image into a local folder through a randomly chosen HTTP proxy.

# coding: utf-8
import os
import random

import requests
from pyquery import PyQuery as pq

def gethtml(url):
    # Fetch a URL through the chosen proxy and return the raw response body.
    try:
        page = requests.get(url, headers=headers, proxies=proxiess, timeout=10)
        return page.content
    except requests.RequestException:
        print("Error fetching " + url)
        return None

def getimgsrc(code):
    # Parse an album page and collect the src attribute of every photo thumbnail.
    if not code:
        return []
    doc = pq(code)
    items = doc(".photolst .photo_wrap a img").items()
    imgsrc = []
    for item in items:
        src = item.attr("src")
        if src:
            imgsrc.append(src)
    return imgsrc

def get_img(imglist):
    # Download every image in imglist into `path`, numbering the files sequentially.
    global pagenum
    for imgs in imglist:
        img = gethtml(imgs)
        if img is None:
            continue
        with open(os.path.join(path, "%s.jpg" % pagenum), 'wb') as fb:
            fb.write(img)
            print("Downloading image %d" % pagenum)
        pagenum += 1

# Browser-like request headers sent with every request.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
    'Cache-Control': 'no-cache',
    'Upgrade-Insecure-Requests': '1',   # header values must be strings
}
proxies = [
"http://119.101.116.253:9999",
"http://119.101.112.97:9999",
"http://111.177.178.139:9999",
"http://111.177.171.78:9999",
"http://111.181.32.45:9999",
"http://115.203.99.9:9999",
"http://119.101.115.194:9999",
"http://110.52.234.64:9999",
"http://111.177.166.126:9999",
"http://119.39.238.55:9999",
]
# Pick one proxy at random; requests expects the proxies argument to be a dict
# mapping scheme to proxy URL.
proxy = random.choice(proxies)
proxiess = {"http": proxy, "https": proxy}
print(proxiess)

path = r"F:\douban"   # download folder (raw string keeps the backslash literal)
os.makedirs(path, exist_ok=True)
pagenum = 0           # running image counter, also used as the ?start= offset

try:
    while 1:
        # The ?start= offset is the index of the first photo on a page, so the
        # running image counter doubles as the offset of the next page.
        url = "https://www.douban.com/photos/album/127882234/?start=" + str(pagenum)
        htmlcode = gethtml(url)
        imglist = getimgsrc(htmlcode)
        if not imglist:   # empty page: the whole album has been fetched
            break
        get_img(imglist)
except KeyboardInterrupt:
    pass
print("Done")
        

 
 

Reposted from: https://my.oschina.net/u/4069811/blog/3003607
