Python-使用BeautifulSoup爬取豆瓣TOP250电影

最新推荐文章于 2024-05-01 03:47:40 发布

a1394916730

最新推荐文章于 2024-05-01 03:47:40 发布

阅读量1.7k

点赞数 1

分类专栏： Python 文章标签： python

本文链接：https://blog.csdn.net/a1394916730/article/details/104862462

版权

Python 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

# _*_ coding:utf-8 _*_
"""
file_name:py_movie
author:Sam
"""
from urllib import request
from bs4 import BeautifulSoup


# Default output file, kept from the original script (a Windows-style path).
_DEFAULT_OUTPUT_PATH = 'd:/project/Python/demo/test_py_movie.txt'

# Separator line written after every movie record.
_RECORD_SEPARATOR = "**********************************************************************************************************************"

# (label, item_data key) pairs, in the order they are written to the file.
_OUTPUT_FIELDS = (
    ("[排名]:", 'rank'),
    ("[标题]:", 'title'),
    ("[其他]:", 'other'),
    ("[评语]:", 'inq'),
    ("[详情]:", 'info'),
    ("[评分]:", 'grade'),
    ("[评分人数]:", 'people'),
    ("[主图]:", 'img'),
    ("[播放路径]:", 'play_url'),
)


def _parse_item(item):
    """Extract the fields of one ``<div class="item">`` element into a dict."""
    item_data = {}
    # Rank: text of the first <em>.
    item_data['rank'] = item.find('em').get_text()
    # Link: href of the first <a>.
    item_data['play_url'] = item.find('a').get('href')
    # Poster: src of the first <img>.
    item_data['img'] = item.find('img').get("src")
    # Title: first <span class="title">.
    item_data['title'] = item.find('span', class_='title').get_text()
    # Alternative titles: <span class="other">.
    item_data['other'] = item.find('span', class_='other').get_text()
    # Rating: <span class="rating_num">.
    item_data['grade'] = item.find('span', class_='rating_num').get_text()
    # Number of raters: child node at index 7 of <div class="star">.
    # NOTE(review): this positional lookup depends on the exact page markup
    # (including whitespace text nodes) -- confirm if the page layout changes.
    star = item.find('div', class_='star')
    item_data['people'] = star.contents[7].get_text()
    # One-line quote: optional. The original author noted that the entry
    # ranked 246 had none, so fall back to a placeholder instead of failing.
    quote = item.find('span', class_='inq')
    item_data['inq'] = quote.get_text() if quote is not None else '暂无'
    # Details: child nodes 0 and 2 of the first <p> inside <div class="bd">,
    # joined with a fixed label (presumably two text lines separated by a
    # <br> -- verify against the page markup).
    paragraph = item.find('div', class_='bd').find('p')
    item_data['info'] = (
        paragraph.contents[0].strip()
        + "  年份产地："
        + paragraph.contents[2].strip()
    )
    return item_data


def _format_item(item_data):
    """Render one parsed movie dict as the text record written to the file."""
    lines = [label + item_data[key] for label, key in _OUTPUT_FIELDS]
    # One "label:value" line per field, a blank line, the separator line,
    # and another blank line.
    return "\n".join(lines) + "\n\n" + _RECORD_SEPARATOR + "\n\n"


def main(request_url="", fpath=_DEFAULT_OUTPUT_PATH):
    """Fetch one listing page, parse its movie entries and append them to a file.

    Args:
        request_url: URL of the page to download.
        fpath: Path of the UTF-8 text file the records are appended to.
            Defaults to the path hard-coded in the original script.

    Raises:
        Whatever ``urllib.request`` raises for an invalid URL or a failed
        request, ``AttributeError``/``IndexError`` if an entry lacks an
        expected element, and ``OSError`` if the output file cannot be opened.
    """
    # Send a browser-like User-Agent header with the request.
    req = request.Request(request_url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36')
    # The response is a context manager; closing it releases the connection.
    with request.urlopen(req) as res:
        html = res.read().decode('utf-8')

    soup = BeautifulSoup(html, "html.parser")
    # Parse and format everything before touching the file, so a parsing
    # error leaves the output file unchanged.
    records = [
        _format_item(_parse_item(item))
        for item in soup.find_all('div', class_="item")
    ]

    # Append mode: successive calls (one per page) accumulate in the same file.
    with open(fpath, 'a+', encoding='utf-8') as f:
        f.write("".join(records))


if __name__ == '__main__':
    # Each listing page is addressed by a "start" offset; request ten pages
    # with offsets 0, 25, ..., 225.
    base_url = "https://movie.douban.com/top250?start="
    for offset in range(0, 250, 25):
        main(base_url + str(offset))

a1394916730

关注

1
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
Python-使用BeautifulSoup爬取豆瓣TOP250电影

# _*_ coding:utf-8 _*_"""file_name:py_movieauthor:Sam"""from urllib import requestfrom bs4 import BeautifulSoupdef main(request_url=""): # 设置头部信息 request_url = request.Request(request...
复制链接

扫一扫