python爬取豆瓣电影top250数据存入excel

# -*- coding:utf-8 -*-
"""Scrape the Douban Top 250 movie list and save it to an Excel file."""
import requests
from bs4 import BeautifulSoup
from datetime import datetime,timedelta
import pymysql
#xlwt-1.3.0
#from xlwt.xlwt import *
from xlwt import *
#获取指定开始排行的电影url
def get_url(root_url,start):
    """Build the list-page URL whose ranking starts at offset ``start``.

    The result is ``root_url`` followed by the query string
    ``?start=<start>&``.
    """
    pieces = (root_url, "?start=", str(start), "&")
    return "".join(pieces)

def get_review(page_url, headers=None, timeout=10):
    """Download one ranking page and extract the data of every movie on it.

    Args:
        page_url: URL of the list page to download.
        headers: Optional HTTP request headers. When omitted, a browser-like
            ``User-Agent`` is sent; sites such as Douban are reported to
            reject the default ``python-requests`` agent (TODO confirm the
            exact status code returned).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        A list with one dict per ``<li>`` found inside
        ``<ol class="grid_view">``. Each dict has the keys ``rank``,
        ``title``, ``score`` and ``desc``; ``desc`` falls back to a
        placeholder when the entry has no ``<span class="inq">``.
        An empty list is returned when the page contains no such ``<ol>``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    if headers is None:
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/120.0 Safari/537.36"
            ),
        }

    response = requests.get(page_url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    list_tag = soup.find("ol", "grid_view")
    if list_tag is None:
        # Page layout differs from what is expected: nothing to extract.
        return []

    movies_list = []
    for tag_li in list_tag.find_all("li"):
        # Look the optional one-line quote up once and reuse the result.
        quote_tag = tag_li.find("span", "inq")
        movie = {
            'rank': tag_li.find("em").string,
            # The first "title" span is used; any further ones are ignored.
            'title': tag_li.find_all("span", "title")[0].string,
            'score': tag_li.find("span", "rating_num").string,
            'desc': quote_tag.string if quote_tag is not None else '无评词',
        }
        movies_list.append(movie)
    return movies_list

def save_excel(movies_list, filename="movies.xls"):
    """Write the movies to a single-sheet ``.xls`` workbook.

    Row 0 holds the column headers (the dict keys); each following row holds
    the values of one movie, in the order the movies appear in the list.

    Args:
        movies_list: List of dicts, one per movie. Every key becomes a
            column; the dicts are expected to share the same keys, but a
            key that only appears in some rows still gets its own column.
        filename: Path of the workbook to write.

    Returns:
        None. When ``movies_list`` is empty no file is written.
    """
    if not movies_list:
        return

    # Map every key to a fixed column, in first-seen order, so the header
    # and all data rows agree on where each field goes.
    columns = {}
    for movie in movies_list:
        for key in movie:
            columns.setdefault(key, len(columns))

    workbook = Workbook()
    sheet = workbook.add_sheet("movies")

    for key, col in columns.items():
        sheet.write(0, col, key)

    # Data rows start at 1 because row 0 is the header.
    for row, movie in enumerate(movies_list, start=1):
        for key, value in movie.items():
            sheet.write(row, columns[key], value)

    # Save once, after all rows are in place, rather than once per row.
    workbook.save(filename)


if __name__ == '__main__':
    # Script entry point: collect every page of the ranking, then write a
    # single workbook containing all movies.
    root_url = "https://movie.douban.com/top250"
    # Assumes the site serves 25 entries per page and 250 in total, as the
    # "top250" name suggests -- TODO confirm against the live page.
    page_size = 25
    total_movies = 250

    movies_list = []
    for start in range(0, total_movies, page_size):
        movies_list.extend(get_review(get_url(root_url, start)))
    save_excel(movies_list)

 

 

转载于:https://www.cnblogs.com/venvive/p/11349527.html

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值