# Python crawler: scrape the Maoyan movie TOP100 board (python爬虫爬取猫眼电影TOP100)

from urllib import request
import re
import json
import pymysql
import time
import cgi,cgitb

# URL template for one page of the Maoyan TOP100 board; "{}" is the row offset.
base_url = "http://maoyan.com/board/4?offset={}"

# Browser-like User-Agent sent with every request (presumably needed because
# the site rejects urllib's default agent -- verify against the live site).
REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
}

# The script walks offsets 0, 10, ..., 90.
PAGE_SIZE = 10
PAGE_COUNT = 10

# NOTE(review): credentials are hard-coded exactly as in the original script;
# consider loading them from the environment or a config file.
DB_CONFIG = {
    "host": "192.168.0.154",
    "user": "root",
    "password": "123456",
    "database": "maoyan",
    # Scraped values contain Chinese text, so request a Unicode connection.
    "charset": "utf8mb4",
}

# Placeholders instead of string formatting: the driver escapes every value,
# so a title containing a quote can neither break nor inject into the SQL.
# The leading NULL fills the table's first column, as in the original insert.
INSERT_SQL = "insert into move values (null, %s, %s, %s, %s, %s)"

# Patterns are compiled once at import time instead of once per page/item.
ITEM_RE = re.compile(r"<dd>(.*?)</dd>", re.S)
TITLE_RE = re.compile(r'title="(.*?)"', re.S)
STAR_RE = re.compile(r'star">(.+?)</p>', re.S)
RELEASE_RE = re.compile(r'releasetime">(.*?)</p>', re.S)
SCORE_RE = re.compile(r'integer">(.*?)</i>.*?fraction">(.*?)</i>', re.S)
COVER_RE = re.compile(r'data-src="(.*?)"', re.S)


def fetch_page(url):
    """Download *url* with the browser-like headers and return it as text.

    The body is decoded as UTF-8. The response is closed before returning.
    """
    req = request.Request(url=url, headers=REQUEST_HEADERS)
    with request.urlopen(req) as res:
        return res.read().decode("utf-8")


def parse_movies(html):
    """Extract the movie rows from one board page.

    Returns a list of ``(name, stars, release_time, score, cover_url)``
    tuples, one per ``<dd>`` entry, in page order. The score is the
    concatenation of the "integer" and "fraction" fragments (e.g. "9." and
    "6" become "9.6"). Entries missing any of the five fields are skipped
    rather than aborting the whole run.
    """
    movies = []
    for item in ITEM_RE.findall(html):
        title = TITLE_RE.search(item)        # movie name
        stars = STAR_RE.search(item)         # leading actors
        released = RELEASE_RE.search(item)   # release time
        score = SCORE_RE.search(item)        # rating, split in two tags
        cover = COVER_RE.search(item)        # poster image URL
        if not (title and stars and released and score and cover):
            continue
        movies.append(
            (
                title.group(1),
                stars.group(1).strip(),
                released.group(1).strip(),
                score.group(1) + score.group(2),
                cover.group(1),
            )
        )
    return movies


def save_movies(db, movies):
    """Insert *movies* (tuples from ``parse_movies``) through connection *db*.

    All rows of the batch are committed together; on any failure the batch is
    rolled back and the exception is re-raised.
    """
    if not movies:
        return
    try:
        with db.cursor() as cursor:
            cursor.executemany(INSERT_SQL, movies)
        db.commit()
    except Exception:
        db.rollback()
        raise


def main():
    """Scrape every board page and store the rows in MySQL."""
    # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
    db = pymysql.connect(**DB_CONFIG)
    try:
        for page in range(PAGE_COUNT):
            html = fetch_page(base_url.format(page * PAGE_SIZE))
            save_movies(db, parse_movies(html))
    finally:
        # One connection for the whole run, always released.
        db.close()


if __name__ == "__main__":
    main()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值