Python-猫眼电影爬虫

爬取猫眼电影TOP100(共10页, 每页10部: http://maoyan.com/board/4?offset=0 到 http://maoyan.com/board/4?offset=90)
1). 爬取内容: 电影名称, 主演, 上映时间, 图片url地址, 并保存到mariadb数据库中;
2). 所有的图片保存到本地/mnt/maoyan/电影名.png (注: 下面的示例代码实际保存到当前目录下的 movies/电影名.jpg)


import os
import re
from urllib import request
from urllib.request import urlopen

import pymysql


def _fetch(url, user_agent=None):
    """Return the raw response body of *url* as bytes.

    A ``User-Agent`` header is sent only when *user_agent* is given.  The
    response is closed as soon as the body has been read.
    """
    headers = {'User-Agent': user_agent} if user_agent else {}
    req = request.Request(url, headers=headers)
    with urlopen(req) as resp:
        return resp.read()


def _parse_movies(content):
    """Extract ``(name, stars, release_time, picture_url)`` tuples from a page.

    The name is matched with ``[^"]+`` rather than a CJK-only character class:
    a title containing digits, Latin letters or punctuation would otherwise be
    skipped and every following movie would be paired with the wrong cast and
    release date.  ``zip`` stops at the shortest list, so a page on which the
    three patterns disagree in count yields fewer rows instead of raising
    ``IndexError``.
    """
    posters = re.findall(
        r'<img data-src="(?P<picture>[^"]+)" alt="(?P<name>[^"]+)" class="board-img" />',
        content)
    # Non-greedy groups so that several </p> tags on one line are not merged.
    stars = re.findall(r'<p class="star">\s*(.+?)\s*</p>', content)
    release_times = re.findall(r'<p class="releasetime">(.+?)</p>', content)
    return [
        (name, star, released, picture)
        for (picture, name), star, released in zip(posters, stars, release_times)
    ]


def _insert_movies(conn, rows):
    """Insert the parsed *rows* into table ``movies2`` using bound parameters."""
    # Expected schema (from the original author's note):
    #   create table movies2 (电影名字 varchar(60) not null,
    #                         主演 varchar(200) not null,
    #                         上映时间 varchar(50) not null,
    #                         图片url varchar(200) not null);
    insert_sql = ('insert into movies2 (电影名字,主演,上映时间,图片url) '
                  'VALUES (%s,%s,%s,%s);')
    cur = conn.cursor()
    try:
        for row in rows:
            # Values are passed separately so the driver escapes them; scraped
            # text containing quotes can no longer break or inject into the SQL.
            cur.execute(insert_sql, row)
    finally:
        cur.close()


def _save_posters(rows, image_dir):
    """Download each row's poster image to ``<image_dir>/<name>.jpg``."""
    for name, _star, _released, picture in rows:
        # Path separators in a title would otherwise point outside image_dir.
        safe_name = name.replace('/', '_').replace(os.sep, '_')
        target = os.path.join(image_dir, '%s.jpg' % safe_name)
        with open(target, 'wb') as f:
            f.write(_fetch(picture))


def getmovies(pages=10, image_dir='movies'):
    """Scrape the Maoyan board, store the movies in MySQL and save the posters.

    For each of *pages* board pages (10 movies per page, ``offset`` stepping by
    10) the page is downloaded, the movie name, cast, release time and poster
    URL are extracted, one row per movie is inserted into table ``movies2`` of
    database ``bank``, and every poster is written to
    ``<image_dir>/<name>.jpg``.

    Args:
        pages: Number of board pages to crawl; the default of 10 covers the
            TOP100 list.
        image_dir: Directory for the poster files; created if missing.

    Raises:
        urllib.error.URLError: If a page or poster cannot be downloaded.
        pymysql.MySQLError: If connecting or inserting fails.
        OSError: If a poster file cannot be written.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:62.0) Gecko/20100101 Firefox/62.0'
    os.makedirs(image_dir, exist_ok=True)

    # NOTE(review): credentials are hard-coded; consider reading them from the
    # environment or a config file instead of committing them to source.
    conn = pymysql.connect(user='root',
                           password='971203', charset='utf8', autocommit=True)
    try:
        conn.select_db('bank')
        for page in range(pages):
            url = 'http://maoyan.com/board/4?offset=%d' % (page * 10)
            content = _fetch(url, user_agent).decode('utf-8')
            print("正在爬取地址")
            rows = _parse_movies(content)
            print(rows)
            _insert_movies(conn, rows)
            _save_posters(rows, image_dir)
    finally:
        # One connection for the whole crawl, released even if a page fails.
        conn.close()

# Start the crawl. This call is unguarded, so it also runs when the file is imported.
getmovies()

 
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值