Python 爬取猫眼 Top 100 电影简介并存储到数据库中

代码如下:

 

from urllib import request
import pymysql
import re

# Base URL of the Maoyan board page to scrape; create_url() appends the
# "?offset=N" pagination query to it.
url = 'http://maoyan.com/board/4'


def get_content(url, timeout=10):
    """Download ``url`` and return the response body decoded as UTF-8.

    Args:
        url: Address to fetch; anything ``urllib.request.urlopen`` accepts.
        timeout: Seconds to wait on blocking network operations before
            giving up. Defaults to 10.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Without a timeout a stalled server would block the whole scrape forever.
    with request.urlopen(url, timeout=timeout) as f:
        return f.read().decode('utf-8')

def get_pattern(url):
    """Download one board page and extract its movie entries.

    Args:
        url: Address of the board page to download and parse.

    Returns:
        A list of ``(name, star, releasetime)`` string tuples, one per
        matched entry, in the order they appear in the page. The list is
        empty when nothing matches.
    """
    content = get_content(url)
    # Attribute values are matched with bounded character classes and the
    # captured texts with lazy quantifiers, so a match cannot run past the
    # closing quote/tag it belongs to when several tags share one line, and
    # the captured star text carries no trailing whitespace.
    pattern = (
        r'<p class="name"><a href="/films/[^"]*" title="[^"]*" '
        r'data-act="boarditem-click" data-val="\{movieId:[^}]*\}">(.*?)</a></p>'
        r'\s*<p class="star">\s*(.*?)\s*</p>'
        r'\s*<p class="releasetime">(.*?)</p>'
    )
    return re.findall(pattern, content)


def create_url(url, pages=10, page_size=10):
    """Build the list of paginated URLs for a board.

    Args:
        url: Base page URL; ``?offset=N`` is appended to it.
        pages: Number of page URLs to generate. Defaults to 10.
        page_size: Difference between the offsets of consecutive pages.
            Defaults to 10.

    Returns:
        A list of ``pages`` URL strings with offsets ``0, page_size,
        2 * page_size, ...``. The list is empty when ``pages`` is zero or
        negative.
    """
    return [url + '?offset=%d' % (page * page_size) for page in range(pages)]


url_li = create_url(url)

# NOTE(review): the connection credentials are hard-coded here; consider
# moving them to configuration.
# A single connection is shared by all pages instead of reconnecting per URL.
conn = pymysql.connect(host='localhost',user='root',
                       passwd='redhat',db='movie',
                       charset='utf8')
try:
    cur = conn.cursor()
    try:
        # Insert statement: one row of three values per scraped movie.
        insert_sql = 'insert into ifm values(%s,%s,%s);'

        for page_url in url_li:
            try:
                cur.executemany(insert_sql, get_pattern(page_url))
            except Exception as e:
                # Discard anything from the failed page and report the cause
                # instead of silently swallowing it, then go on to the next
                # page.
                conn.rollback()
                print("sql execute failed: %s" % e)
            else:
                # Commit only after a successful insert so the rows take
                # effect in the database.
                conn.commit()
                print('sql execute success')
    finally:
        # Close the cursor first ...
        cur.close()
finally:
    # ... then close the database connection, even if an error escaped.
    conn.close()

实验结果如下:

 

********         success!    **********

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值