代码如下:
from urllib import request
import pymysql
import re
# Base board URL to scrape; create_url() appends "?offset=N" to it per page.
url = 'http://maoyan.com/board/4'
def get_content(url, timeout=10):
    """Fetch *url* and return the response body decoded as UTF-8.

    Args:
        url: Address to open; passed straight to ``urllib.request.urlopen``.
        timeout: Seconds to wait on blocking network operations before
            giving up. Without it a stalled server would block forever.

    Returns:
        The complete response body as a ``str``.

    Raises:
        OSError: If the request fails or times out
            (``urllib.error.URLError`` is a subclass).
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    with request.urlopen(url, timeout=timeout) as f:
        return f.read().decode('utf-8')
# Compiled once so every page scrape reuses the same pattern object.
# All wildcards are lazy (``.*?``): a greedy ``.*`` runs to the LAST matching
# delimiter on the line, which over-matches when several tags share a line and
# makes the "star" group swallow trailing spaces / "\r" that the following
# ``\s*`` is meant to strip.
_MOVIE_RE = re.compile(
    r'<p class="name"><a href="/films/.*?" title=".*?" data-act="boarditem-click" data-val="{movieId:.*?}">(.*?)</a></p>'
    r'\s*<p class="star">\s*(.*?)\s*</p>'
    r'\s*<p class="releasetime">(.*?)</p>'
)


def get_pattern(url):
    """Download the page at *url* and extract its movie entries.

    Args:
        url: Page address; its content is fetched with ``get_content``.

    Returns:
        A list with one 3-tuple of strings per matched entry, holding the
        text captured from the ``name`` link, the ``star`` paragraph and the
        ``releasetime`` paragraph, in that order. Empty if nothing matches.
    """
    return _MOVIE_RE.findall(get_content(url))
def create_url(url, pages=10, page_size=10):
    """Build the list of paginated URLs derived from *url*.

    Args:
        url: Base address; ``?offset=N`` is appended to it.
        pages: Number of page URLs to generate. Defaults to 10.
        page_size: Offset increment between consecutive pages. Defaults to 10.

    Returns:
        A list of ``pages`` strings with offsets ``0, page_size,
        2 * page_size, ...``; empty when ``pages`` is 0.
    """
    return [url + '?offset=%d' % (page * page_size) for page in range(pages)]
# Build every page URL up front, then load the scraped rows into MySQL.
url_li = create_url(url)

# NOTE(review): connection credentials are hard-coded; consider moving them
# to configuration or environment variables.
# A single connection serves all pages instead of reconnecting for each one.
conn = pymysql.connect(host='localhost', user='root',
                       passwd='redhat', db='movie',
                       charset='utf8')
try:
    with conn.cursor() as cur:
        # Parameterized insert; each scraped tuple supplies the three values.
        insert_sql = 'insert into ifm values(%s,%s,%s);'
        for page_url in url_li:
            try:
                # Fetching and parsing stay inside the try so that one bad
                # page is reported and skipped rather than aborting the run.
                cur.executemany(insert_sql, get_pattern(page_url))
                # Commit per page so "success" is only printed once the rows
                # are actually persisted.
                conn.commit()
            except Exception as e:
                # Drop any partially inserted rows for this page and show
                # the cause instead of silently discarding it.
                conn.rollback()
                print("sql execute failed", e)
            else:
                print('sql execute success')
finally:
    # Always release the database connection, even on unexpected errors.
    conn.close()
实验结果如下:
******** success! **********