第一次写 Python,仅作备忘之用,写得不够完善,让大家见笑了。
# -*- coding:utf-8 -*-
import urllib
import urllib2
import re
import xlwt
# Create the output workbook and add a worksheet named 'movie_review'.
book = xlwt.Workbook(encoding='utf-8', style_compression=0)
sheet = book.add_sheet('movie_review', cell_overwrite_ok=True)

# Header row (row 0), one label per column:
# title, reviewer, movie, star rating, time, content.
_header_labels = ('标题', '影评人', '电影', '星级', '时间', '内容')
for _col, _label in enumerate(_header_labels):
    sheet.write(0, _col, _label)

# URL prefix of the review listing; the loop below appends a numeric offset
# after "start=".
baseurl = 'https://movie.douban.com/review/best/?start='
for i in range(0,3):
url_list=baseurl+str(i*20)
request_url = urllib2.Request(url_list)
response_url = urllib2.urlopen(request_url)
html_url = response_url.read().decode('utf-8')
pattern_url = re.compile('<h3 class="title">.*?<a href="(.*?)/"', re.S)
url_thispage =