import urllib.request
import re
import xlwt
# Scrape the publisher names listed on Douban Read's provider page and store
# them, numbered from 1, in an Excel (.xls) workbook.

PROVIDER_URL = "https://read.douban.com/provider/all"
# Raw string: same path as before, without relying on the invalid "\d" escape.
OUTPUT_PATH = r"d:\data.xls"
SHEET_NAME = "publisher"

# Whole <div class="name">...</div> elements whose text starts with a
# character in the U+4E00-U+9FA5 range (CJK ideographs).
_NAME_DIV_RE = re.compile('<div class="name">[\u4e00-\u9fa5].*?</div>')
# Text sitting between the opening and the closing tag of one such element.
_INNER_TEXT_RE = re.compile("<.*>(.*?)<.*>")


def fetch_page(url: str = PROVIDER_URL) -> str:
    """Download *url* and return its body decoded with the default codec (UTF-8).

    The response is used as a context manager so the underlying connection
    is closed even if reading or decoding fails.
    """
    with urllib.request.urlopen(url) as response:
        return response.read().decode()


def extract_publisher_names(html: str) -> list:
    """Return the text of every matching ``<div class="name">`` in *html*.

    Only elements whose text begins with a CJK ideograph are considered.
    The result keeps document order; an input with no matching element
    yields an empty list.
    """
    return [
        "".join(_INNER_TEXT_RE.findall(div))
        for div in _NAME_DIV_RE.findall(html)
    ]


def save_names(names, path: str = OUTPUT_PATH) -> None:
    """Write *names* to an .xls workbook saved at *path*.

    Column 0 receives a 1-based running number, column 1 the name itself.
    """
    book = xlwt.Workbook(encoding="utf-8")
    sheet = book.add_sheet(SHEET_NAME, cell_overwrite_ok=True)
    for row, name in enumerate(names):
        sheet.write(row, 0, row + 1)
        sheet.write(row, 1, name)
    book.save(path)


def main() -> None:
    """Fetch the provider page, extract the names and save the workbook."""
    save_names(extract_publisher_names(fetch_page()))


if __name__ == "__main__":
    main()
# Reposted from: https://www.cnblogs.com/xiesongyou/p/7849599.html