# Scrape the short comments on "The Little Prince" from Douban (beginner version)
# -*- coding: utf-8 -*-
import re
import requests
# Page to scrape: first page of the "hot" short comments for the book.
page = 'https://book.douban.com/subject/1084336/comments/hot?p=1'

# Regular expression: the text between <span class="short"> and </span>.
p1 = '(?<=<span class="short">).+?(?=</span>)'
# re.DOTALL lets "." match newlines, so comments that span several lines
# are captured instead of being silently skipped.
pattern1 = re.compile(p1, re.DOTALL)

# Local file the comments are saved to.
path = 'd:/open.txt'

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# NOTE(review): the site is reported to reject the default python-requests
# User-Agent, so a browser-like one is sent -- confirm against the live site.
REQUEST_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/120.0 Safari/537.36'
    ),
}


def fetch_html(page_url, timeout=REQUEST_TIMEOUT):
    """Download *page_url* and return the response body as text.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-2xx HTTP status (via ``raise_for_status``).
    """
    response = requests.get(page_url, headers=REQUEST_HEADERS, timeout=timeout)
    # Fail loudly on an error page rather than scraping it for comments.
    response.raise_for_status()
    return response.text


def extract_short_comments(html_text):
    """Return the short-comment strings found in *html_text*.

    Each result is the content of one ``<span class="short">`` element with
    surrounding whitespace removed; spans that are only whitespace are dropped.
    """
    stripped = (found.strip() for found in pattern1.findall(html_text))
    return [comment for comment in stripped if comment]


def save_comments(comments, file_path):
    """Print every comment and write them to *file_path*, one entry per write.

    The file is overwritten and encoded as UTF-8. Each comment is followed by
    a newline so consecutive comments do not run together.
    """
    with open(file_path, 'w', encoding='utf-8') as f:
        for comment in comments:
            print(comment)
            f.write(comment + '\n')


def main():
    """Fetch the page, extract the short comments, and save them locally."""
    html_text = fetch_html(page)
    items = extract_short_comments(html_text)
    save_comments(items, path)


if __name__ == '__main__':
    main()