**爬虫入门:第一个小项目——爬取豆瓣官网的出版社名称**
from urllib.request import urlopen, Request
import re
import ssl
# NOTE(security): this swaps the process-wide default HTTPS context for one that
# skips certificate verification, so every later urllib HTTPS request made by
# this process is unverified. It is kept to preserve the script's behaviour;
# prefer a verified context wherever the local CA store allows it.
ssl._create_default_https_context = ssl._create_unverified_context

# Browser-like User-Agent sent with the request (presumably because the site
# rejects urllib's default one -- confirm before removing).
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}

# Page that is downloaded and scanned for publisher names.
url = "https://read.douban.com/provider/all"

# Captures the text inside each <div class="name"> element. The match is
# non-greedy and "." does not cross newlines, so an element whose content
# spans several lines is not matched.
pat = '<div class="name">(.*?)</div>'


def fetch_page(page_url, request_headers):
    """Download *page_url* and return its body decoded as UTF-8 text.

    Args:
        page_url: Address of the page to request.
        request_headers: Mapping of HTTP headers sent with the request.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    request = Request(page_url, headers=request_headers)
    # The context manager closes the connection even if read/decode raises.
    with urlopen(request) as response:
        return response.read().decode('utf-8')


def extract_publisher_names(html):
    """Return the contents of every ``<div class="name">`` in *html*, in order.

    Args:
        html: Page markup to scan.

    Returns:
        A list of the captured strings; empty when nothing matches.
    """
    return re.findall(pat, html)


def main():
    """Fetch the provider page and print each extracted name on its own line."""
    for name in extract_publisher_names(fetch_page(url, headers)):
        print(name)


if __name__ == "__main__":
    main()