# Fetching this page with plain urllib trips the site's anti-scraping check:
# the request fails with "HTTP Error 418".
import re
import urllib.request

# Raw string so that \w reaches the regex engine unchanged instead of being
# treated as an (invalid) string escape sequence.
pattern = r'<div class="name">(\w+)</div>'

try:
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen('https://read.douban.com/provider/all') as response:
        # read() returns bytes; decode them so the str pattern can be applied.
        # Fall back to UTF-8 when the server does not declare a charset
        # (assumption -- confirm against the real response headers).
        charset = response.headers.get_content_charset() or 'utf-8'
        web = response.read().decode(charset, errors='replace')
except Exception as er:
    # Best-effort script: report the failure (e.g. HTTP Error 418) and stop.
    print(er)
else:
    # Parse only after a successful download; on failure `web` is never bound.
    result = re.compile(pattern).findall(web)
    print(result)
# 运行输出:HTTP Error 418:
# 改用 requests 库来实现:
import requests
import random
url = 'https://read.douban.com/provider/all'
# 准备一些 User-Agent(用户代理)字符串
user_list = ["Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.514.0 Safari/534.7",
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/9.0.601.0 Safari/534.14",
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/10.0.601.0 Safari/534.14",
"Mozilla/