import re
import urllib.request
# Fetch the CSDN blog front page while identifying as a desktop browser, then
# print the length of the decoded HTML.
url = "https://blog.csdn.net/"
# Masquerade as a browser: send a Firefox User-Agent instead of urllib's
# default "Python-urllib/x.y". The value must be pure Latin-1 because
# http.client encodes header values that way; the "…" placeholder that was
# copied from the browser's truncated display would raise UnicodeEncodeError
# as soon as the header is actually sent, so it is spelled out in full.
headers = (
    "User-Agent",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) "
    "Gecko/20100101 Firefox/61.0",
)
opener = urllib.request.build_opener()
# OpenerDirector reads the plural attribute `addheaders`; assigning to
# `addheader` only creates an unused attribute and the header is never sent.
opener.addheaders = [headers]
# Install the opener globally so urllib.request.urlopen() goes through it.
urllib.request.install_opener(opener)
# Use a context manager so the HTTP response is closed once the body is read.
with urllib.request.urlopen(url) as response:
    data = response.read()
# Decode as UTF-8, silently dropping any bytes that are not valid UTF-8.
data = data.decode("utf-8", "ignore")
print(len(data))
#设置正则
pat='