# python3
import urllib.request
from bs4 import BeautifulSoup
# Fetch the cnblogs search-result page for the keyword "python" and print
# the <title> element of the returned HTML.
url = 'http://zzk.cnblogs.com/s/blogpost?Keywords=python'

# Request headers mimicking a desktop Chrome browser (see User-Agent).
send_header = {
    # The final media range must be the wildcard "*/*"; a bare "/" is not
    # a valid media type.
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    # urllib does not decompress response bodies. Advertising gzip/deflate/
    # sdch could make the server return compressed bytes that the HTML
    # parser cannot read, so request an unencoded body instead.
    'Accept-Encoding': 'identity',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    # NOTE(review): hard-coded analytics cookies, presumably captured from a
    # browser session and possibly stale -- confirm they are still needed.
    'Cookie': '__utma=59123430.1728851006.1499049847.1501827480.1501827480.1; __utmc=59123430; __utmz=59123430.1501827480.1.1.utmcsr=cnblogs.com|utmccn=(referral)|utmcmd=referral|utmcct=/; _ga=GA1.2.1728851006.1499049847; _gid=GA1.2.1963339392.1501827475; _gat=1',
    'Host': 'zzk.cnblogs.com',
    'Referer': 'http://zzk.cnblogs.com/s?t=b&w=python',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36',
}

# Attach the headers to the request for the URL.
req = urllib.request.Request(url, headers=send_header)

# Send the request; the context manager closes the connection once the
# body has been read, even if reading raises.
with urllib.request.urlopen(req) as r:
    # Read the raw response body (bytes).
    s = r.read()

# Parse the HTML with BeautifulSoup's built-in parser and show the title.
soup = BeautifulSoup(s, 'html.parser')
print(soup.title)