使用 PyQuery 实现网页抓取(可以通过 opener 参数自定义发送请求的方式):
from pyquery import PyQuery
import requests
def req(url, timeout=10):
    """Fetch *url* with ``requests`` and return the body as UTF-8 text.

    Suitable for use as a PyQuery ``opener`` callable.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Without an explicit timeout requests waits indefinitely for a server
    # that never answers, which would hang the whole script.
    response = requests.get(url, timeout=timeout)
    return response.content.decode('utf-8')
# The ``opener`` argument lets you decide how the URL is requested, for
# example with the requests module or with urllib.
doc = PyQuery(
    opener=req,
    url='http://www.baidu.com',
)
print(doc)
当然,还可以通过 headers 参数自己设置一些请求头:
from pyquery import PyQuery
# Custom request headers sent along with the page request.
headers = {
    'User-Agent': 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87',
    'Referer': ''
}
# The keyword must be spelled ``encoding``; an unrecognised keyword is not
# used to set the response encoding, so the page would be decoded with
# whatever encoding the HTTP library guesses.
doc = PyQuery('http://www.baidu.com', encoding='utf-8', headers=headers)