# Scraping dynamically loaded page data served via AJAX requests: mainly builds the POST request headers and simulates the request.
import requests
from lxml import etree
import json
import sys
class Lagou(object):
def __init__(self):
self.headers = {
'X-Requested-With': 'XMLHttpRequest',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Cookie': 'user_trace_token=20170920183457-58cc73d5-9def-11e7-9c29-525400f775ce; LGUID=20170920183457-58cc7899-9def-11e7-9c29-525400f775ce; index_location_city=%E5%85%A8%E5%9B%BD; TG-TRACK-CODE=search_code; _gid=GA1.2.1674249864.1506152972; _ga=GA1.2.661938952.1505903691; LGRID=20170924124247-cffed120-a0e2-11e7-9278-5254005c3644; JSESSIONID=ABAAABAACDBABJB5CBA63393ECA49354BFB77C6B0BD0B5B; SEARCH_ID=4b82b54b5eab451392d2f73823a14a00',
'Host': 'www.lagou.com',