1、直接从JavaScript中采集加载的数据
import requests
import urllib.parse
from lxml import etree
# Request headers passed to requests.get() by getList; the User-Agent is the
# string of a desktop Chrome 69 browser on Windows.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36',
}
def getList(url, timeout=10):
    """Download a result page and pull two text fields out of every list item.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the server before ``requests`` gives up.
            Without a timeout a stalled connection would block indefinitely.

    Returns:
        A list with one ``(a, b)`` tuple per ``<li>`` found directly under a
        ``<ul class="gl-warp clearfix">`` element, where ``a`` is the list of
        text nodes matched by ``div/div[4]/a/em/text()`` and ``b`` the list
        matched by ``div/div[3]/strong/i/text()``. Either list may be empty
        when the item lacks that node.
        NOTE(review): these look like the product title and price fields of
        the search page -- confirm against the live markup.
    """
    req = requests.get(url, headers=header, timeout=timeout)
    # Force UTF-8 instead of relying on the encoding guessed from the headers.
    req.encoding = "utf-8"

    # The original parsed an undefined name ``source``; the downloaded body
    # is what has to be handed to the HTML parser.
    html = etree.HTML(req.text)
    if html is None:
        # Defensive: no tree was produced, so there is nothing to query.
        return []

    items = html.xpath('//ul[@class="gl-warp clearfix"]/li')
    # Return the extracted fields rather than discarding them, so that the
    # caller can actually use the scraped data.
    return [
        (
            item.xpath("div/div[4]/a/em/text()"),
            item.xpath("div/div[3]/strong/i/text()"),
        )
        for item in items
    ]
if __name__ == '__main__':
label = "手机"
label = urllib.parse.quote(label)
url = "https://search.jd.com/Search?keyword={}&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq={}&cid2=653&cid3=655&page={}&s=1