一、开头
抓取汽车之家配置参数时,最难的地方在于页面上有一部分文字是用JS动态生成的,直接抓取会缺失这些文字。而且该网站会经常改动混淆方式,用正则表达式处理不仅费时费力,还需要经常跟着修改。因此选择用JS解析器来处理。为了方便,这里选择用PyV8。只要把关键的样式拿出来,后面就都好办了。先看结果。
.hs_kw0_baikeYA::before { content:"环保" } .hs_kw1_baikeYA::before { content:"适" } .hs_kw2_baikeYA::before { content:"摄像头" } .hs_kw3_baikeYA::before { content:"离地间隙" } .hs_kw4_baikeYA::before { content:"油箱" } .hs_kw5_baikeYA::before { content:"后桥" } .hs_kw6_baikeYA::before { content:"整备" } .hs_kw7_baikeYA::before { content:"转速" } .hs_kw8_baikeYA::before { content:"制动力分配" } .hs_kw9_baikeYA::before { content:"最大" } .hs_kw10_baikeYA::before { content:"气门数" } .hs_kw11_baikeYA::before { content:"车门数" } .hs_kw12_baikeYA::before { content:"差速锁" } .hs_kw13_baikeYA::before { content:"加热" } .hs_kw14_baikeYA::before { content:"前" } .hs_kw15_baikeYA::before { content:"整体" } .hs_kw16_baikeYA::before { content:"驻车" } .hs_kw17_baikeYA::before { content:"后悬架" } .hs_kw18_baikeYA::before { content:"排量" } .hs_kw19_baikeYA::before { content:"油耗" } .hs_kw20_baikeYA::before { content:"供油" } .hs_kw21_baikeYA::before { content:"配气" } .hs_kw22_baikeYA::before { content:"前轮距" } .hs_kw23_baikeYA::before { content:"宽度" } .hs_kw24_baikeYA::before { content:"成功" } .hs_kw25_baikeYA::before { content:"综合" } .hs_kw26_baikeYA::before { content:"天窗" } .hs_kw27_baikeYA::before { content:"悬架" } .hs_kw28_baikeYA::before { content:"行车电脑" } .hs_kw29_baikeYA::before { content:"缸盖" } .hs_kw30_baikeYA::before { content:"标准" } .hs_kw31_baikeYA::before { content:"限滑" } .hs_kw32_baikeYA::before { content:"放倒" } .hs_kw33_baikeYA::before { content:"前制动器" } .hs_kw34_baikeYA::before { content:"中央" } .hs_kw35_baikeYA::before { content:"备胎" } .hs_kw36_baikeYA::before { content:"电子" } .hs_kw37_baikeYA::before { content:"功率" } .hs_kw38_baikeYA::before { content:"合金" } .hs_kw39_baikeYA::before { content:"排列" } .hs_kw40_baikeYA::before { content:"调节" } .hs_kw41_baikeYA::before { content:"风" } .hs_kw42_baikeYA::before { content:"接口" } .hs_kw43_baikeYA::before { content:"空气" } .hs_kw44_baikeYA::before { content:"前悬架" } .hs_kw45_baikeYA::before { content:"高度" } .hs_kw46_baikeYA::before { content:"铝" } 
.hs_kw47_baikeYA::before { content:"后轮胎" } .hs_kw48_baikeYA::before { content:"仪表盘" } .hs_kw49_baikeYA::before { content:"规格" } .hs_kw50_baikeYA::before { content:"前排" } .hs_kw51_baikeYA::before { content:"音源" } .hs_kw52_baikeYA::before { content:"价" } .hs_kw53_baikeYA::before { content:"轴距" } .hs_kw54_baikeYA::before { content:"并线" } .hs_kw55_baikeYA::before { content:"指" } .hs_kw56_baikeYA::before { content:"蓝牙" } .hs_kw57_baikeYA::before { content:"扭矩" } .hs_kw58_baikeYA::before { content:"缸体" } .hs_kw59_baikeYA::before { content:"长度" } .hs_kw60_baikeYA::before { content:"氙气" } .hs_kw61_baikeYA::before { content:"助力" } .hs_kw62_baikeYA::before { content:"行程" } .hs_kw63_baikeYA::before { content:"气囊" } .hs_kw64_baikeYA::before { content:"容量" } .hs_kw65_baikeYA::before { content:"元" } .hs_kw66_baikeYA::before { content:"缸径" } .hs_kw67_baikeYA::before { content:"外接" } .hs_kw68_baikeYA::before { content:"商" } .hs_kw69_baikeYA::before { content:"电话" } .hs_kw70_baikeYA::before { content:"喇叭" } .hs_kw71_baikeYA::before { content:"后排" } .hs_kw72_baikeYA::before { content:"支撑" } .hs_kw73_baikeYA::before { content:"独立" } .hs_kw74_baikeYA::before { content:"全液晶" } .hs_kw75_baikeYA::before { content:"燃油" } .hs_kw76_baikeYA::before { content:"容积" } .hs_kw77_baikeYA::before { content:"真皮" } .hs_kw78_baikeYA::before { content:"无钥匙" } .hs_kw79_baikeYA::before { content:"实测" } .hs_kw80_baikeYA::before { content:"牵引力控制" } .hs_kw81_baikeYA::before { content:"前轮胎" } .hs_kw82_baikeYA::before { content:"座椅移动" } .hs_kw83_baikeYA::before { content:"预警" } .hs_kw84_baikeYA::before { content:"影像" } .hs_kw85_baikeYA::before { content:"儿童座椅" } .hs_kw86_baikeYA::before { content:"机构" } .hs_kw87_baikeYA::before { content:"进气" } .hs_kw88_baikeYA::before { content:"名称" } .hs_kw89_baikeYA::before { content:"扬声器" } .hs_kw90_baikeYA::before { content:"视频" } .hs_kw91_baikeYA::before { content:"质保" } .hs_kw92_baikeYA::before { content:"气缸" } .hs_kw93_baikeYA::before { content:"驾驶" } 
.hs_kw94_baikeYA::before { content:"前桥" } .hs_kw95_baikeYA::before { content:"质量" } .hs_kw96_baikeYA::before { content:"主动" } .hs_kw97_baikeYA::before { content:"电池" } .hs_kw98_baikeYA::before { content:"稳定" } .hs_kw99_baikeYA::before { content:"材质" } .hs_kw100_baikeYA::before { content:"后制动器" } .hs_kw101_baikeYA::before { content:"压缩比" } .hs_kw102_baikeYA::before { content:"单碟" } .hs_kw103_baikeYA::before { content:"差速器" } .hs_kw104_baikeYA::before { content:"通风" } .hs_kw105_baikeYA::before { content:"后轮距" } .hs_kw106_baikeYA::before { content:"号" } .hs_kw107_baikeYA::before { content:"导" } .hs_kw0_configYf::before { content:"后驱" } .hs_kw1_configYf::before { content:"车门数" } .hs_kw2_configYf::before { content:"驻车" } .hs_kw3_configYf::before { content:"后悬架" } .hs_kw4_configYf::before { content:"多片" } .hs_kw5_configYf::before { content:"排量" } .hs_kw6_configYf::before { content:"承载式" } .hs_kw7_configYf::before { content:"供油" } .hs_kw8_configYf::before { content:"配气" } .hs_kw9_configYf::before { content:"综合" } .hs_kw10_configYf::before { content:"悬架" } .hs_kw11_configYf::before { content:"多连杆" } .hs_kw12_configYf::before { content:"中央" } .hs_kw13_configYf::before { content:"双叉臂式" } .hs_kw14_configYf::before { content:"备胎" } .hs_kw15_configYf::before { content:"电子" } .hs_kw16_configYf::before { content:"功率" } .hs_kw17_configYf::before { content:"排列" } .hs_kw18_configYf::before { content:"铝" } .hs_kw19_configYf::before { content:"轴距" } .hs_kw20_configYf::before { content:"长度" } .hs_kw21_configYf::before { content:"助力" } .hs_kw22_configYf::before { content:"元" } .hs_kw23_configYf::before { content:"商" } .hs_kw24_configYf::before { content:"直喷" } .hs_kw25_configYf::before { content:"独立" } .hs_kw26_configYf::before { content:"容积" } .hs_kw27_configYf::before { content:"实测" } .hs_kw28_configYf::before { content:"气缸" } .hs_kw29_configYf::before { content:"质量" } .hs_kw30_configYf::before { content:"后制动器" } .hs_kw31_configYf::before { content:"涡轮" } .hs_kw32_configYf::before { 
content:"差速器" } .hs_kw33_configYf::before { content:"后轮距" } .hs_kw34_configYf::before { content:"大型车" } .hs_kw35_configYf::before { content:"环保" } .hs_kw36_configYf::before { content:"万" } .hs_kw37_configYf::before { content:"离地间隙" } .hs_kw38_configYf::before { content:"油箱" } .hs_kw39_configYf::before { content:"整备" } .hs_kw40_configYf::before { content:"转速" } .hs_kw41_configYf::before { content:"年或" } .hs_kw42_configYf::before { content:"最大" } .hs_kw43_configYf::before { content:"气门数" } .hs_kw44_configYf::before { content:"版" } .hs_kw45_configYf::before { content:"宝马" } .hs_kw46_configYf::before { content:"油耗" } .hs_kw47_configYf::before { content:"前轮距" } .hs_kw48_configYf::before { content:"宽度" } .hs_kw49_configYf::before { content:"成功" } .hs_kw50_configYf::before { content:"缸盖" } .hs_kw51_configYf::before { content:"标准" } .hs_kw52_configYf::before { content:"前制动器" } .hs_kw53_configYf::before { content:"增压" } .hs_kw54_configYf::before { content:"时间" } .hs_kw55_configYf::before { content:"前置" } .hs_kw56_configYf::before { content:"前悬架" } .hs_kw57_configYf::before { content:"高度" } .hs_kw58_configYf::before { content:"后轮胎" } .hs_kw59_configYf::before { content:"规格" } .hs_kw60_configYf::before { content:"价" } .hs_kw61_configYf::before { content:"指" } .hs_kw62_configYf::before { content:"扭矩" } .hs_kw63_configYf::before { content:"缸体" } .hs_kw64_configYf::before { content:"欧" } .hs_kw65_configYf::before { content:"行程" } .hs_kw66_configYf::before { content:"盘式" } .hs_kw67_configYf::before { content:"缸径" } .hs_kw68_configYf::before { content:"华" } .hs_kw69_configYf::before { content:"燃油" } .hs_kw70_configYf::before { content:"前轮胎" } .hs_kw71_configYf::before { content:"进口" } .hs_kw72_configYf::before { content:"机构" } .hs_kw73_configYf::before { content:"进气" } .hs_kw74_configYf::before { content:"离合器" } .hs_kw75_configYf::before { content:"名称" } .hs_kw76_configYf::before { content:"质保" } .hs_kw77_configYf::before { content:"压缩比" } .hs_kw78_configYf::before { content:"通风" 
} .hs_kw79_configYf::before { content:"号" } .hs_kw80_configYf::before { content:"导" } .hs_kw0_optionsy::before { content:"适" } .hs_kw1_optionsy::before { content:"摄像头" } .hs_kw2_optionsy::before { content:"后桥" } .hs_kw3_optionsy::before { content:"电磁" } .hs_kw4_optionsy::before { content:"制动力分配" } .hs_kw5_optionsy::before { content:"差速锁" } .hs_kw6_optionsy::before { content:"加热" } .hs_kw7_optionsy::before { content:"前" } .hs_kw8_optionsy::before { content:"整体" } .hs_kw9_optionsy::before { content:"驻车" } .hs_kw10_optionsy::before { content:"成功" } .hs_kw11_optionsy::before { content:"天窗" } .hs_kw12_optionsy::before { content:"悬架" } .hs_kw13_optionsy::before { content:"行车电脑" } .hs_kw14_optionsy::before { content:"限滑" } .hs_kw15_optionsy::before { content:"放倒" } .hs_kw16_optionsy::before { content:"充电" } .hs_kw17_optionsy::before { content:"中央" } .hs_kw18_optionsy::before { content:"电子" } .hs_kw19_optionsy::before { content:"合金" } .hs_kw20_optionsy::before { content:"调节" } .hs_kw21_optionsy::before { content:"风" } .hs_kw22_optionsy::before { content:"接口" } .hs_kw23_optionsy::before { content:"空气" } .hs_kw24_optionsy::before { content:"铝" } .hs_kw25_optionsy::before { content:"高度" } .hs_kw26_optionsy::before { content:"仪表盘" } .hs_kw27_optionsy::before { content:"音源" } .hs_kw28_optionsy::before { content:"并线" } .hs_kw29_optionsy::before { content:"远光灯" } .hs_kw30_optionsy::before { content:"蓝牙" } .hs_kw31_optionsy::before { content:"气囊" } .hs_kw32_optionsy::before { content:"外接" } .hs_kw33_optionsy::before { content:"电话" } .hs_kw34_optionsy::before { content:"升" } .hs_kw35_optionsy::before { content:"上下" } .hs_kw36_optionsy::before { content:"喇叭" } .hs_kw37_optionsy::before { content:"后排" } .hs_kw38_optionsy::before { content:"支撑" } .hs_kw39_optionsy::before { content:"华" } .hs_kw40_optionsy::before { content:"独立" } .hs_kw41_optionsy::before { content:"全液晶" } .hs_kw42_optionsy::before { content:"真皮" } .hs_kw43_optionsy::before { content:"无钥匙" } .hs_kw44_optionsy::before 
{ content:"牵引力控制" } .hs_kw45_optionsy::before { content:"前后" } .hs_kw46_optionsy::before { content:"座椅移动" } .hs_kw47_optionsy::before { content:"预警" } .hs_kw48_optionsy::before { content:"影像" } .hs_kw49_optionsy::before { content:"儿童座椅" } .hs_kw50_optionsy::before { content:"扬声器" } .hs_kw51_optionsy::before { content:"视频" } .hs_kw52_optionsy::before { content:"驾驶" } .hs_kw53_optionsy::before { content:"前桥" } .hs_kw54_optionsy::before { content:"主动" } .hs_kw55_optionsy::before { content:"稳定" } .hs_kw56_optionsy::before { content:"选装" } .hs_kw57_optionsy::before { content:"材质" } .hs_kw58_optionsy::before { content:"单碟" } .hs_kw59_optionsy::before { content:"差速器" } .hs_kw60_optionsy::before { content:"通风" } .hs_kw61_optionsy::before { content:"近光灯" } .hs_kw62_optionsy::before { content:"导" }
二、环境
1、requests
pip install requests
2、PyV8
pip install PyV8
上面这种安装方式在我的Windows电脑上没有安装成功,于是去官网看了一下。PyV8只有Python2.X的版本,Python3.X用不了,需要自己到官网下载,下载地址:http://code.google.com/p/pyv8/downloads/list。我安装的是64位的Python2.7,因此PyV8也安装的是64位的。
三、解题思路
主要思路是先找到那段压缩过的、跟缺失文字有关的JS,然后找到其中与添加规则有关的关键方法;可以在里面加入console.log(xxx),通过控制台的输出来辅助定位。找到后,把这些JS直接用PyV8执行会报错,需要自己补充一些代码来修正错误。代码如下:
# coding=utf-8
"""Recover the CSS ``::before`` rules hidden by obfuscated JS on Autohome pages.

The config pages of car.autohome.com.cn insert some of their text through
CSS rules that are generated at runtime by obfuscated JavaScript.  This
script extracts those JavaScript snippets, runs them inside PyV8 against a
minimal fake ``document``/``window``, and prints every rule the snippets
tried to insert.

The code is kept compatible with Python 2.7 because PyV8 is only
distributed for Python 2.
"""
import logging
import re

# Minimal stand-in for the browser objects the obfuscated scripts touch.
# ``insertRule`` does not build a stylesheet; it appends every rule to the
# global JS string ``rules``, using '#' as the separator.
_JS_SHIM = (
    "var rules = '';"
    "var document = {};"
    "document.createElement = function() {"
    " return {"
    " sheet: {"
    " insertRule: function(rule, i) {"
    " if (rules.length == 0) {"
    " rules = rule;"
    " } else {"
    " rules = rules + '#' + rule;"
    " }"
    " }"
    " }"
    " }"
    "};"
    "document.querySelectorAll = function() {"
    " return {};"
    "};"
    "document.head = {};"
    "document.head.appendChild = function() {};"
    "var window = {};"
    "window.decodeURIComponent = decodeURIComponent;"
)

# One obfuscated snippet: "(function(xx..._)...(document);".
# A raw string keeps the backslashes as regex escapes rather than (invalid)
# Python string escapes.
_OBFUSCATED_JS_RE = re.compile(
    r'(\(function\([a-zA-Z]{2}.*?_\).*?\(document\);)')

# Seconds to wait for the HTTP response before giving up.
_REQUEST_TIMEOUT = 10


def clscontent(alljs):
    """Execute *alljs* in a fresh V8 context and return the collected rules.

    Args:
        alljs: JavaScript source, normally the string built by ``makejs``.

    Returns:
        The value of the JS global ``rules`` after execution (the inserted
        CSS rules joined by '#'), or ``None`` if anything fails.  Failures
        are logged with their traceback.
    """
    try:
        # Imported lazily: PyV8 is a hard-to-install binary extension, and
        # the rest of the module should stay usable without it.
        import PyV8

        # The ``with`` form enters the context and always leaves it again,
        # even when the script raises.
        with PyV8.JSContext() as ctx:
            ctx.eval(alljs)
            return ctx.eval('rules')
    except Exception:
        logging.exception('clscontent function exception')
        return None


def makejs(html):
    """Build the JavaScript program to run for one page.

    Args:
        html: Source of an Autohome config page.

    Returns:
        The browser shim followed by every obfuscated snippet found in
        *html* (just the shim when none is found), or ``None`` if *html*
        cannot be searched, e.g. because it is not a string.  Failures are
        logged with their traceback.
    """
    try:
        snippets = _OBFUSCATED_JS_RE.findall(html)
        return _JS_SHIM + ''.join(snippets)
    except Exception:
        logging.exception('makejs function exception')
        return None


def main(index):
    """Fetch the config page of series *index* and print its hidden CSS rules.

    Args:
        index: Integer series id used in the page URL.

    Returns:
        ``None``.  Each recovered rule is printed on its own line; errors
        are reported on stdout or through ``logging`` instead of being
        raised.
    """
    try:
        url = 'https://car.autohome.com.cn/config/series/%d.html' % index

        # Imported lazily so that makejs() can be used without requests.
        import requests

        # Without a timeout a stalled connection would block forever.
        req = requests.get(url, timeout=_REQUEST_TIMEOUT)

        alljs = makejs(req.text)
        if alljs is None:
            print('makejs error')
            return

        result = clscontent(alljs)
        if result is None:
            print('clscontent error')
            return

        # The shim joins the rules with '#'; split them back apart.
        for item in result.split('#'):
            print(item)
    except Exception:
        # logging.exception records the traceback; calling the ``logging``
        # module itself raises TypeError.
        logging.exception('main function exception')


if __name__ == '__main__':
    main(153)
四、后话
解这些东西需要较强的JS基本功。本文章仅供学习参考,请勿用于商业用途!