汽车之家配置参数抓取

一、开头

汽车之家配置参数抓取最难的地方在于:页面中有一部分文字是用JS动态生成的,直接抓取页面源码会缺失这些文字。而且该网站会经常改动混淆方式,用正则表达式处理不仅费时费力,还需要经常跟着改动。因此选择用JS解析器来处理,为了方便,这里选用PyV8。只要把关键的样式规则解析出来,后面的工作就好办了。先看结果。

.hs_kw0_baikeYA::before { content:"环保" }
.hs_kw1_baikeYA::before { content:"适" }
.hs_kw2_baikeYA::before { content:"摄像头" }
.hs_kw3_baikeYA::before { content:"离地间隙" }
.hs_kw4_baikeYA::before { content:"油箱" }
.hs_kw5_baikeYA::before { content:"后桥" }
.hs_kw6_baikeYA::before { content:"整备" }
.hs_kw7_baikeYA::before { content:"转速" }
.hs_kw8_baikeYA::before { content:"制动力分配" }
.hs_kw9_baikeYA::before { content:"最大" }
.hs_kw10_baikeYA::before { content:"气门数" }
.hs_kw11_baikeYA::before { content:"车门数" }
.hs_kw12_baikeYA::before { content:"差速锁" }
.hs_kw13_baikeYA::before { content:"加热" }
.hs_kw14_baikeYA::before { content:"前" }
.hs_kw15_baikeYA::before { content:"整体" }
.hs_kw16_baikeYA::before { content:"驻车" }
.hs_kw17_baikeYA::before { content:"后悬架" }
.hs_kw18_baikeYA::before { content:"排量" }
.hs_kw19_baikeYA::before { content:"油耗" }
.hs_kw20_baikeYA::before { content:"供油" }
.hs_kw21_baikeYA::before { content:"配气" }
.hs_kw22_baikeYA::before { content:"前轮距" }
.hs_kw23_baikeYA::before { content:"宽度" }
.hs_kw24_baikeYA::before { content:"成功" }
.hs_kw25_baikeYA::before { content:"综合" }
.hs_kw26_baikeYA::before { content:"天窗" }
.hs_kw27_baikeYA::before { content:"悬架" }
.hs_kw28_baikeYA::before { content:"行车电脑" }
.hs_kw29_baikeYA::before { content:"缸盖" }
.hs_kw30_baikeYA::before { content:"标准" }
.hs_kw31_baikeYA::before { content:"限滑" }
.hs_kw32_baikeYA::before { content:"放倒" }
.hs_kw33_baikeYA::before { content:"前制动器" }
.hs_kw34_baikeYA::before { content:"中央" }
.hs_kw35_baikeYA::before { content:"备胎" }
.hs_kw36_baikeYA::before { content:"电子" }
.hs_kw37_baikeYA::before { content:"功率" }
.hs_kw38_baikeYA::before { content:"合金" }
.hs_kw39_baikeYA::before { content:"排列" }
.hs_kw40_baikeYA::before { content:"调节" }
.hs_kw41_baikeYA::before { content:"风" }
.hs_kw42_baikeYA::before { content:"接口" }
.hs_kw43_baikeYA::before { content:"空气" }
.hs_kw44_baikeYA::before { content:"前悬架" }
.hs_kw45_baikeYA::before { content:"高度" }
.hs_kw46_baikeYA::before { content:"铝" }
.hs_kw47_baikeYA::before { content:"后轮胎" }
.hs_kw48_baikeYA::before { content:"仪表盘" }
.hs_kw49_baikeYA::before { content:"规格" }
.hs_kw50_baikeYA::before { content:"前排" }
.hs_kw51_baikeYA::before { content:"音源" }
.hs_kw52_baikeYA::before { content:"价" }
.hs_kw53_baikeYA::before { content:"轴距" }
.hs_kw54_baikeYA::before { content:"并线" }
.hs_kw55_baikeYA::before { content:"指" }
.hs_kw56_baikeYA::before { content:"蓝牙" }
.hs_kw57_baikeYA::before { content:"扭矩" }
.hs_kw58_baikeYA::before { content:"缸体" }
.hs_kw59_baikeYA::before { content:"长度" }
.hs_kw60_baikeYA::before { content:"氙气" }
.hs_kw61_baikeYA::before { content:"助力" }
.hs_kw62_baikeYA::before { content:"行程" }
.hs_kw63_baikeYA::before { content:"气囊" }
.hs_kw64_baikeYA::before { content:"容量" }
.hs_kw65_baikeYA::before { content:"元" }
.hs_kw66_baikeYA::before { content:"缸径" }
.hs_kw67_baikeYA::before { content:"外接" }
.hs_kw68_baikeYA::before { content:"商" }
.hs_kw69_baikeYA::before { content:"电话" }
.hs_kw70_baikeYA::before { content:"喇叭" }
.hs_kw71_baikeYA::before { content:"后排" }
.hs_kw72_baikeYA::before { content:"支撑" }
.hs_kw73_baikeYA::before { content:"独立" }
.hs_kw74_baikeYA::before { content:"全液晶" }
.hs_kw75_baikeYA::before { content:"燃油" }
.hs_kw76_baikeYA::before { content:"容积" }
.hs_kw77_baikeYA::before { content:"真皮" }
.hs_kw78_baikeYA::before { content:"无钥匙" }
.hs_kw79_baikeYA::before { content:"实测" }
.hs_kw80_baikeYA::before { content:"牵引力控制" }
.hs_kw81_baikeYA::before { content:"前轮胎" }
.hs_kw82_baikeYA::before { content:"座椅移动" }
.hs_kw83_baikeYA::before { content:"预警" }
.hs_kw84_baikeYA::before { content:"影像" }
.hs_kw85_baikeYA::before { content:"儿童座椅" }
.hs_kw86_baikeYA::before { content:"机构" }
.hs_kw87_baikeYA::before { content:"进气" }
.hs_kw88_baikeYA::before { content:"名称" }
.hs_kw89_baikeYA::before { content:"扬声器" }
.hs_kw90_baikeYA::before { content:"视频" }
.hs_kw91_baikeYA::before { content:"质保" }
.hs_kw92_baikeYA::before { content:"气缸" }
.hs_kw93_baikeYA::before { content:"驾驶" }
.hs_kw94_baikeYA::before { content:"前桥" }
.hs_kw95_baikeYA::before { content:"质量" }
.hs_kw96_baikeYA::before { content:"主动" }
.hs_kw97_baikeYA::before { content:"电池" }
.hs_kw98_baikeYA::before { content:"稳定" }
.hs_kw99_baikeYA::before { content:"材质" }
.hs_kw100_baikeYA::before { content:"后制动器" }
.hs_kw101_baikeYA::before { content:"压缩比" }
.hs_kw102_baikeYA::before { content:"单碟" }
.hs_kw103_baikeYA::before { content:"差速器" }
.hs_kw104_baikeYA::before { content:"通风" }
.hs_kw105_baikeYA::before { content:"后轮距" }
.hs_kw106_baikeYA::before { content:"号" }
.hs_kw107_baikeYA::before { content:"导" }
.hs_kw0_configYf::before { content:"后驱" }
.hs_kw1_configYf::before { content:"车门数" }
.hs_kw2_configYf::before { content:"驻车" }
.hs_kw3_configYf::before { content:"后悬架" }
.hs_kw4_configYf::before { content:"多片" }
.hs_kw5_configYf::before { content:"排量" }
.hs_kw6_configYf::before { content:"承载式" }
.hs_kw7_configYf::before { content:"供油" }
.hs_kw8_configYf::before { content:"配气" }
.hs_kw9_configYf::before { content:"综合" }
.hs_kw10_configYf::before { content:"悬架" }
.hs_kw11_configYf::before { content:"多连杆" }
.hs_kw12_configYf::before { content:"中央" }
.hs_kw13_configYf::before { content:"双叉臂式" }
.hs_kw14_configYf::before { content:"备胎" }
.hs_kw15_configYf::before { content:"电子" }
.hs_kw16_configYf::before { content:"功率" }
.hs_kw17_configYf::before { content:"排列" }
.hs_kw18_configYf::before { content:"铝" }
.hs_kw19_configYf::before { content:"轴距" }
.hs_kw20_configYf::before { content:"长度" }
.hs_kw21_configYf::before { content:"助力" }
.hs_kw22_configYf::before { content:"元" }
.hs_kw23_configYf::before { content:"商" }
.hs_kw24_configYf::before { content:"直喷" }
.hs_kw25_configYf::before { content:"独立" }
.hs_kw26_configYf::before { content:"容积" }
.hs_kw27_configYf::before { content:"实测" }
.hs_kw28_configYf::before { content:"气缸" }
.hs_kw29_configYf::before { content:"质量" }
.hs_kw30_configYf::before { content:"后制动器" }
.hs_kw31_configYf::before { content:"涡轮" }
.hs_kw32_configYf::before { content:"差速器" }
.hs_kw33_configYf::before { content:"后轮距" }
.hs_kw34_configYf::before { content:"大型车" }
.hs_kw35_configYf::before { content:"环保" }
.hs_kw36_configYf::before { content:"万" }
.hs_kw37_configYf::before { content:"离地间隙" }
.hs_kw38_configYf::before { content:"油箱" }
.hs_kw39_configYf::before { content:"整备" }
.hs_kw40_configYf::before { content:"转速" }
.hs_kw41_configYf::before { content:"年或" }
.hs_kw42_configYf::before { content:"最大" }
.hs_kw43_configYf::before { content:"气门数" }
.hs_kw44_configYf::before { content:"版" }
.hs_kw45_configYf::before { content:"宝马" }
.hs_kw46_configYf::before { content:"油耗" }
.hs_kw47_configYf::before { content:"前轮距" }
.hs_kw48_configYf::before { content:"宽度" }
.hs_kw49_configYf::before { content:"成功" }
.hs_kw50_configYf::before { content:"缸盖" }
.hs_kw51_configYf::before { content:"标准" }
.hs_kw52_configYf::before { content:"前制动器" }
.hs_kw53_configYf::before { content:"增压" }
.hs_kw54_configYf::before { content:"时间" }
.hs_kw55_configYf::before { content:"前置" }
.hs_kw56_configYf::before { content:"前悬架" }
.hs_kw57_configYf::before { content:"高度" }
.hs_kw58_configYf::before { content:"后轮胎" }
.hs_kw59_configYf::before { content:"规格" }
.hs_kw60_configYf::before { content:"价" }
.hs_kw61_configYf::before { content:"指" }
.hs_kw62_configYf::before { content:"扭矩" }
.hs_kw63_configYf::before { content:"缸体" }
.hs_kw64_configYf::before { content:"欧" }
.hs_kw65_configYf::before { content:"行程" }
.hs_kw66_configYf::before { content:"盘式" }
.hs_kw67_configYf::before { content:"缸径" }
.hs_kw68_configYf::before { content:"华" }
.hs_kw69_configYf::before { content:"燃油" }
.hs_kw70_configYf::before { content:"前轮胎" }
.hs_kw71_configYf::before { content:"进口" }
.hs_kw72_configYf::before { content:"机构" }
.hs_kw73_configYf::before { content:"进气" }
.hs_kw74_configYf::before { content:"离合器" }
.hs_kw75_configYf::before { content:"名称" }
.hs_kw76_configYf::before { content:"质保" }
.hs_kw77_configYf::before { content:"压缩比" }
.hs_kw78_configYf::before { content:"通风" }
.hs_kw79_configYf::before { content:"号" }
.hs_kw80_configYf::before { content:"导" }
.hs_kw0_optionsy::before { content:"适" }
.hs_kw1_optionsy::before { content:"摄像头" }
.hs_kw2_optionsy::before { content:"后桥" }
.hs_kw3_optionsy::before { content:"电磁" }
.hs_kw4_optionsy::before { content:"制动力分配" }
.hs_kw5_optionsy::before { content:"差速锁" }
.hs_kw6_optionsy::before { content:"加热" }
.hs_kw7_optionsy::before { content:"前" }
.hs_kw8_optionsy::before { content:"整体" }
.hs_kw9_optionsy::before { content:"驻车" }
.hs_kw10_optionsy::before { content:"成功" }
.hs_kw11_optionsy::before { content:"天窗" }
.hs_kw12_optionsy::before { content:"悬架" }
.hs_kw13_optionsy::before { content:"行车电脑" }
.hs_kw14_optionsy::before { content:"限滑" }
.hs_kw15_optionsy::before { content:"放倒" }
.hs_kw16_optionsy::before { content:"充电" }
.hs_kw17_optionsy::before { content:"中央" }
.hs_kw18_optionsy::before { content:"电子" }
.hs_kw19_optionsy::before { content:"合金" }
.hs_kw20_optionsy::before { content:"调节" }
.hs_kw21_optionsy::before { content:"风" }
.hs_kw22_optionsy::before { content:"接口" }
.hs_kw23_optionsy::before { content:"空气" }
.hs_kw24_optionsy::before { content:"铝" }
.hs_kw25_optionsy::before { content:"高度" }
.hs_kw26_optionsy::before { content:"仪表盘" }
.hs_kw27_optionsy::before { content:"音源" }
.hs_kw28_optionsy::before { content:"并线" }
.hs_kw29_optionsy::before { content:"远光灯" }
.hs_kw30_optionsy::before { content:"蓝牙" }
.hs_kw31_optionsy::before { content:"气囊" }
.hs_kw32_optionsy::before { content:"外接" }
.hs_kw33_optionsy::before { content:"电话" }
.hs_kw34_optionsy::before { content:"升" }
.hs_kw35_optionsy::before { content:"上下" }
.hs_kw36_optionsy::before { content:"喇叭" }
.hs_kw37_optionsy::before { content:"后排" }
.hs_kw38_optionsy::before { content:"支撑" }
.hs_kw39_optionsy::before { content:"华" }
.hs_kw40_optionsy::before { content:"独立" }
.hs_kw41_optionsy::before { content:"全液晶" }
.hs_kw42_optionsy::before { content:"真皮" }
.hs_kw43_optionsy::before { content:"无钥匙" }
.hs_kw44_optionsy::before { content:"牵引力控制" }
.hs_kw45_optionsy::before { content:"前后" }
.hs_kw46_optionsy::before { content:"座椅移动" }
.hs_kw47_optionsy::before { content:"预警" }
.hs_kw48_optionsy::before { content:"影像" }
.hs_kw49_optionsy::before { content:"儿童座椅" }
.hs_kw50_optionsy::before { content:"扬声器" }
.hs_kw51_optionsy::before { content:"视频" }
.hs_kw52_optionsy::before { content:"驾驶" }
.hs_kw53_optionsy::before { content:"前桥" }
.hs_kw54_optionsy::before { content:"主动" }
.hs_kw55_optionsy::before { content:"稳定" }
.hs_kw56_optionsy::before { content:"选装" }
.hs_kw57_optionsy::before { content:"材质" }
.hs_kw58_optionsy::before { content:"单碟" }
.hs_kw59_optionsy::before { content:"差速器" }
.hs_kw60_optionsy::before { content:"通风" }
.hs_kw61_optionsy::before { content:"近光灯" }
.hs_kw62_optionsy::before { content:"导" }

二、环境

1、requests

pip install requests

2、PyV8

pip install PyV8

上面这种安装方式在我的Windows电脑上没有安装成功,于是去官网看了一下。官网上只找到了Python2.X版本的PyV8,Python3.X的用不了,需要自己到官网下载对应的安装包,下载地址:http://code.google.com/p/pyv8/downloads/list。我安装的是64位的Python2.7,因此安装的也是64位的PyV8。

三、解题思路

主要思路是先找到那段被压缩的、跟缺失文字有关的JS,然后找到其中与添加样式规则有关的关键方法;可以在JS里加入console.log(xxx),通过查看控制台的输出来辅助定位关键方法。找到后,把这些JS直接交给PyV8执行会报错,需要自己补充一些代码(模拟缺失的浏览器对象)来修正错误。代码如下:

#coding=utf-8
import re
import PyV8
import logging
import requests

def clscontent(alljs):
    """Run the assembled JavaScript in V8 and return the collected CSS rules.

    alljs is evaluated in a fresh PyV8 context and the value of the
    JavaScript global ``rules`` is returned.

    Returns None if anything fails; the exception is logged.
    """
    try:
        ctx = PyV8.JSContext()
        ctx.enter()
        try:
            ctx.eval(alljs)
            return ctx.eval('rules')
        finally:
            # Always leave the context we entered, on success and on error,
            # so entered contexts do not pile up across calls.
            ctx.leave()
    except Exception:
        logging.exception('clscontent function exception')
        return None

def makejs(html):
    """Build a JavaScript program that records the page's injected CSS rules.

    The returned source starts with a small preamble that defines a global
    ``rules`` string plus stub ``document`` and ``window`` objects.  The
    stubbed ``insertRule`` appends every rule it receives to ``rules``,
    separated by '#'.  After the preamble come all snippets found in html
    that look like ``(function(xx..._)...(document);``.

    html is the page source to search.

    Returns the assembled JavaScript source, or None if an exception
    occurred (for example when html is not a string); the exception is
    logged.
    """
    try:
        preamble = ("var rules = '';"
                    "var document = {};"
                    "document.createElement = function() {"
                    "      return {"
                    "              sheet: {"
                    "                      insertRule: function(rule, i) {"
                    "                              if (rules.length == 0) {"
                    "                                      rules = rule;"
                    "                              } else {"
                    "                                      rules = rules + '#' + rule;"
                    "                              }"
                    "                      }"
                    "              }"
                    "      }"
                    "};"
                    "document.querySelectorAll = function() {"
                    "      return {};"
                    "};"
                    "document.head = {};"
                    "document.head.appendChild = function() {};"

                    "var window = {};"
                    "window.decodeURIComponent = decodeURIComponent;")

        # Raw string: the pattern is full of backslash escapes, which are
        # invalid escape sequences in a normal string literal on Python 3.
        snippets = re.findall(
            r'(\(function\([a-zA-Z]{2}.*?_\).*?\(document\);)', html)
        return ''.join([preamble] + snippets)
    except Exception:
        logging.exception('makejs function exception')
        return None

def main(index):
    """Download one series' config page and print its decoded CSS rules.

    index is the integer series id substituted into the config-page URL.

    The page is passed to makejs, the result is run by clscontent, and the
    '#'-separated rules are printed one per line.  Nothing is returned.
    Failures are reported by printing a short message or logging the
    exception; no exception is raised to the caller.
    """
    try:
        # A timeout keeps a stalled connection from blocking forever.
        req = requests.get(
            'https://car.autohome.com.cn/config/series/%d.html' % index,
            timeout=10)
        alljs = makejs(req.text)
        if alljs is None:
            print('makejs error')
            return

        result = clscontent(alljs)
        if result is None:
            print('clscontent error')
            return

        for item in result.split('#'):
            print(item)
    except Exception:
        # ``logging`` is a module and cannot be called directly; use
        # logging.exception so the traceback is recorded.
        logging.exception('main function exception')

# Script entry point: scrape the config page of series id 153.
if __name__ == '__main__':
    main(153)

四、后话

解这些东西需要较强的JS基本功。本文章仅供学习参考,请勿用于商业用途!

转载于:https://www.cnblogs.com/qiyueliuguang/p/8144248.html

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值