python selenium 获取动态网页数据

最新推荐文章于 2024-07-21 10:00:00 发布

光辉灿烂@

最新推荐文章于 2024-07-21 10:00:00 发布

阅读量2.5k

点赞数

分类专栏： Python

本文链接：https://blog.csdn.net/u012406790/article/details/75115536

版权

Python 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

# -*- coding:utf-8 -*-

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import json
import sys
# NOTE(review): Python 2-only setup -- the builtin `reload` and the `print`
# statement below are not valid Python 3.
# Reload `sys` and then set the process-wide default string encoding to UTF-8
# (presumably so the Chinese text scraped below can be written without
# explicit encode calls -- confirm before porting).
reload(sys)
sys.setdefaultencoding("utf-8")
# First entry of sys.path (normally the directory of the launched script);
# jsonDump writes its output files into this directory.
curpath=sys.path[0]
print curpath

def getData(url):
    """Scrape title/detail pairs from a JavaScript-rendered page into a JSON file.

    The page is opened in Chrome through Selenium, its rendered DOM is parsed
    with BeautifulSoup, and every ``prolist_info_title`` span is paired, in
    document order, with the ``prolist_info_detail`` span at the same position.
    Each pair is appended to the output file as its own one-key JSON object
    via ``jsonDump``.

    Args:
        url: Two-item sequence ``[page_url, output_name]``. ``page_url`` is
            the address to load; ``output_name`` is handed to ``jsonDump`` as
            the output file stem.

    Raises:
        AttributeError: If the page contains no ``div.unit_loan_prj_detail``
            container (``soup.find`` then returns ``None``).
    """
    driver = webdriver.Chrome()
    try:
        driver.set_page_load_timeout(40)
        driver.get(url[0])
        # Wait *after* navigation so the page's scripts can fill in the
        # content before the DOM snapshot is taken.
        time.sleep(3)
        html = driver.page_source
    finally:
        # Always shut the browser down, even when loading fails; otherwise
        # every call leaves a Chrome/chromedriver process behind.
        driver.quit()

    soup = BeautifulSoup(html, 'lxml')
    table = soup.find('div', class_="unit_loan_prj_detail")
    titles = [
        span.get_text()
        for span in table.find_all('span', class_="prolist_info_title")
    ]
    details = table.find_all('span', class_="prolist_info_detail")

    # get_text() always returns a string that already contains the text of
    # nested spans, so no separate fallback for nested markup is needed.
    # zip() stops at the shorter sequence, so a detail span without a
    # matching title is skipped instead of raising IndexError.
    for title, detail in zip(titles, details):
        jsonDump({title: detail.get_text()}, url[1])

def jsonDump(_json, name):
    """Append one JSON record to ``<curpath>/<name>.json``.

    The record is serialised with ``ensure_ascii=False`` (non-ASCII text is
    written as-is rather than as ``\\uXXXX`` escapes) and is followed by a
    comma and a newline. The file therefore accumulates comma-terminated
    records; taken as a whole it is not a single valid JSON document.

    Args:
        _json: JSON-serialisable object to append.
        name: Output file stem; ``.json`` is added to it.
    """
    target = '/'.join([curpath, name + '.json'])
    with open(target, 'a') as sink:
        json.dump(_json, sink, ensure_ascii=False)
        sink.write(',\n')

if __name__ == "__main__":
    # Script entry point: scrape one sample page.
    # First item is the page to load, second is the stem of the JSON output file.
    url = [
        'http://www.powerec.net/gdwz-web/html/xjxx/inquiry_detail.html?inq_h_id=ZGFmNTM2ZjctOWFlYi00ZDEyLWEyZjItNDFjNjAxYmY4MTZj',
        'test',
    ]
    getData(url)

光辉灿烂@

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
python selenium 获取动态网页数据

# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import json
import sys
reload(sys)
sys.setdefaultencoding("utf-8")
curpath=sys.path[0]
复制链接

扫一扫

专栏目录