Collecting Web Page Data Requested via Ajax with Python: A Detailed Guide

This article shows how to use Python together with a browser-automation tool (Selenium) and request inspection to collect web page data that is loaded via Ajax. Using the FX168 financial site as an example, it scrapes the real-time price and change percentage of the US Dollar Index, the Shanghai Composite and several other instruments, and stores them in a database.

Ajax is a technique for building fast, dynamic web pages: it lets a page update part of its content without reloading the whole page.

For data that a page loads via Ajax, there are generally two ways to collect it:

1. Drive a real browser with an automation tool (Selenium + chromedriver) and scrape the rendered page.

2. Use packet capture (or the browser's network panel) to find the Ajax request the page sends, then call that endpoint directly and work with the data it returns.


Inspect the format of the returned data, then process it to pull out the fields we want.

Target site: FX168, a Chinese financial site with a global focus (quote.fx168.com).

Target data: the daily price and change percentage of the US Dollar Index, Shanghai Composite, Shenzhen Component, Hang Seng Index, spot gold, Brent crude, S&P 500, and the offshore RMB rate.

Approach 1: scrape the data with the browser-automation tool (Selenium + chromedriver)

import datetime

from lxml import etree
from selenium import webdriver

from common.pgutils import get_conn

sql_truncate = "truncate TABLE public.news_morning_code"
sql = "insert into public.news_morning_code(TradeName,TradePrice,ChangePre,create_time) values(%s,%s,%s,%s)"

def insert_data(conn, name, present_price, rise_fall):
    current_date = datetime.datetime.now()
    present_price = float(present_price)
    # the change text looks like "+12.34(0.56%)"; keep only the number inside the parentheses
    rise_fall = float(rise_fall.split('(')[1][:-2])
    with conn.cursor() as cur:
        sql_params = [name, present_price, rise_fall, current_date]
        cur.execute(sql, sql_params)

def get_usdcny(conn):
    """Offshore RMB rate (USDCNY)"""
    # Alternative storage: the data could also be written to MongoDB via pymongo
    # instead of PostgreSQL.
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    driver.get('http://quote.fx168.com/USDCNY/')
    result = driver.page_source
    xml = etree.HTML(result)
    datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
    name = datas.xpath('./h2/span/text()')[0]
    present_price = xml.xpath('//*[@id="hangh3"]/span')[0].xpath('string(.)')  # current price
    rise_fall = datas.xpath("./h3/b/text()")[0]  # daily change
    print(name, present_price, rise_fall)
    insert_data(conn, name, present_price, rise_fall)
    driver.quit()
    print('Offshore RMB rate stored')

def get_shcomp(conn):
    """Shanghai Composite Index (SHCOMP)"""
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    driver.get('http://quote.fx168.com/SHCOMP/')
    result = driver.page_source
    xml = etree.HTML(result)
    datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
    name = datas.xpath('./h2/span/text()')[0]
    present_price = xml.xpath('//*[@id="hangh3"]/span')[0].xpath('string(.)')  # current price
    rise_fall = datas.xpath("./h3/b/text()")[0]  # daily change
    print(name, present_price, rise_fall)
    insert_data(conn, name, present_price, rise_fall)
    driver.quit()
    print('Shanghai Composite stored')

def get_szcomp(conn):
    """Shenzhen Component Index (SZCOMP)"""
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    driver.get('http://quote.fx168.com/SZCOMP/')
    result = driver.page_source
    xml = etree.HTML(result)
    datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
    name = datas.xpath('./h2/span/text()')[0]
    present_price = xml.xpath('//*[@id="hangh3"]/span')[0].xpath('string(.)')  # current price
    rise_fall = datas.xpath("./h3/b/text()")[0]  # daily change
    print(name, present_price, rise_fall)
    insert_data(conn, name, present_price, rise_fall)
    driver.quit()
    print('Shenzhen Component stored')

def get_hsi(conn):
    """Hang Seng Index (HSI)"""
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    driver.get('http://quote.fx168.com/HSI/')
    result = driver.page_source
    xml = etree.HTML(result)
    datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
    name = datas.xpath('./h2/span/text()')[0]
    present_price = xml.xpath('//*[@id="hangh3"]/span')[0].xpath('string(.)')  # current price
    rise_fall = datas.xpath("./h3/b/text()")[0]  # daily change
    print(name, present_price, rise_fall)
    insert_data(conn, name, present_price, rise_fall)
    driver.quit()
    print('Hang Seng Index stored')

def get_dini(conn):
    """US Dollar Index (DINI)"""
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    driver.get('http://quote.fx168.com/DINI/')
    result = driver.page_source
    xml = etree.HTML(result)
    datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
    name = datas.xpath('./h2/span/text()')[0]
    present_price = xml.xpath('//*[@id="hangh3"]/span')[0].xpath('string(.)')  # current price
    rise_fall = datas.xpath("./h3/b/text()")[0]  # daily change
    print(name, present_price, rise_fall)
    insert_data(conn, name, present_price, rise_fall)
    driver.quit()
    print('US Dollar Index stored')

def get_xau(conn):
    """Spot gold (XAU)"""
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    driver.get('http://quote.fx168.com/XAU/')
    result = driver.page_source
    xml = etree.HTML(result)
    datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
    name = datas.xpath('./h2/span/text()')[0]
    present_price = xml.xpath('//*[@id="hangh3"]/span')[0].xpath('string(.)')  # current price
    rise_fall = datas.xpath("./h3/b/text()")[0]  # daily change
    print(name, present_price, rise_fall)
    insert_data(conn, name, present_price, rise_fall)
    driver.quit()
    print('Spot gold stored')

def get_spciw(conn):
    """S&P 500 (SPCIW)"""
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    driver.get('http://quote.fx168.com/SPCIW/')
    result = driver.page_source
    xml = etree.HTML(result)
    datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
    # the price is located via an absolute XPath on this page
    present_price = xml.xpath('/html/body/section[1]/div/div/div[2]/div[1]/div/div[1]/h3/span')[0].xpath('string(.)')  # current price
    rise_fall = datas.xpath("./h3/b/text()")[0]  # daily change
    name = '标普500'  # the name is hard-coded here rather than read from the page
    print(name, present_price, rise_fall)
    insert_data(conn, name, present_price, rise_fall)
    driver.quit()
    print('S&P 500 stored')

def get_brents(conn):
    """Brent crude (BRENTS)"""
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    driver.get('http://quote.fx168.com/BRENTS/')
    result = driver.page_source
    xml = etree.HTML(result)
    datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
    name = datas.xpath('./h2/span/text()')[0]
    present_price = xml.xpath('//*[@id="hangh3"]/span')[0].xpath('string(.)')  # current price
    rise_fall = datas.xpath("./h3/b/text()")[0]  # daily change
    print(name, present_price, rise_fall)
    insert_data(conn, name, present_price, rise_fall)
    driver.quit()
    print('Brent crude stored')

def main():
    conn = get_conn()
    try:
        with conn:
            with conn.cursor() as cur:
                cur.execute(sql_truncate)
            get_usdcny(conn)
            get_shcomp(conn)
            get_szcomp(conn)
            get_hsi(conn)
            get_dini(conn)
            get_xau(conn)
            get_spciw(conn)
            get_brents(conn)
    finally:
        if conn:
            conn.close()

if __name__ == '__main__':
    main()
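
The eight functions above differ only in the quote URL and the log message, so they can be folded into one parameterized helper. Below is a minimal sketch of such a refactor; the scrape_quote name and the headless-Chrome option are additions of mine, not part of the original script, and older Selenium releases pass the options object via the chrome_options keyword (and the chromedriver path the same positional way as the original code) instead of options.

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from lxml import etree

CHROMEDRIVER = r"/usr/local/share/chromedriver"

def scrape_quote(conn, url, label):
    """Fetch one fx168 quote page and save name / price / change via insert_data()."""
    opts = Options()
    opts.add_argument('--headless')  # run without opening a browser window (my addition)
    driver = webdriver.Chrome(CHROMEDRIVER, options=opts)
    try:
        driver.get(url)
        xml = etree.HTML(driver.page_source)
        block = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
        name = block.xpath('./h2/span/text()')[0]
        price = xml.xpath('//*[@id="hangh3"]/span')[0].xpath('string(.)')
        change = block.xpath("./h3/b/text()")[0]
        print(name, price, change)
        insert_data(conn, name, price, change)
        print(label, 'stored')
    finally:
        driver.quit()

# Usage, mirroring main() above:
# scrape_quote(conn, 'http://quote.fx168.com/USDCNY/', 'USDCNY')
# scrape_quote(conn, 'http://quote.fx168.com/SHCOMP/', 'SHCOMP')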

Approach 2: analyze the page structure and how the data is returned, then collect it straight from the Ajax endpoint

#!/usr/bin/env python3.4
# encoding: utf-8
"""
Created on 17-12-12
@author: Xu
"""
import json
import time
import datetime

import requests

from common.pgutils import get_conn

def get_json():
    url = 'http://fx168api.fx168.com/InterfaceCollect/Default.aspx'
    my_headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36',
        'Host': 'fx168api.fx168.com',
        'Referer': 'http://quote.fx168.com/BRENTS/',
    }
    current_time = int(time.time())
    current_date = datetime.datetime.now()
    # query-string parameters; "_" is the cache-busting timestamp
    datas = {
        'Code': 'fx168',
        'bCode': 'IQuoteDataALL',
        'succ_callback': 'show_whdata',
        '_': current_time
    }
    cont = requests.get(url=url, headers=my_headers, params=datas)
    cont.encoding = 'utf-8'
    result = cont.text
    # the response is JSONP: show_whdata({...}) -- slice off the callback name and parentheses
    content = result[12:-1]
    info = json.loads(content)['List']
    sql_truncate = "truncate TABLE public.news_morning_code"
    sql = "insert into public.news_morning_code(TradeName,TradePrice,ChangePre,create_time) values(%s,%s,%s,%s)"
    # indexes in the List array:
    # 0: US Dollar Index  30: Shanghai Composite  74: Shenzhen Component  76: Hang Seng Index
    # 19: spot gold  73: Brent crude  29: S&P 500  45: offshore RMB rate
    code_list = [0, 30, 74, 76, 19, 73, 29, 45]
    conn = get_conn()
    try:
        with conn:
            with conn.cursor() as cur:
                cur.execute(sql_truncate)
                for i in code_list:
                    sql_params = [info[i]['TradeName'], info[i]['TradePrice'], info[i]['ChangePre'], current_date]
                    cur.execute(sql, sql_params)
    finally:
        if conn:
            conn.close()

if __name__ == '__main__':
    get_json()
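
The slice result[12:-1] only works while the callback name is exactly show_whdata. A slightly more tolerant way to strip the JSONP wrapper is to cut at the first opening and the last closing parenthesis; this is a small sketch of mine, not part of the original article:

import json

def unwrap_jsonp(text):
    """Strip a JSONP wrapper such as show_whdata({...}) and return the parsed JSON."""
    start = text.index('(') + 1   # first parenthesis, right after the callback name
    end = text.rindex(')')        # last closing parenthesis
    return json.loads(text[start:end])

# info = unwrap_jsonp(cont.text)['List']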

Connecting to the PostgreSQL database (the get_conn helper used above) and saving the data

#!/usr/bin/env python3.4
# encoding: utf-8
"""
Created on 17-12-12
@author: Xu
"""
import psycopg2

def get_conn():
    database = 'your database name'
    user = 'your user name'
    password = 'your password'
    host = 'database host IP'
    port = '5432'
    return psycopg2.connect(database=database, user=user, password=password, host=host, port=port)
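
Both scripts write into public.news_morning_code, but the article never shows the table itself. A guessed definition, reconstructed only from the column names in the INSERT statement (the column types are assumptions), might look like this:

from common.pgutils import get_conn

# Guessed schema: column names come from the INSERT statement used above,
# the column types are assumptions.
DDL = """
create table if not exists public.news_morning_code (
    TradeName   varchar(100),
    TradePrice  numeric,
    ChangePre   numeric,
    create_time timestamp
)
"""

def create_table():
    conn = get_conn()
    try:
        with conn:
            with conn.cursor() as cur:
                cur.execute(DDL)
    finally:
        conn.close()

if __name__ == '__main__':
    create_table()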

[Screenshot: the collected quotes as stored in the database]

Of the two approaches, the second is clearly the faster one. The difference is that the first has to render the page and parse its HTML structure to pull the values out, while the second works directly on the data the Ajax request returns and simply keeps the fields we want.
