此次爬取的源网站是金投网:上海黄金交易所(金交所)行情查询-贵金属行情中心-金投网 (cngold.org)
还是一样分析网络,发现是先渲染的页面再请求的数据
找到了请求的网址和格式,其中 codes 参数的值是:
JO_9753,JO_92226,JO_9754,JO_71,JO_70,JO_73,JO_72,JO_75,JO_9751,JO_9752,JO_92224,JO_92225,JO_92276,JO_76,JO_74,JO_92277,JO_92278
后面拼了一个时间戳,请求内容返回结果如下:
返回的内容是一个名为 quote_json 的 JSON 对象,看了一下觉得太复杂了,因为不知道各个字段对应的含义,得一个个找;所幸搜了一下这个 quote_json,找着了解析它的源码 getQuote(),美滋滋
把js搞下来
// getQuote: loads the real-time quote script for a fixed list of instrument
// codes and writes each quote field into the page.
//
// The request is made with $.getScript, so the response is executed as a
// script; the callback then reads the global `quote_json` and looks up one
// entry per requested code. For every code the values are written into the
// elements whose ids are "<code>_price", "<code>_sellPrice", "<code>_buyPrice",
// "<code>_updownPercent", "<code>_updown", "<code>_open", "<code>_high",
// "<code>_low", "<code>_close", "<code>_time", "<code>_showCode" and
// "<code>_unit". Values that are not numeric are shown as "----".
//
// NOTE(review): `format(value, digits)` and `Date.prototype.Format` are not
// defined in this snippet — presumably page-level helpers that round to
// `digits` decimals and format a date; confirm against the site's scripts.
function getQuote(){
var codes = "JO_9753,JO_92226,JO_9754,JO_71,JO_70,JO_73,JO_72,JO_75,JO_9751,JO_9752,JO_92224,JO_92225,JO_92276,JO_76,JO_74,JO_92277,JO_92278";
var url="https://api.jijinhao.com/quoteCenter/realTime.htm?codes="+codes;
$.getScript(url,function(){
// quote_json is expected to have been defined by the script just loaded.
if(quote_json != null && quote_json != ""){
var codeArr = codes.split(',');
for(var i in codeArr){
var jCode = codeArr[i];
var quote = quote_json[jCode];
if(quote != undefined) {
// Number of decimals used for price fields; falls back to 2.
var digits = 2;
digits = quote.digits;
if(isNaN(digits)) digits=2;
// latest price
var q63Val = quote.q63;
q63Val = format(q63Val,digits);
if(isNaN(q63Val)) q63Val="----";
// change amount
var q70Val = quote.q70;
q70Val = format(q70Val,digits);
if(isNaN(q70Val)) q70Val="----";
// change percentage (always formatted with 2 decimals)
var q80Val = quote.q80;
q80Val = format(q80Val,2);
if(isNaN(q80Val)) q80Val="----";
// opening price
var q1Val = quote.q1;
q1Val = format(q1Val,digits);
if(isNaN(q1Val)) q1Val="----";
// closing price
var q2Val = quote.q2;
q2Val = format(q2Val,digits);
if(isNaN(q2Val)) q2Val="----";
// highest price
var q3Val = quote.q3;
q3Val = format(q3Val,digits);
if(isNaN(q3Val)) q3Val="----";
// lowest price
var q4Val = quote.q4;
q4Val = format(q4Val,digits);
if(isNaN(q4Val)) q4Val="----";
// quote time
var qTimeVal = quote.time;
if(isNaN(qTimeVal)){
qTimeVal = "----";
}else{
qTimeVal = new Date(qTimeVal).Format("yyyy-MM-dd hh:mm:ss");
}
// buy price
var q5Val=quote.q5;
q5Val=format(q5Val,digits);
if(isNaN(q5Val)) q5Val="----";
// sell price
var q6Val=quote.q6;
q6Val=format(q6Val,digits);
if(isNaN(q6Val)) q6Val="----";
// instrument name (English code)
var showCode = quote.showCode;
if(showCode == undefined)showCode="----";
// unit
var unit=quote.unit;
if(unit == undefined || unit=="")unit="----";
// A latest price of 0 blanks out price, change amount and change percentage.
if(q63Val == 0) {
q63Val = "<em>----</em>";
q70Val = "<em>----</em>";
q80Val = "<em>----</em>";
showCode = "<em>"+showCode+"</em>";
} else {
// Colour price, change amount and percentage by the sign of the change:
// red when positive, green when negative, plain when zero.
if(q70Val > 0){
q63Val = "<em class='red'>" + q63Val + "</em>";
q70Val = "<em class='red'>"+q70Val+"</em>";
q80Val = "<em class='red'>"+q80Val+"%</em>";
showCode = "<em>"+showCode+"</em>";
} else if(q70Val < 0){
q63Val = "<em class='green'>"+q63Val+"</em>";
q70Val = "<em class='green'>"+q70Val+"</em>";
q80Val = "<em class='green'>"+q80Val+"%</em>";
showCode = "<em>"+showCode+"</em>";
} else if(q70Val == 0){
q63Val = "<em>"+q63Val+"</em>";
q70Val = "<em>"+q70Val+"</em>";
q80Val = "<em>"+q80Val+"%</em>";
showCode = "<em>"+showCode+"</em>";
}
}
// The remaining prices are coloured relative to the closing price (q2):
// red when above it, green when below it, plain when equal.
if(q1Val != 0) {
if (parseFloat(q1Val) > parseFloat(q2Val)) {
q1Val = "<em class='red'>"+q1Val+"</em>";
} else if (parseFloat(q1Val) < parseFloat(q2Val)) {
q1Val = "<em class='green'>"+q1Val+"</em>";
} else {
q1Val = "<em>"+q1Val+"</em>";
}
}
if(q3Val != 0) {
if (parseFloat(q3Val) > parseFloat(q2Val)) {
q3Val = "<em class='red'>"+q3Val+"</em>";
} else if (parseFloat(q3Val) < parseFloat(q2Val)) {
q3Val = "<em class='green'>"+q3Val+"</em>";
} else {
q3Val = "<em>"+q3Val+"</em>";
}
}
if(q4Val != 0) {
if (parseFloat(q4Val) > parseFloat(q2Val)) {
q4Val = "<em class='red'>"+q4Val+"</em>";
} else if (parseFloat(q4Val) < parseFloat(q2Val)) {
q4Val = "<em class='green'>"+q4Val+"</em>";
} else {
q4Val = "<em>"+q4Val+"</em>";
}
}
if(q5Val != 0) {
if (parseFloat(q5Val) > parseFloat(q2Val)) {
q5Val = "<em class='red'>"+q5Val+"</em>";
} else if (parseFloat(q5Val) < parseFloat(q2Val)) {
q5Val = "<em class='green'>"+q5Val+"</em>";
} else {
q5Val = "<em>"+q5Val+"</em>";
}
}
if(q6Val != 0) {
if (parseFloat(q6Val) > parseFloat(q2Val)) {
q6Val = "<em class='red'>"+q6Val+"</em>";
} else if (parseFloat(q6Val) < parseFloat(q2Val)) {
q6Val = "<em class='green'>"+q6Val+"</em>";
} else {
q6Val = "<em>"+q6Val+"</em>";
}
}
q2Val = "<em>"+q2Val+"</em>";
// NOTE(review): the change amount (q70) goes to "_updownPercent" and the
// percentage (q80) to "_updown" — the ids look swapped relative to the
// values; presumably this matches the page markup, verify before reuse.
$("#"+jCode+"_price").html(q63Val);
$("#"+jCode+"_sellPrice").html(q6Val);
$("#"+jCode+"_buyPrice").html(q5Val);
$("#"+jCode+"_updownPercent").html(q70Val);
$("#"+jCode+"_updown").html(q80Val);
$("#"+jCode+"_open").html(q1Val);
$("#"+jCode+"_high").html(q3Val);
$("#"+jCode+"_low").html(q4Val);
$("#"+jCode+"_close").html(q2Val);
$("#"+jCode+"_time").html(qTimeVal);
$("#"+jCode+"_showCode").html(showCode);
$("#"+jCode+"_unit").html(unit);
} else {
// No entry for this code in the response: show placeholders everywhere.
$("#"+jCode+"_price").html("<em>----</em>");
$("#"+jCode+"_sellPrice").html("<em>----</em>");
$("#"+jCode+"_buyPrice").html("<em>----</em>");
$("#"+jCode+"_updownPercent").html("<em>----</em>");
$("#"+jCode+"_updown").html("<em>----</em>");
$("#"+jCode+"_open").html("<em>----</em>");
$("#"+jCode+"_high").html("<em>----</em>");
$("#"+jCode+"_low").html("<em>----</em>");
$("#"+jCode+"_close").html("<em>----</em>");
$("#"+jCode+"_time").html("<em>----</em>");
$("#"+jCode+"_showCode").html("<em>----</em>");
$("#"+jCode+"_unit").html("<em>----</em>");
}
}
}
});
}
其中有很多根据数值大小给它标红或者标绿的操作,可以去掉;然后把 ---- 转换为 None,不需要的 HTML 代码也去掉,只保留数据,并把时间戳转换为日期时间格式
然后开始写代码:
还是先搞一个中文对照的字典,拼接请求,做好header,将返回结果转换为json字典,然后逐个击破,此代码仅供学习研究,不要用于非法用途,否则后果自负
import datetime
import time
import requests
import json
# Chinese display names keyed by the ``showCode`` value reported by the API.
gold_dict = {
    "Au(T+D)": "黄金T+D",
    "mAu(T+D)": "m黄金T+D",
    "Ag(T+D)": "白银T+D",
    "AuT+D": "黄金T+D",
    "mAuT+D": "m黄金T+D",
    "AgT+D": "白银T+D",
    "Au9999": "黄金9999",
    "Au9995": "黄金9995",
    "Au100g": "金条100g",
    "Au50g": "金条50g",
    "Ag999": "白银999",
    "AuT+N1": "黄金T+N1",
    "AuT+N2": "黄金T+N2",
    "SGiAu100g": "gi黄金100g",
    "SGiAu9999i": "i黄金9999",
    "SGiAu995i": "i黄金995",
    "Ag9999": "白银9999",
    "PT9995": "铂金9995",
    "NYAuTN06": "纽约金TN06",
    "NYAuTN12": "纽约金TN12",
}

# Instrument codes requested from the API; records are reported in this order.
_QUOTE_CODES = (
    "JO_9753,JO_92226,JO_9754,JO_71,JO_70,JO_73,JO_72,JO_75,JO_9751,"
    "JO_9752,JO_92224,JO_92225,JO_92276,JO_76,JO_74,JO_92277,JO_92278"
)
_QUOTE_URL = "https://api.jijinhao.com/quoteCenter/realTime.htm?codes="
_REQUEST_HEADERS = {
    'authority': 'api.jijinhao.com',
    'accept': '*/*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'referer': 'https://quote.cngold.org/gjs/jjs.html',
    'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Microsoft Edge";v="122"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'script',
    'sec-fetch-mode': 'no-cors',
    'sec-fetch-site': 'cross-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0',
}
# Keys of every record returned by get_quote(); all default to None.
_RECORD_FIELDS = (
    "code", "showCode", "name", "price", "unit", "sellPrice", "buyPrice",
    "updownPercent", "updown", "open", "high", "low", "close", "time",
)


def _fetch_quote_script():
    """Download the raw ``var quote_json = {...}`` script text from the API."""
    # The ``_`` parameter carries the current time in milliseconds.
    url = _QUOTE_URL + _QUOTE_CODES + "&_=" + str(int(time.time() * 1000))
    response = requests.get(url, headers=_REQUEST_HEADERS, timeout=10)
    response.raise_for_status()
    return response.text


def _extract_quote_json(text):
    """Parse the JSON object embedded in the script text."""
    # Slice from the first "{" to the last "}" instead of relying on a fixed
    # prefix length, so a changed prefix or a trailing ";" does not break it.
    start, end = text.find("{"), text.rfind("}")
    payload = text[start:end + 1] if 0 <= start < end else text
    return json.loads(payload)


def _decimal_places(raw, default=2):
    """Return ``raw`` as a non-negative number of decimals, else ``default``."""
    try:
        places = int(raw)
    except (TypeError, ValueError):
        return default
    return places if places >= 0 else default


def _format_number(value, places):
    """Format ``value`` with ``places`` decimals; None when it is not a number."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    if number != number:  # NaN never equals itself
        return None
    return f"{number:.{places}f}"


def _format_time(millis):
    """Turn a millisecond epoch timestamp into a local date-time string."""
    try:
        moment = datetime.datetime.fromtimestamp(float(millis) / 1000)
    except (TypeError, ValueError, OverflowError, OSError):
        return None
    # Built by hand rather than with strftime so the Chinese characters never
    # pass through the platform's locale-dependent strftime implementation.
    return (
        f"{moment.year:04d}年{moment.month:02d}月{moment.day:02d}日 "
        f"{moment.hour:02d}:{moment.minute:02d}:{moment.second:02d}"
    )


def _build_record(code, quote):
    """Build one output record for ``code`` from its raw quote dict."""
    record = dict.fromkeys(_RECORD_FIELDS)
    record["code"] = code
    if not isinstance(quote, dict) or not quote:
        return record  # code absent from the response: every field stays None

    places = _decimal_places(quote.get("digits"))
    price = _format_number(quote.get("q63"), places)       # latest price
    change = _format_number(quote.get("q70"), places)      # change amount
    percent = _format_number(quote.get("q80"), 2)          # change percentage
    # A latest price of zero means "no quote": blank the dependent fields too.
    if price is not None and float(price) == 0:
        price = change = percent = None

    show_code = quote.get("showCode")
    record.update(
        showCode=show_code,
        name=gold_dict.get(show_code),
        price=price,
        unit=quote.get("unit") or None,
        sellPrice=_format_number(quote.get("q6"), places),
        buyPrice=_format_number(quote.get("q5"), places),
        # q80 is the percentage and q70 the absolute change; the original
        # script printed them under each other's label.
        updownPercent=percent,
        updown=change,
        open=_format_number(quote.get("q1"), places),
        high=_format_number(quote.get("q3"), places),
        low=_format_number(quote.get("q4"), places),
        close=_format_number(quote.get("q2"), places),
        time=_format_time(quote.get("time")),
    )
    return record


def _print_record(record):
    """Print one record in the labelled, line-per-field layout."""
    print(f"代码 showCode: {record['showCode']}")
    # A quote whose showCode has no known Chinese name omits the name line.
    if record["name"] is not None or record["showCode"] is None:
        print(f"名称:{record['name']}")
    print(f"价格 price: {record['price']}")
    print(f"单位 unit: {record['unit']}")
    print(f"卖出价 sellPrice: {record['sellPrice']}")
    print(f"买入价 buyPrice: {record['buyPrice']}")
    print(f"涨跌幅百分比 updownPercent: {record['updownPercent']}")
    print(f"涨跌幅 updown: {record['updown']}")
    print(f"开盘价 open: {record['open']}")
    print(f"最高价 high: {record['high']}")
    print(f"最低价 low: {record['low']}")
    print(f"收盘价 close: {record['close']}")
    print(f"时间 time: {record['time']}")
    print("\n")


def get_quote(raw_text=None):
    """Fetch, print and return the Shanghai Gold Exchange quotes.

    Args:
        raw_text: Optional body of a previously captured response
            (``var quote_json = {...}``). When given, no HTTP request is
            made and this text is parsed instead.

    Returns:
        A list with one dict per requested code, in request order. Each dict
        has the keys ``code``, ``showCode``, ``name``, ``price``, ``unit``,
        ``sellPrice``, ``buyPrice``, ``updownPercent``, ``updown``, ``open``,
        ``high``, ``low``, ``close`` and ``time``. Numeric fields are strings
        with the quote's number of decimals; anything missing or non-numeric
        is ``None``. The list is empty when the response holds no quote data.

    Raises:
        requests.RequestException: The download failed or returned an HTTP
            error status (only when ``raw_text`` is not supplied).
        json.JSONDecodeError: The response does not contain valid JSON.
    """
    if raw_text is None:
        raw_text = _fetch_quote_script()
    quote_json = _extract_quote_json(raw_text)
    if not isinstance(quote_json, dict) or not quote_json:
        return []

    records = []
    for jCode in _QUOTE_CODES.split(','):
        record = _build_record(jCode, quote_json.get(jCode))
        _print_record(record)
        records.append(record)
    return records
# Run the scraper once as soon as this script is executed.
get_quote()
输出: