爬取页面:FUNcube-1 (AO-73) Realtime Data
先看网页的源代码:
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>FUNcube-1 · AMSAT-UK Data Warehouse</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<link rel="icon" type="image/png" href="/images/favicon-16x16.png" sizes="16x16">
<link rel="icon" type="image/png" href="/images/favicon-24x24.png" sizes="24x24">
<link rel="icon" type="image/png" href="/images/favicon-32x32.png" sizes="32x32">
<link rel="icon" type="image/png" href="/images/favicon-64x64.png" sizes="64x64">
<!-- -------------------------------------------------------------------- -->
<div class="w-100"></div>
<!-- Tab panes -->
<div class="tab-content">
<div id="tab-eps" class="container tab-pane active"><br>
<h3>Electrical Power Subsystem</h3>
<table style="width: 100%;">
<tr>
<td style="text-align: center;">Name</td>
<td style="text-align: center; white-space: nowrap;">Value</td>
<td style="text-align: center; white-space: nowrap;">Min.</td>
<td style="text-align: center; white-space: nowrap;">Max.</td>
</tr>
<tr>
<td id="eps_sol_volts_x_name" style="text-align: center;">Solar Panel Voltage X</td>
<td id="eps_sol_volts_x_value"
style="text-align: center; white-space: nowrap;"> </td>
<td id="eps_sol_volts_x_min"
style="text-align: center; white-space: nowrap;"> </td>
<td id="eps_sol_volts_x_max"
style="text-align: center; white-space: nowrap;"> </td>
</tr>
<tr>
<td id="eps_sol_volts_y_name" style="text-align: center;">Solar Panel Voltage Y</td>
<td id="eps_sol_volts_y_value"
style="text-align: center; white-space: nowrap;"> </td>
<td id="eps_sol_volts_y_min"
style="text-align: center; white-space: nowrap;"> </td>
<td id="eps_sol_volts_y_max"
style="text-align: center; white-space: nowrap;"> </td>
</tr>
<tr>
<td id="eps_sol_volts_z_name" style="text-align: center;">Solar Panel Voltage Z </td>
<td id="eps_sol_volts_z_value"
style="text-align: center; white-space: nowrap;"> </td>
<td id="eps_sol_volts_z_min"
style="text-align: center; white-space: nowrap;"> </td>
<td id="eps_sol_volts_z_max"
style="text-align: center; white-space: nowrap;"> </td>
</tr>
<tr>
<td id="eps_total_photo_current_name" style="text-align: center;">Total Photo Current</td>
<td id="eps_total_photo_current_value"
style="text-align: center; white-space: nowrap;"> </td>
<td id="eps_total_photo_current_min"
style="text-align: center; white-space: nowrap;"> </td>
<!-- -------------------------------------------------------------------- -->
<td id="sw_deployment_wait_name" style="text-align: center;">Deployment Wait At Next Boot</td>
<td id="sw_deployment_wait_value"
style="text-align: center; white-space: nowrap;"> </td>
</tr>
</table>
</div>
</div>
</div>
</div>
</div>
</main>
</div>
</div>
</div>
<script src="/js/bundle.min.js"></script>
<script type="text/javascript" src="https://cdn.datatables.net/v/ju/dt-1.10.18/datatables.min.js"></script>
<script>
// Once the DOM is ready, load the telemetry immediately and then poll for a
// fresh copy every 10 seconds.
$(function () {
  refreshDiv();
  window.setInterval(refreshDiv, 10 * 1000);
});
</script>
<script>
/**
 * Success callback for the realtime telemetry request: copies fields of the
 * response into the page's cells (looked up by element id) and rebuilds the
 * list of reporting sites.
 *
 * @param {Object} data Parsed response. Most fields are read from `data.data`,
 *   including the `epsDTO`, `asibDTO`, `rfDTO`, `paDTO`, `antsDTO` and `swDTO`
 *   sub-objects, the `minima` / `maxima` arrays and the `sites` collection.
 */
var handleRealtime = function (data) {
  // Summary fields.
  $("#created_date").text(data.data.createdDate);
  $("#lat_long").text(data.data.latLong);
  $("#info_seqno").text("Seq. No.: " + data.data.sequenceNumber);
  $("#info_packet_count").text("Packets: " + data.data.packetCount);
  // NOTE(review): these two are read from the top-level object, not from
  // data.data like every other field - confirm against the actual payload.
  $("#info_satellite_mode").text("Mode switching: " + data.satelliteMode);
  $("#info_transponder_state").text("Transponder state: " + data.transponderState);

  // epsDTO values; min/max come from the same index of minima / maxima.
  $("#eps_sol_volts_x_value").text(data.data.epsDTO.panelVolts1);
  $("#eps_sol_volts_x_min").text(data.data.minima[0]);
  $("#eps_sol_volts_x_max").text(data.data.maxima[0]);
  $("#eps_sol_volts_y_value").text(data.data.epsDTO.panelVolts2);
  $("#eps_sol_volts_y_min").text(data.data.minima[1]);
  $("#eps_sol_volts_y_max").text(data.data.maxima[1]);
  $("#eps_sol_volts_z_value").text(data.data.epsDTO.panelVolts3);
  $("#eps_sol_volts_z_min").text(data.data.minima[2]);
  $("#eps_sol_volts_z_max").text(data.data.maxima[2]);
  $("#eps_total_photo_current_value").text(data.data.epsDTO.totPhotoCurr);
  $("#eps_total_photo_current_min").text(data.data.minima[3]);
  $("#eps_total_photo_current_max").text(data.data.maxima[3]);
  $("#eps_battery_volts_value").text(data.data.epsDTO.batteryVolts);
  $("#eps_battery_volts_min").text(data.data.minima[4]);
  $("#eps_battery_volts_max").text(data.data.maxima[4]);
  $("#eps_total_system_current_value").text(data.data.epsDTO.totSystemCurr);
  $("#eps_total_system_current_min").text(data.data.minima[5]);
  $("#eps_total_system_current_max").text(data.data.maxima[5]);
  // Only the current value is written for these two (indices 6 and 7 of
  // minima / maxima are not used here).
  $("#eps_reboot_count_value").text(data.data.epsDTO.rebootCount);
  $("#eps_software_errors_value").text(data.data.epsDTO.epsSwErrors);
  $("#eps_boost_temp_x_value").text(data.data.epsDTO.boostTemp1);
  $("#eps_boost_temp_x_min").text(data.data.minima[8]);
  $("#eps_boost_temp_x_max").text(data.data.maxima[8]);
  $("#eps_boost_temp_y_value").text(data.data.epsDTO.boostTemp2);
  $("#eps_boost_temp_y_min").text(data.data.minima[9]);
  $("#eps_boost_temp_y_max").text(data.data.maxima[9]);
  $("#eps_boost_temp_z_value").text(data.data.epsDTO.boostTemp3);
  $("#eps_boost_temp_z_min").text(data.data.minima[10]);
  $("#eps_boost_temp_z_max").text(data.data.maxima[10]);
  $("#eps_battery_temp_value").text(data.data.epsDTO.batteryTemp);
  $("#eps_battery_temp_min").text(data.data.minima[11]);
  $("#eps_battery_temp_max").text(data.data.maxima[11]);
  // Value only (indices 12-15 of minima / maxima are not used here).
  $("#eps_latch_up_5_value").text(data.data.epsDTO.latchUpCount5v);
  $("#eps_latch_up_3_value").text(data.data.epsDTO.latchUpCount3v3);
  $("#eps_reset_cause_value").text(data.data.epsDTO.resetCause);
  $("#eps_ppt_mode_value").text(data.data.epsDTO.pptMode);

  // asibDTO values.
  $("#asib_sun_sensor_x_value").text(data.data.asibDTO.sunSensorX);
  $("#asib_sun_sensor_x_min").text(data.data.minima[16]);
  $("#asib_sun_sensor_x_max").text(data.data.maxima[16]);
  $("#asib_sun_sensor_y_value").text(data.data.asibDTO.sunSensorY);
  $("#asib_sun_sensor_y_min").text(data.data.minima[17]);
  $("#asib_sun_sensor_y_max").text(data.data.maxima[17]);
  $("#asib_sun_sensor_z_value").text(data.data.asibDTO.sunSensorZ);
  $("#asib_sun_sensor_z_min").text(data.data.minima[18]);
  $("#asib_sun_sensor_z_max").text(data.data.maxima[18]);
  $("#asib_sol_temp_x_plus_value").text(data.data.asibDTO.solXPlus);
  $("#asib_sol_temp_x_plus_min").text(data.data.minima[19]);
  $("#asib_sol_temp_x_plus_max").text(data.data.maxima[19]);
  $("#asib_sol_temp_x_minus_value").text(data.data.asibDTO.solXMinus);
  $("#asib_sol_temp_x_minus_min").text(data.data.minima[20]);
  $("#asib_sol_temp_x_minus_max").text(data.data.maxima[20]);
  $("#asib_sol_temp_y_plus_value").text(data.data.asibDTO.solYPlus);
  $("#asib_sol_temp_y_plus_min").text(data.data.minima[21]);
  $("#asib_sol_temp_y_plus_max").text(data.data.maxima[21]);
  $("#asib_sol_temp_y_minus_value").text(data.data.asibDTO.solYMinus);
  $("#asib_sol_temp_y_minus_min").text(data.data.minima[22]);
  $("#asib_sol_temp_y_minus_max").text(data.data.maxima[22]);
  $("#asib_bus_volts_three_value").text(data.data.asibDTO.busVolts3v3);
  $("#asib_bus_volts_three_min").text(data.data.minima[23]);
  $("#asib_bus_volts_three_max").text(data.data.maxima[23]);
  $("#asib_bus_current_three_value").text(data.data.asibDTO.busCurr3v3);
  $("#asib_bus_current_three_min").text(data.data.minima[24]);
  $("#asib_bus_current_three_max").text(data.data.maxima[24]);
  $("#asib_bus_volts_five_value").text(data.data.asibDTO.busVolts5);
  $("#asib_bus_volts_five_min").text(data.data.minima[25]);
  $("#asib_bus_volts_five_max").text(data.data.maxima[25]);

  // rfDTO values.
  $("#rf_receive_doppler_value").text(data.data.rfDTO.rxDoppler);
  $("#rf_receive_doppler_min").text(data.data.minima[26]);
  $("#rf_receive_doppler_max").text(data.data.maxima[26]);
  $("#rf_receive_rssi_value").text(data.data.rfDTO.rxRSSI);
  $("#rf_receive_rssi_min").text(data.data.minima[27]);
  $("#rf_receive_rssi_max").text(data.data.maxima[27]);
  $("#rf_temperature_value").text(data.data.rfDTO.rxTemp);
  $("#rf_temperature_min").text(data.data.minima[28]);
  $("#rf_temperature_max").text(data.data.maxima[28]);
  $("#rf_receive_current_value").text(data.data.rfDTO.rxCurr);
  $("#rf_receive_current_min").text(data.data.minima[29]);
  $("#rf_receive_current_max").text(data.data.maxima[29]);
  $("#rf_transmit_current_three_value").text(data.data.rfDTO.txBusCurr3v3);
  $("#rf_transmit_current_three_min").text(data.data.minima[30]);
  $("#rf_transmit_current_three_max").text(data.data.maxima[30]);
  $("#rf_transmit_current_five_value").text(data.data.rfDTO.txBusCurr5v);
  $("#rf_transmit_current_five_min").text(data.data.minima[31]);
  $("#rf_transmit_current_five_max").text(data.data.maxima[31]);

  // paDTO values.
  // Note: minmax pairs are reversed :-(
  // (The forward-power cell takes txRevPwr with index 33 and the
  // reverse-power cell takes txFwdPwr with index 32; kept exactly as is.)
  $("#pa_forward_power_value").text(data.data.paDTO.txRevPwr);
  $("#pa_forward_power_min").text(data.data.minima[33]);
  $("#pa_forward_power_max").text(data.data.maxima[33]);
  $("#pa_reverse_power_value").text(data.data.paDTO.txFwdPwr);
  $("#pa_reverse_power_min").text(data.data.minima[32]);
  $("#pa_reverse_power_max").text(data.data.maxima[32]);
  $("#pa_device_temperature_value").text(data.data.paDTO.txTemp);
  $("#pa_device_temperature_min").text(data.data.minima[34]);
  $("#pa_device_temperature_max").text(data.data.maxima[34]);
  $("#pa_bus_current_value").text(data.data.paDTO.txCurr);
  $("#pa_bus_current_min").text(data.data.minima[35]);
  $("#pa_bus_current_max").text(data.data.maxima[35]);

  // antsDTO values.
  $("#ants_temp_zero_value").text(data.data.antsDTO.antTemp0);
  $("#ants_temp_zero_min").text(data.data.minima[36]);
  $("#ants_temp_zero_max").text(data.data.maxima[36]);
  $("#ants_temp_one_value").text(data.data.antsDTO.antTemp1);
  $("#ants_temp_one_min").text(data.data.minima[37]);
  $("#ants_temp_one_max").text(data.data.maxima[37]);
  $("#ants_deployment_vhf_a_value").text(data.data.antsDTO.antDepl0);
  $("#ants_deployment_uhf_a_value").text(data.data.antsDTO.antDepl1);
  $("#ants_deployment_vhf_b_value").text(data.data.antsDTO.antDepl2);
  $("#ants_deployment_uhf_b_value").text(data.data.antsDTO.antDepl3);

  // swDTO values (value cells only).
  $("#sw_sequence_number_value").text(data.data.sequenceNumber);
  $("#sw_command_count_value").text(data.data.swDTO.dtmfCmdCount);
  $("#sw_last_command_value").text(data.data.swDTO.dtmfLastCmd);
  $("#sw_command_success_value").text(data.data.swDTO.dtmfCmdSuccess);
  $("#sw_valid_asib_value").text(data.data.swDTO.dataValidASIB);
  $("#sw_valid_eps_value").text(data.data.swDTO.dataValidEPS);
  $("#sw_valid_pa_value").text(data.data.swDTO.dataValidPA);
  $("#sw_valid_rf_value").text(data.data.swDTO.dataValidRF);
  $("#sw_valid_mse_value").text(data.data.swDTO.dataValidiMTQ);
  $("#sw_valid_ants_bus_b_value").text(data.data.swDTO.dataValidAntsBusB);
  $("#sw_valid_ants_bus_a_value").text(data.data.swDTO.dataValidAntsBusA);
  $("#sw_eclipse_mode_value").text(data.data.swDTO.inEclipseMode);
  $("#sw_safe_mode_value").text(data.data.swDTO.inSafeMode);
  $("#sw_hardware_abf_value").text(data.data.swDTO.hardwareABFOnOff);
  $("#sw_software_abf_value").text(data.data.swDTO.softwareABFOnOff);
  $("#sw_deployment_wait_value").text(data.data.swDTO.deploymentWait);

  // Rebuild the site list. Each name is inserted with .text() so that a name
  // containing markup is shown literally instead of being parsed as HTML.
  var siteList = $("#site_list").empty();
  $.each(data.data.sites, function (id, site) {
    siteList.append($("<li>").text(site));
  });
};
/**
 * Requests the latest realtime telemetry as JSONP and hands the response to
 * handleRealtime when the request succeeds.
 */
function refreshDiv() {
  var requestOptions = {
    type: "GET",
    url: "/funcube/data/realtime",
    contentType: "application/json",
    accepts: "application/json",
    dataType: "jsonp",
    crossDomain: true,
    // Ask jQuery not to reuse a cached response for this request.
    cache: false,
    success: handleRealtime
  };
  $.ajax(requestOptions);
}
</script>
</body>
</html>
可以看出,数据是通过ajax动态获取的,每隔10s更新一次,直接分析html网页并不能拿到数据。
通过F12分析其定时产生的请求:
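可以发现,每次请求的url都是不同的:有时是url最后一个数字加一,有时url前面的数字也会改变(从上面的源代码看,请求使用了 `dataType: "jsonp"` 和 `cache: false`,这些变化的数字应该就是 jQuery 自动附加的 callback 名称和防缓存的 `_` 时间戳参数)。所以不太容易模拟浏览器发送请求的过程,原本想使用selenium来着,发现不太好实现。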
上网查资料发现了Ajax-hook,可以实现请求和响应的拦截。but自己太菜。
有空还得看一下:
Ajax-hook 原理解析
[Python3爬虫]Ajax请求信息的爬取
后来想起可以用Wireshark抓包先看一下:
于是可以使用sniffer实现,只需要控制好过滤条件就好了。
去GitHub上一搜,找到一个Python-Packet-Sniffer,简单易懂,稍微修改一下就可以拿到想要的数据了(数据是通过gzip压缩的,要自己解压一下:python处理gzip数据)。
解析json字符串:Python中JSON的基本使用
# The captured response body is gzip-compressed: decompress it and decode the
# bytes as UTF-8 text.
uncompressed_data = gzip.decompress(compressed_data).decode('utf-8')
# The page requests the data as JSONP, so the text looks like callback({...});
# keep only what lies between the first '(' and the last ')'.
begin_index = uncompressed_data.find('(')
end_index = uncompressed_data.rfind(')')
json_data = uncompressed_data[begin_index+1:end_index]
print(json_data)
{'data': {'latLong': 'Satellite Latitude, Longitude: 42.8 N, 42.2 E', 'sequenceNumber': 1866066, 'frameType': 16, 'createdDate': 'Data received: 2020-12-21 13:47:15.0', 'satelliteTime': '2020-12-25 10:11:36.0', 'latitude': 42.79, 'longitude': 42.19, 'sites': ['SP5ULN'], 'packetCount': 8882079, 'epsDTO': {'panelVolts1': 4315, 'panelVolts2': 4293, 'panelVolts3': 4186, 'totPhotoCurr': 179, 'batteryVolts': 8292, 'totSystemCurr': 146, 'rebootCount': 1377, 'epsSwErrors': 0, 'boostTemp1': 28, 'boostTemp2': 28, 'boostTemp3': 28, 'batteryTemp': 28, 'latchUpCount5v': 0, 'latchUpCount3v3': 0, 'resetCause': 3, 'pptMode': 1}, 'asibDTO': {'sunSensorX': '0.0', 'sunSensorY': '2.2', 'sunSensorZ': '2.9', 'solXPlus': '18.7', 'solXMinus': '38.6', 'solYPlus': '29.1', 'solYMinus': '20.1', 'busVolts3v3': '3276.0', 'busCurr3v3': '139.0', 'busVolts5': '4956.0'}, 'rfDTO': {'rxDoppler': 156, 'rxRSSI': 183, 'rxTemp': '30.8', 'rxCurr': 41, 'txBusCurr3v3': 59, 'txBusCurr5v': 29}, 'paDTO': {'txRevPwr': '48.9', 'txFwdPwr': '7.2', 'txTemp': '41.1', 'txCurr': '42.7'}, 'antsDTO': {'antTemp0': '18.9', 'antTemp1': '20.1', 'antDepl0': 'Deployed', 'antDepl1': 'Deployed', 'antDepl2': 'Deployed', 'antDepl3': 'Deployed'}, 'swDTO': {'dtmfCmdCount': 36, 'dtmfLastCmd': 'Fitter Copy', 'dtmfCmdSuccess': 'YES', 'dataValidASIB': 'YES', 'dataValidEPS': 'YES', 'dataValidPA': 'YES', 'dataValidRF': 'YES', 'dataValidiMTQ': 'YES', 'dataValidAntsBusB': 'YES', 'dataValidAntsBusA': 'YES', 'inEclipseMode': 'YES', 'inSafeMode': 'NO', 'hardwareABFOnOff': 'ON', 'softwareABFOnOff': 'Off', 'deploymentWait': 'NO'}, 'minima': ['2079', '1939', '978', '77', '8262', '137', '1377', '0', '21', '22', '22', '22', '0', '0', '3', '1', '0', '0', '0', '8', '11', '10', '10', '3272', '131', '4956', '131', '181', '24', '40', '57', '28', '4', '19', '35', '30', '9', '8', '0', '0', '0', '0', '0'], 'maxima': ['4762', '4689', '4583', '280', '8303', '238', '1377', '0', '31', '30', '30', '29', '0', '0', '3', '1', '4', '4', '4', '45', '43', '41', 
'43', '3280', '185', '4962', '182', '183', '33', '42', '81', '29', '51', '453', '44', '121', '31', '30', '0', '0', '0', '0', '0']}}