首先通过elasticdump组件将线上指定的index导出成文本文件
elasticdump --input=“http://192.168.0.30:9200/adv_default” --output=“/bigdata/adv_default.data” --type=data --li
————————————————
数据结构如下
{
"_id": "LzGZS5IBxuIn9SX3wAIs",
"_index": "ods_data_vsahuman_202410",
"_score": 1,
"_source": {
"ageRegion": "1",
"capStyle": "0",
"cateCode": "vsahuman",
"coatColor": "1",
"coatStyle": "0",
"coatTexture": "0",
"collectPicture": false,
"collectTime": "2024-10-02 13:00:20",
"dataId": "1841342429132345344",
"deviceID": "61030300002006000212",
"deviceName": "XX路人民保险公司门口",
"deviceNo": "61030300002006000212",
"genderCode": "1",
"glasses": "0",
"hairStyle": "2",
"hat": "0",
"inStorage": false,
"locationMarkTime": "1727845208000",
"personID": "1841342429132345344",
"sourceCode": "zhongke",
"sourceType": 1,
"standardCode": "video",
"storageUrl1": "http://XX.45.58.111:9000/citmsism-person/2024/10/02/13/00/41ec75053b2de4e5b7d474dd3ac5ee9b.jpg",
"storageUrl2": "http://XX.45.58.111:9000/citmsxva/202410/02/1300/JSJkbKcrEy20241002T130008ZVQmk.jpg",
"storageUrl3": "http://XX.45.58.111:9000/citmsxva/202410/02/1300/JSJkbKcrEy20241002T130008ZVQmk.jpg",
"transformTime": "2024-10-02 13:00:08",
"trousersColor": "1"
},
"_type": "citms_data"
}
方法1-利用java将文件转为excel
public static void main(String[] args) throws IOException {
// 假设data文件是一个文本文件,每行是一个Excel行
File dataFile = new File("C:\\Users\\tytu\\Desktop\\fsdownload\\es-data\\ods_data_vsanonmotorvehicle_202410.data");
Workbook workbook = new XSSFWorkbook();
Sheet sheet = workbook.createSheet("Data");
// 读取data文件并写入Excel
try (BufferedReader reader = new BufferedReader(new FileReader(dataFile))) {
int rowNum = 0;
while (reader.ready()) {
String json = reader.readLine();
JSONObject jsonObject = JSONObject.parseObject(json, JSONObject.class);
JSONObject source = jsonObject.getJSONObject("_source");
Row row = sheet.createRow(rowNum);
if (rowNum == 0) {
//标题栏
int colNum2 = 0;
for (String s : source.keySet()) {
Cell cell = row.createCell(colNum2);
cell.setCellValue(s);
++colNum2;
}
} else {
//数据栏
int colNum3 = 0;
for (Object value : source.values()) {
Cell cell = row.createCell(colNum3);
if (value == null) {
cell.setCellValue("");
} else {
cell.setCellValue(value.toString());
}
++colNum3;
}
}
++rowNum;
}
}
// 写入Excel文件
String f= dataFile.getPath();
String substring = f.substring(0, f.lastIndexOf("."));
try (FileOutputStream outputStream = new FileOutputStream(substring+".xlsx")) {
workbook.write(outputStream);
}
System.out.println("Data written to Excel successfully.");
}
方法二.通过python脚本
import json
import pandas as pd
json_file_path = 'C:\\Users\\tytu\\Desktop\\fsdownload\\ods_data_vsahuman_202410.data'
with open(json_file_path, 'r', encoding='utf-8') as file:
json_data = file.readlines()
data_list = []
for line in json_data:
json_line = json.loads(line)
source_data = json_line['_source']
data_list.append(source_data)
df = pd.DataFrame(data_list)
excel_output_file = json_file_path.split('.')[0] + '.xlsx'
df.to_excel(excel_output_file, index=False)