目前在做的一个实验的数据集包含两万个 JSON 文件。为了高效地读取所有 JSON 文件并将其转换成 DataFrame 格式,以实现端到端的数据处理,我写了一个小工具:
try:
import ujson as json
except ImportError:
try:
import simplejson as json
except ImportError:
import json
import pandas as pd
import os
class data_utils():
    """Collect every ``.json`` file under a directory tree into one DataFrame.

    The parsed content of each file becomes one element of the list handed
    to ``pd.DataFrame``.
    NOTE(review): this presumably expects each file to hold a single JSON
    object (dict) so that every file maps to one row -- confirm against the
    dataset.
    """

    def __init__(self, data_path):
        """Remember the root directory that will be searched.

        Args:
            data_path: Directory whose tree is walked recursively for files
                whose name ends with ``.json``.
        """
        self.data_path = data_path

    def _find_json_files(self):
        """Return the sorted paths of all ``.json`` files under ``data_path``."""
        paths = [
            os.path.join(root, name)
            for root, _dirs, names in os.walk(self.data_path)
            for name in names
            if name.endswith('.json')
        ]
        # os.walk yields entries in arbitrary, filesystem-dependent order;
        # sorting makes the row order of the resulting frame reproducible.
        paths.sort()
        return paths

    @staticmethod
    def _read_json(path):
        """Parse one JSON file and return the decoded Python object."""
        # The context manager closes the handle right away; with tens of
        # thousands of files, leaving them to the garbage collector can
        # exhaust file descriptors. JSON text is UTF-8 by specification, so
        # the encoding is explicit instead of depending on the locale.
        with open(path, encoding='utf-8') as handle:
            return json.load(handle)

    def to_dataframe(self):
        """Load all JSON files under ``data_path`` into a single DataFrame.

        Returns:
            A ``pandas.DataFrame`` built from the list of parsed files,
            ordered by sorted file path. An empty frame is returned when no
            ``.json`` file is found.

        Raises:
            OSError: If a file cannot be opened.
            ValueError: If a file does not contain valid JSON (the stdlib
                ``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        records = [self._read_json(path) for path in self._find_json_files()]
        return pd.DataFrame(records)