import pandas as pd
import json
df = pd.read_excel('C:/Users/1/Desktop/1.xls',index_col=0)
df.fillna("", inplace=True)
'''
# fillna 自动填充NAN空值,可设置为用""填充
'''
# for name, group in df.groupby(['TABLE_NAME','COLUMN_NAME'])['COMMENTS','DATA_TYPE','DATA_LENGTH','DATA_PRECISION']:
# print ("name: ", name)
# print ("group:", group)
s = df.groupby(['TABLE_NAME','COLUMN_NAME'])['COMMENTS','DATA_TYPE','DATA_LENGTH','DATA_PRECISION'].agg(list)
'''
# 按照'TABLE_NAME','COLUMN_NAME'两个特征聚类,聚类结果只保留'COMMENTS','DATA_TYPE','DATA_LENGTH','DATA_PRECISION'这几个特征
# 聚合结果value的数据类型是list
'''
# for name, group in s:
# print ("name: ", name)
# print ("group:", group)
# print (s)
# print(s.index.levels)
d = {level: s.xs(level).to_dict('index') for level in s.index.levels[0]}
'''
# dataframe.index() 得到 'TABLE_NAME','COLUMN_NAME'两个聚类标准的结果,结果为list
# dataframe.index().levels[0] 取结果的[0]元素
# dataframe.xs(level) 获取具体level下的value值
'''
# print(s.xs('A1'))
# print("******")
# print(s.xs('A1').to_dict('index'))
'''
# DataFrame.to_dict (self, orient='dict’, into=) — 官方文档
# orient =‘index’ ,转化后的字典形式:{index(值) : {column(列名) : value(值)}};
'''
j = json.dumps(d)
result
COMMENTS DATA_TYPE DATA_LENGTH DATA_PRECISION
COLUMN_NAME
NAME [] [VARCHAR2] [200] []
VIN [] [VARCHAR2] [17] []
******
{'NAME': {'COMMENTS': [''], 'DATA_TYPE': ['VARCHAR2'], 'DATA_LENGTH': [200], 'DATA_PRECISION': ['']}, 'VIN': {'COMMENTS': [''], 'DATA_TYPE': ['VARCHAR2'], 'DATA_LENGTH': [17], 'DATA_PRECISION': ['']}}
筛选含有Nan值的行
import pandas as pd
import numpy as np
df = pd.DataFrame({
'A': [1, 2, 3, np.nan, 5],
'B': [np.nan, 2, np.nan, 4, 5],
'C': [np.nan, 2, 3, 4, 5]
})
mask = df.isnull().any(axis=1) # 找到含有NaN的行
result = df[mask] # 根据mask进行筛选
将某列中Nan值所在的行打印出来
df[df['B'].isnull().values==True]
df[df.isnull().values==True],这条语句只适用于一行中最多有一个缺失值的情况,如果一行中有n个缺失值,那么会重复输出n行同样的记录