1. pandas读取没有表头的CSV文件时,添加列名(表头)的方法
# The CSV file has no header row, so the column labels are supplied by hand.
header = ["A", "B", "C", "D", "E", "F", "G", "H"]
# header=None tells pandas not to treat the first data row as column names;
# names= assigns the labels defined above instead.
df = pd.read_csv(
    'sy_yic_result_2020.csv',
    names=header,
    header=None,
)
2. pandas两列相加或者拼接转换成一列
# Combine columns B and G element-wise into a new column I.
# `+` adds numeric columns and concatenates string columns, row by row.
col_b = df["B"]
col_g = df["G"]
df["I"] = col_b + col_g
3. 存入mongodb
# Store every DataFrame row as one document in a MongoDB collection.
# NOTE(review): `conn` is presumably a pymongo MongoClient created elsewhere — confirm.
db = conn['pandas_db']
col = db['test']
# orient='records' yields a list with one {column: value} dict per row.
# The abbreviated spelling 'record' was deprecated and is rejected by pandas 2.0+.
data = df.to_dict(orient='records')
col.insert_many(data)
4. pandas对某一列数据进行处理
def add_uuid(data):
    """
    Derive a deterministic 32-character hex identifier from a string.

    The value is the name-based (MD5) version-3 UUID of ``data`` in the DNS
    namespace, rendered without dashes. This is a one-way hash, not
    encryption: equal inputs always give equal identifiers, and the input
    cannot be recovered from the result.

    :param data: text to hash (a ``str``)
    :return: lowercase hexadecimal string of length 32
    """
    # UUID.hex is the 32-digit form with the dashes already removed, so no
    # str()/replace() round-trip is needed.
    return uuid.uuid3(uuid.NAMESPACE_DNS, data).hex
# Build the combined key column, then replace each value with its hashed id.
df["I"] = df["B"] + df["G"]
# Series.map calls add_uuid once per cell of column I. A row-wise
# DataFrame.apply(axis=1) would build a Series for every row only to read
# a single field from it.
df["I"] = df["I"].map(add_uuid)
5. 创建空的DataFrame
# A DataFrame with no rows and no columns.
result = pd.DataFrame(data=None)
6. DataFrame 转化成字典、列表
# Option 1: a list with one {column: value} dict per row.
items = df.to_dict('records')
# Option 2: a list of lists, one inner list of cell values per row
# (column labels are not included). This rebinds `items`.
items = df.values.tolist()
7. pandas统计某一列中各个值的出现次数
# Count how many times each distinct value occurs in the "label" column.
df_train['label'].value_counts()
8. 获取每一行最大值, 及对应的列索引
# Compute both row-wise aggregates before adding any result column, so the
# helper column 'max_values' is never itself a candidate for idxmax.
row_max = df.max(axis=1)
row_argmax = df.idxmax(axis=1)  # label of the first column holding the row maximum
df['max_values'] = row_max
df['max_index'] = row_argmax
9. 二维DataFrame对象的拼接
def make_df(cols, index):
    """
    Build a demo DataFrame whose cells spell out their own position.

    Each cell holds the column label followed by the row label, e.g. the
    cell in column ``A`` and row ``1`` contains ``"A1"``.

    :param cols: iterable of column labels (a string yields one column per character)
    :param index: iterable of row labels; one-shot iterators are accepted
    :return: ``pd.DataFrame`` of strings with the given columns and index
    """
    # `index` is walked once per column and once more by the constructor, so
    # a one-shot iterator must be materialised first. Re-iterable inputs
    # (list, range, pd.Index, ...) are passed through untouched.
    if iter(index) is index:
        index = list(index)
    data = {c: [f"{c}{i}" for i in index] for c in cols}
    return pd.DataFrame(data, index=index)
# Two demo frames that share columns A and B; df2 also has a column "c".
df1 = make_df('AB', [1, 2])
df2 = make_df('ABc', [3, 4, 5])
print(df1)
print(df2)

# Stack the frames row-wise. With concat's default outer join, column "c"
# is kept and is NaN for the rows that came from df1.
df = pd.concat(objs=[df1, df2])
print(df)