?
import numpy as np
import pandas as pd
# Collect every column whose name contains 'norm', strip the '_norm' suffix
# from those names, and select the resulting (un-normalised) columns.
# NOTE(review): the filter matches the substring 'norm' while the replace
# strips '_norm'; a column such as 'normal' would pass the filter unchanged.
# Confirm whether the filter should be '_norm' instead.
features_list_temp = [col for col in df_tmp.columns if 'norm' in str(col)]
features_list = [col.replace('_norm', '') for col in features_list_temp]
features = df_tmp[features_list]
补集：随机抽取测试集索引，其余索引（补集）作为训练集
# Draw the test indices WITHOUT replacement so the test set contains exactly
# test_num distinct samples (np.random.randint may return the same index
# several times). Raises ValueError if test_num > data_num.
idx_test = np.random.choice(data_num, size=test_num, replace=False)
session_ymd_test = session_ymd_uniq[idx_test]
# Complement: every index in [0, data_num) that was not drawn for the test
# set. np.setdiff1d returns a sorted integer array, including when the
# result is empty, so it is always usable as an index.
idx_train = np.setdiff1d(np.arange(data_num), idx_test)
选取某一列的值在某个 array 中的行
# Keep only the rows whose 'session_ymd' value is one of the listed values.
feature.loc[feature['session_ymd'].isin([3, 4, 9])]
转换类型・连接两列（字符串）
# Convert every value of column "a" to its string representation.
dataframe["a"].map(str)
# Join column "a" (as strings) and column "b" with a single space between.
(dataframe["a"].map(str) + ' ') + dataframe["b"]
numpy -> pandas
# Wrap the predictions in a one-column DataFrame with the default index.
df_pred = pd.DataFrame({'pop_pred': pop_pred})
# Same, but reuse the index of data_test_cur for the new frame.
df_pred = pd.DataFrame({'pop_pred': pop_pred}, index=data_test_cur.index)
pandas -> numpy
# Underlying array of the DataFrame.
a = df_pred.values
# The same data as nested plain Python lists.
a.tolist()
groupby
# Group by a single column, given by name.
grouped = df_data.groupby("session_ymd")
# Group by two keys, each passed as a Series taken from the frame.
grouped = rawdata.groupby(
    [
        rawdata["sessionid"],
        rawdata["ymd"],
    ]
)
for session_ymd, df_group in grouped:
df_group. sort_va