# Python 常用导入函数
from IPython. display import display
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import warnings
warnings. filterwarnings( 'ignore' )
from PIL import Image
import matplotlib. pyplot as plt
% matplotlib inline
plt. rcParams[ 'figure.figsize' ] = ( 14.0 , 8.0 )
plt. figure( figsize = ( 12 , 6 ) )
plt. rcParams[ 'font.sans-serif' ] = [ 'SimHei' ]
plt. rcParams[ 'axes.unicode_minus' ] = False
plt. axes( aspect= 'equal' )
date_format = mpl. dates. DateFormatter( "%m-%d" )
ax. xaxis. set_major_formatter( date_format)
xlocator = mpl. ticker. MultipleLocator( 7 )
ax. xaxis. set_major_locator( xlocator)
plt. xticks( rotation= 45 )
plt. style. use( 'ggplot' )
plt. subplots_adjust( hspace= 0.6 , wspace= 0.3 )
% config ZMQInteractiveShell. ast_node_interactivity= 'all'
# `interp` was historically re-exported by SciPy as an alias of numpy.interp.
# Recent SciPy releases no longer provide it, so fall back to the NumPy
# function; the name `interp` stays available either way.
try:
    from scipy import interp
except ImportError:
    from numpy import interp
# Start a Chrome session driven by a local chromedriver binary and load Baidu.
# NOTE(review): passing `executable_path=` directly looks like the Selenium 3
# calling convention; Selenium 4 expects the driver path via a Service object.
# Confirm which Selenium version this environment uses before running.
from selenium import webdriver

path = "D:/box/chromedriver_win32/chromedriver"
chrome_options = webdriver.ChromeOptions()
browser = webdriver.Chrome(
    executable_path=path,
    options=chrome_options,
)
browser.get('http://www.baidu.com')
from sklearn. tree import export_graphviz
import os

# Append the Graphviz binaries directory to this process's PATH
# (presumably so the Graphviz executables can be located when rendering
# exported trees — verify the install location on the target machine).
os.environ["PATH"] = os.pathsep.join(
    [os.environ["PATH"], 'D:/software/graphviz-2.38/release/bin/']
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split the features X and labels y, holding out 30% as the test portion.
# X and y must already be defined by the surrounding notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
)

# Fit the scaler on the training split only, then reuse the fitted scaler to
# transform the test split (the right-hand side is evaluated left to right).
s = StandardScaler()
x_train, x_test = s.fit_transform(X_train), s.transform(X_test)
# Set TensorFlow's logging verbosity to INFO.
# NOTE(review): `tf` is not imported anywhere in the visible source, and
# `tf.logging` looks like the TensorFlow 1.x API — confirm the import and the
# installed TensorFlow version before relying on this line.
tf. logging. set_verbosity( tf. logging. INFO)
# 解压缩 zip 并读取 csv 文件
import zipfile

import pandas as pd

# Let pandas print up to 500 columns before truncating the display.
pd.set_option('display.max_columns', 500)

# Read the CSV member straight out of the archive without extracting it.
# Both the archive and the member handle are context-managed so they are
# closed as soon as the DataFrame has been built.
with zipfile.ZipFile('KaggleCredit2.zip', 'r') as z:
    with z.open('KaggleCredit2.csv') as f:
        # The first CSV column is used as the row index.
        data = pd.read_csv(f, index_col=0)

# Preview the first rows.
data.head()
# 查看缺失值
# Catalogue of pandas calls for inspecting and dropping missing values in
# `data` (a DataFrame defined earlier in the notebook). Unless `inplace=True`
# is passed, each call returns a new object and leaves `data` unchanged.

# Boolean mask: True where a value is missing.
data. isnull( )
# Number of missing values in each column.
data. isnull( ) . sum ( axis= 0 )
# Drop every row that contains at least one missing value.
data. dropna( )
# Same, but modify `data` itself instead of returning a copy.
# NOTE: after this line `data` no longer has rows with missing values, so the
# calls below have nothing left to drop if the lines are run in sequence.
data. dropna( inplace= True )
# Drop columns (rather than rows) that contain missing values.
data. dropna( axis = 1 )
# Drop only the rows in which every value is missing.
data. dropna( how = 'all' )
# Keep only the rows that have at least `n` non-missing values.
# NOTE(review): `n` is not defined in the visible source — supply an integer.
data. dropna( thresh = n)
# Drop rows that have a missing value in column 'C'; other columns are ignored.
data. dropna( subset = [ 'C' ] )
# 权重系数取绝对值后排序（查看特征权重重要度）
# `lr` is a fitted linear model and `X` the feature DataFrame, both defined
# elsewhere in the notebook; the first row of `lr.coef_` is paired with the
# column names of `X`.
coef = lr.coef_[0]
feature_names = X.columns

# Signed coefficient per feature.
pd.Series(coef, index=feature_names)

# Absolute coefficient per feature, largest first.
pd.Series(np.abs(coef), index=feature_names).sort_values(ascending=False)