使用cities_10.csv进行降维练习
import matplotlib. pyplot as plt
import os
import pandas as pd
% matplotlib inline
os. chdir( 'Q:/data' )
pd. set_option( 'display.max_columns' , None )
cities = pd. read_csv( 'cities_10.csv' , encoding= 'gbk' )
cities
  AREA       X1     X2      X3      X4      X5      X6      X7      X8      X9
0 辽宁    5458.2  13000  1376.2  2258.4  1315.9   529.0  2258.4   123.7   399.7
1 山东   10550.0  11643  3502.5  3851.0  2288.7  1070.7  3181.9   211.1   610.2
2 河北    6076.6   9047  1406.7  2092.6  1161.6   597.1  1968.3    45.9   302.3
3 天津    2022.6  22068   822.8   960.0   703.7   361.9   941.4   115.7   171.8
4 江苏   10636.3  14397  3536.3  3967.2  2320.0  1141.3  3215.8   384.7   643.7
5 上海    5408.8  40627  2196.2  2755.8  1970.2   779.3  2035.2   320.5   709.0
6 浙江    7670.0  16570  2356.5  3065.0  2296.6  1180.6  2877.5   294.2   566.9
7 福建    4682.0  13510  1047.1  1859.0   964.5   397.9  1663.3   173.7   272.9
8 广东   11769.7  15030  4224.6  4793.6  3022.9  1275.5  5013.6  1843.7  1201.6
9 广西    2455.4   5062   367.0   995.7   542.2   352.7  1025.5    15.1   186.7
from sklearn. decomposition import PCA, FastICA
# Every column except the first one is fed to the decomposition.
ica_features = cities.iloc[:, 1:]

# Reduce the feature columns to two independent components.
# NOTE(review): newer scikit-learn releases expect a string for `whiten`
# ('unit-variance' / 'arbitrary-variance') instead of True — confirm against
# the installed version before changing it, since the choice affects scaling.
cities_ica = FastICA(
    n_components=2,
    whiten=True,
)

# fit() returns the fitted estimator, which is then used for the projection.
ica_trans = cities_ica.fit(ica_features)
cities_X_ica = ica_trans.transform(ica_features)

# Bare expression: the notebook renders the projected data as the cell output.
pd.DataFrame(cities_X_ica)
          0         1
0  0.117789 -0.114247
1  0.119293  0.357979
2  0.251025 -0.078474
3 -0.163078 -0.446406
4  0.023720  0.365038
5 -0.833090 -0.130078
6 -0.026661  0.104525
7  0.109146 -0.204617
8 -0.016233  0.542019
9  0.418088 -0.395740
% matplotlib inline
import matplotlib. pyplot as plt
import numpy as np
plt. scatter( cities_X_ica[ : , 0 ] , cities_X_ica[ : , 1 ] )
for x, y, s in np. vstack( [ cities_X_ica[ : , 0 ] , cities_X_ica[ : , 1 ] , np. arange( 10 ) ] ) . T:
plt. text( x, y, s)
plt. show( )
![output_4_0](output_4_0.png)