Python聚类分析
导入所需的类和模块
from sklearn. cluster import KMeans
import matplotlib. pyplot as plt
import numpy as np
import pandas as pd
import os
# Directory that holds the data file. Leave it empty to keep the current
# working directory. The previous unconditional os.chdir(r'') always raised
# FileNotFoundError, because an empty string is not a valid directory path.
data_dir = r''
if data_dir:
    os.chdir(data_dir)
# Name of the CSV data set, resolved relative to the working directory.
datafile = r'air_data-utf8.csv'
读数据
# Load the CSV into a DataFrame: the file is read as UTF-8 and its first row
# (header=0) supplies the column names.
data = pd.read_csv(datafile, header=0, encoding='utf-8')
查看数据列
# Show the column labels of the loaded DataFrame.
print(data.columns)
Index(['MEMBER_NO', 'FFP_DATE', 'FIRST_FLIGHT_DATE', 'GENDER', 'FFP_TIER',
'WORK_CITY', 'WORK_PROVINCE', 'WORK_COUNTRY', 'AGE', 'LOAD_TIME',
'FLIGHT_COUNT', 'BP_SUM', 'EP_SUM_YR_1', 'EP_SUM_YR_2', 'SUM_YR_1',
'SUM_YR_2', 'SEG_KM_SUM', 'WEIGHTED_SEG_KM', 'LAST_FLIGHT_DATE',
'AVG_FLIGHT_COUNT', 'AVG_BP_SUM', 'BEGIN_TO_FIRST', 'LAST_TO_END',
'AVG_INTERVAL', 'MAX_INTERVAL', 'ADD_POINTS_SUM_YR_1',
'ADD_POINTS_SUM_YR_2', 'EXCHANGE_COUNT', 'avg_discount',
'P1Y_Flight_Count', 'L1Y_Flight_Count', 'P1Y_BP_SUM', 'L1Y_BP_SUM',
'EP_SUM', 'ADD_Point_SUM', 'Eli_Add_Point_Sum', 'L1Y_ELi_Add_Points',
'Points_Sum', 'L1Y_Points_Sum', 'Ration_L1Y_Flight_Count',
'Ration_P1Y_Flight_Count', 'Ration_P1Y_BPS', 'Ration_L1Y_BPS',
'Point_NotFlight'],
dtype='object')
查看数据
# Preview the first rows of the DataFrame as a quick sanity check.
print(data.head())
MEMBER_NO FFP_DATE FIRST_FLIGHT_DATE GENDER FFP_TIER WORK_CITY \
0 54993 2006/11/02 2008/12/24 男 6 .
1 28065 2007/02/19 2007/08/03 男 6 NaN
2 55106 2007/02/01 2007/08/30 男 6 .
3 21189 2008/08/22 2008/08/23 男 5 Los Angeles
4 39546 2009/04/10 2009/04/15 男