1. numpy+kmeans处理数组:
原文本是3维数据(点id, x坐标,y坐标),要读入后两维,组成向量后kmeans进行聚类,后得到一维标签,存入txt中
>>>import numpy
>>> from sklearn.cluster import KMeans
>>> kmean=KMeans(6)
>>> a=numpy.loadtxt('traffic_vec2D.txt',usecols=(1,2))
>>> result=kmean.fit_predict(a)
>>> numpy.savetxt('tra_label6.txt',result,fmt='%d')
2. id 与标签对应生成txt文件
因为数据格式要求为:id (label 1),所以用python代码将一维数据转换一下。(ToIdLabel())
3. 用学长的FDandPicture.py画图、得到各区域POI值
4. 把POI值导入到csv中(ToCsv())
5. 计算熵值(excel中计算)
以下是两个处理的工具函数:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'Utility functions (ToCsv, ToIdLabel) for the traffic-clustering workflow.'
__author__ = 'Tree'
import csv
def ToCsv(src='DF_random9_original.txt', dst='DF_random9_original.csv'):
    """Convert the ':'-separated POI result file to CSV.

    Each non-empty input line is either a bare region id (written as a
    single-column integer row) or a ``name:value`` pair (written as
    ``[name, float(value)]``).

    The defaults keep the original hard-coded filenames; pass *src*/*dst*
    to reuse the converter on other files.
    """
    # newline='' is required by the csv module so it fully controls line
    # endings (otherwise Windows gets \r\r\n, i.e. blank rows).
    # Mode 'a' preserves the original append semantics: rerunning the
    # script adds rows to an existing CSV rather than replacing it.
    with open(src, 'r') as f_r, open(dst, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for raw in f_r:
            parts = raw.strip().split(':')
            if not parts[0]:
                continue  # skip blank lines (e.g. trailing newline)
            if parts[0].isdigit():
                writer.writerow([int(parts[0])])
            else:
                writer.writerow([parts[0], float(parts[1])])
def ToIdLabel(vec_file='traffic_vec2D.txt',
              label_file='tra_label7.txt',
              out_file='tra_label7_ordered.txt'):
    """Merge point ids with their cluster labels into ``id (label 1)`` lines.

    *vec_file* holds ``id x y`` rows and *label_file* holds one integer
    cluster label per row, in the same order.  For each paired row, writes
    ``"<id> (<label> 1)\\n"`` to *out_file*.

    Replaces the original hard-coded ``range(813)`` with ``zip`` over the
    two inputs, so any dataset size works (pairing stops at the shorter
    file).  Defaults keep the original filenames.
    """
    with open(vec_file) as f_ids, open(label_file) as f_labels, \
            open(out_file, 'w') as f_out:
        for id_row, label_row in zip(f_ids, f_labels):
            # split() (no argument) also tolerates tabs / repeated spaces
            point_id = int(id_row.split()[0])    # first column is the id
            label = int(label_row.split()[0])
            f_out.write('{} ({} 1)\n'.format(point_id, label))
if __name__ == '__main__':
    # Only the CSV conversion runs by default; call ToIdLabel() manually
    # when the id/label merge step is needed.
    ToCsv()