k-means算法的主类
package kmeans;
import java.math.*;
import java.sql.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Scanner;
import MySQL.ChaZhao;
/**
 * Command-line entry point: loads the data set through {@code ChaZhao}, clusters it
 * with {@link k_means}, prints the final centres, the size of every cluster and the
 * variance of the cluster sizes (a measure of how evenly the data was split).
 */
public class main {
    /** Number of clusters requested from the k-means run. */
    private static final int CLUSTER_COUNT = 10;

    /**
     * Runs one clustering pass and prints the result to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Fetch the raw data set, then release the lookup resources.
        Connection conn = ChaZhao.getconn();
        ArrayList<float[]> dataSet = ChaZhao.select(conn);
        ChaZhao.close();

        k_means k = new k_means(CLUSTER_COUNT);
        k.setDataSet(dataSet);
        k.kmeans();
        ArrayList<ArrayList<float[]>> cluster = k.getCluster();

        // Coordinates of the final cluster centres.
        k.printCenter();

        int clusterCount = (cluster == null) ? 0 : cluster.size();
        System.out.println(clusterCount);

        if (clusterCount > 0) {
            // Expected size of a cluster if the points were spread perfectly evenly.
            // Derived from the actual data instead of a hard-coded 1200 so that the
            // statistic stays correct when the row count or CLUSTER_COUNT changes.
            int total = 0;
            for (ArrayList<float[]> members : cluster) {
                total += members.size();
            }
            double mean = (double) total / clusterCount;

            double squaredDeviation = 0;
            for (int i = 0; i < clusterCount; i++) {
                int n = cluster.get(i).size();
                System.out.print("k=" + i + ":");
                System.out.println(n);
                squaredDeviation += (n - mean) * (n - mean);
            }

            // Variance of the cluster sizes: divide by the real number of clusters
            // (the previous code divided by a stale constant 16).
            double s1 = squaredDeviation / clusterCount;
            System.out.println("s:" + s1);
        }

        // Drop the references held by this run.
        if (dataSet != null) {
            dataSet.clear();
        }
        if (cluster != null) {
            cluster.clear();
        }
        k.Clean();
    }
}
计算k-means中心值时的k-means算法,计算出所给数据所属的分类,更新入数据库
package kmeans;
import java.util.ArrayList;
import java.util.Random;
/**
 * Plain k-means clustering of {@code float[]} points.
 *
 * <p>Only the first {@code dimension} coordinates of every point take part in the
 * distance computation; by default that is the first two coordinates. Initial
 * centres are {@code k} distinct points picked at random from the data set, and the
 * iteration stops as soon as the sum of squared errors no longer changes between
 * two consecutive passes.
 *
 * <p>Typical use: {@code setDataSet(...)}, {@code kmeans()}, {@code getCluster()}.
 */
public class k_means {
    /** Number of leading coordinates used when the caller does not specify one. */
    private static final int DEFAULT_DIMENSION = 2;

    /** Cluster count asked for by the caller (already forced to be at least 1). */
    private final int requestedK;
    /** Number of leading coordinates of each point that are clustered on. */
    private final int dimension;
    /** Effective cluster count of the current run (never larger than the data set). */
    private int k;
    /** Index of the current iteration; also the index of its entry in {@link #jc}. */
    private int m;
    /** Number of points in the data set of the current run. */
    private int dataSetLength;
    /** Points to cluster; this is the caller's list, not a copy. */
    private ArrayList<float[]> dataSet;
    /** Current centre of every cluster. */
    private ArrayList<float[]> center = new ArrayList<float[]>();
    /** Members of every cluster, index-aligned with {@link #center}. */
    private ArrayList<ArrayList<float[]>> cluster = new ArrayList<ArrayList<float[]>>();
    /** Sum of squared errors recorded after every iteration. */
    private ArrayList<Float> jc = new ArrayList<Float>();
    private Random random;

    /**
     * Creates a clusterer that uses the first two coordinates of every point.
     *
     * @param k number of clusters; values {@code <= 0} are treated as 1
     */
    public k_means(int k) {
        this(k, DEFAULT_DIMENSION);
    }

    /**
     * Creates a clusterer that uses the first {@code dimension} coordinates of every point.
     *
     * @param k         number of clusters; values {@code <= 0} are treated as 1
     * @param dimension number of leading coordinates to cluster on, at least 1
     * @throws IllegalArgumentException if {@code dimension} is smaller than 1
     */
    public k_means(int k, int dimension) {
        if (dimension < 1) {
            throw new IllegalArgumentException("dimension must be >= 1 but was " + dimension);
        }
        this.requestedK = (k <= 0) ? 1 : k;
        this.k = this.requestedK;
        this.dimension = dimension;
    }

    /**
     * Empties the data set, the centres, the clusters and the error history.
     * Safe to call before any data was set or any run was made. Note that the
     * data set list handed to {@link #setDataSet} is itself cleared.
     */
    public void Clean() {
        if (dataSet != null) {
            dataSet.clear();
        }
        center.clear();
        cluster.clear();
        jc.clear();
    }

    /**
     * Sets the points to cluster. The list is used as is (no copy is taken).
     *
     * @param dataSet points, each with at least {@code dimension} coordinates
     */
    public void setDataSet(ArrayList<float[]> dataSet) {
        this.dataSet = dataSet;
    }

    /**
     * Returns the clusters produced by the last {@link #kmeans()} run.
     *
     * @return one list of points per cluster; empty if no run produced clusters
     */
    public ArrayList<ArrayList<float[]>> getCluster() {
        return cluster;
    }

    /**
     * Resets all per-run state and picks the initial centres.
     *
     * @return {@code false} when there is no data to cluster, {@code true} otherwise
     */
    private boolean init() {
        m = 0;
        random = new Random();
        center = new ArrayList<float[]>();
        cluster = new ArrayList<ArrayList<float[]>>();
        jc = new ArrayList<Float>();
        if (dataSet == null || dataSet.isEmpty()) {
            dataSetLength = 0;
            System.out.println("数据为空,请输入数据!!!!");
            return false;
        }
        dataSetLength = dataSet.size();
        // Clamp per run, so a later, larger data set gets the requested k back.
        k = Math.min(requestedK, dataSetLength);
        center = initCenters();
        cluster = initCluster();
        return true;
    }

    /** Picks {@code k} distinct random points of the data set as initial centres. */
    private ArrayList<float[]> initCenters() {
        ArrayList<float[]> chosen = new ArrayList<float[]>(k);
        boolean[] taken = new boolean[dataSetLength];
        // k <= dataSetLength is guaranteed by init(), so this loop terminates.
        while (chosen.size() < k) {
            int idx = random.nextInt(dataSetLength);
            if (!taken[idx]) {
                taken[idx] = true;
                chosen.add(dataSet.get(idx));
            }
        }
        return chosen;
    }

    /** Creates {@code k} empty clusters. */
    private ArrayList<ArrayList<float[]>> initCluster() {
        ArrayList<ArrayList<float[]>> fresh = new ArrayList<ArrayList<float[]>>(k);
        for (int i = 0; i < k; i++) {
            fresh.add(new ArrayList<float[]>());
        }
        return fresh;
    }

    /** Euclidean distance between two points over the first {@code dimension} coordinates. */
    private float distance(float[] element, float[] center) {
        return (float) Math.sqrt(errorSquare(element, center));
    }

    /**
     * Index of the smallest value in {@code distance}. When a later entry ties with
     * the current minimum it replaces it with probability one half.
     */
    private int minDistance(float[] distance) {
        float minDistance = distance[0];
        int minLocation = 0;
        for (int i = 1; i < distance.length; i++) {
            if (distance[i] < minDistance) {
                minDistance = distance[i];
                minLocation = i;
            } else if (distance[i] == minDistance && random.nextInt(10) < 5) {
                minLocation = i;
            }
        }
        return minLocation;
    }

    /** Assigns every point of the data set to the cluster with the nearest centre. */
    private void clusterSet() {
        float[] distances = new float[k];
        for (int i = 0; i < dataSetLength; i++) {
            float[] point = dataSet.get(i);
            for (int j = 0; j < k; j++) {
                distances[j] = distance(point, center.get(j));
            }
            cluster.get(minDistance(distances)).add(point);
        }
    }

    /** Squared Euclidean distance over the first {@code dimension} coordinates. */
    private float errorSquare(float[] element, float[] center) {
        float sum = 0.0f;
        for (int d = 0; d < dimension; d++) {
            float diff = element[d] - center[d];
            sum += diff * diff;
        }
        return sum;
    }

    /** Appends the sum of squared errors of the current assignment to {@link #jc}. */
    private void countRule() {
        float jcF = 0;
        for (int i = 0; i < cluster.size(); i++) {
            float[] c = center.get(i);
            for (float[] point : cluster.get(i)) {
                jcF += errorSquare(point, c);
            }
        }
        jc.add(jcF);
    }

    /**
     * Moves every centre to the mean of its cluster's members. A cluster without
     * members keeps its previous centre.
     */
    private void setNewCenter() {
        for (int i = 0; i < k; i++) {
            ArrayList<float[]> members = cluster.get(i);
            int n = members.size();
            if (n == 0) {
                continue;
            }
            float[] newCenter = new float[dimension];
            for (float[] point : members) {
                for (int d = 0; d < dimension; d++) {
                    newCenter[d] += point[d];
                }
            }
            for (int d = 0; d < dimension; d++) {
                newCenter[d] = newCenter[d] / n;
            }
            center.set(i, newCenter);
        }
    }

    /** Formats the clustered coordinates of a point as {@code (c0,c1,...)}. */
    private String formatPoint(float[] point) {
        StringBuilder sb = new StringBuilder("(");
        for (int d = 0; d < dimension; d++) {
            if (d > 0) {
                sb.append(',');
            }
            sb.append(point[d]);
        }
        return sb.append(')').toString();
    }

    /**
     * Prints the clustered coordinates of every point in {@code dataArray},
     * one per line, followed by a separator line.
     *
     * @param dataArray points to print
     */
    public void printDataArray(ArrayList<float[]> dataArray) {
        for (float[] point : dataArray) {
            System.out.println("print:" + formatPoint(point));
        }
        System.out.println("===================================");
    }

    /** Prints the current centre of every cluster, one per line. */
    public void printCenter() {
        for (int i = 0; i < center.size(); i++) {
            System.out.println("k" + i + ":" + formatPoint(center.get(i)));
        }
    }

    /**
     * Runs the clustering. Does nothing (beyond printing a notice) when the data
     * set is missing or empty; in that case {@link #getCluster()} returns an empty list.
     */
    void kmeans() {
        if (!init()) {
            return;
        }
        while (true) {
            clusterSet();
            countRule();
            // Converged once the error equals the previous pass. Float.compare also
            // treats NaN as equal to NaN, so NaN input cannot loop forever.
            if (m != 0 && Float.compare(jc.get(m), jc.get(m - 1)) == 0) {
                break;
            }
            setNewCenter();
            m++;
            cluster = initCluster();
        }
    }
}
k-means测试算法主方法
package kmeans;
import java.sql.Connection;
import java.util.ArrayList;
import java.util.Iterator;
import Features.FirstProcessing;
import MySQL.ChaRu;
import MySQL.ChaZhao;
/*
* 查找特征向量,计算特征值并更新表中特征值
*/
/**
 * Classifies every row returned by {@code ChaZhao.select} against the four fixed
 * centre tables of {@link Test_k_means} and hands the resulting cluster indices to
 * {@code ChaRu.insert}.
 *
 * <p>Row layout as read by this class: elements 0 and 1 are passed to the insert as
 * ints (presumably row identifiers -- confirm against {@code ChaZhao.select});
 * elements 2-4, 5-9, 10-14 and 15-16 are the four feature groups.
 */
public class main01 {
    /** Last argument of {@code ChaRu.insert}; its meaning is defined by that class. */
    private static final int INSERT_FLAG = 4;

    /**
     * Reads all rows, classifies each feature group and stores the results.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Each list element is one row holding all coordinates of a sample.
        Connection conn1 = ChaZhao.getconn();
        ArrayList<float[]> dataSet = ChaZhao.select(conn1);
        ChaZhao.close();

        Connection conn2 = ChaRu.getconn();
        try {
            // Fill the four centre tables used for classification.
            Test_k_means.fuzhi1();
            Test_k_means.fuzhi2();
            Test_k_means.fuzhi3();
            Test_k_means.fuzhi4();

            int[] k = new int[4];
            // Scratch buffers for the four feature groups, reused for every row.
            float[] data1 = new float[3];
            float[] data2 = new float[5];
            float[] data3 = new float[5];
            float[] data4 = new float[2];

            for (float[] data : dataSet) {
                System.arraycopy(data, 2, data1, 0, data1.length);
                System.arraycopy(data, 5, data2, 0, data2.length);
                System.arraycopy(data, 10, data3, 0, data3.length);
                System.arraycopy(data, 15, data4, 0, data4.length);

                k[0] = Test_k_means.getkt(16, 3, data1, Test_k_means.center1);
                k[1] = Test_k_means.getkt(14, 5, data2, Test_k_means.center2);
                k[2] = Test_k_means.getkt(10, 5, data3, Test_k_means.center3);
                k[3] = Test_k_means.getkt(10, 2, data4, Test_k_means.center4);

                ChaRu.insert(conn2, (int) data[0], (int) data[1], k, INSERT_FLAG);
            }
        } finally {
            // Release the insert resources even if classification or an insert fails.
            ChaRu.close();
        }
    }
}
测试用的k-means方法类
package kmeans;
import java.util.ArrayList;
import java.util.Random;
/**
 * Nearest-centre classifier backed by four fixed tables of cluster centres.
 *
 * <p>The tables ({@link #center1} .. {@link #center4}) start empty; call the matching
 * {@code fuzhiN()} method to fill one, then {@link #getkt} to find the index of the
 * centre closest to a point.
 */
public class Test_k_means {
    /** Shared source of randomness for tie-breaking in {@link #minDistance}. */
    private static final Random RANDOM = new Random();

    static ArrayList<float[]> center1 = new ArrayList<float[]>();
    static ArrayList<float[]> center2 = new ArrayList<float[]>();
    static ArrayList<float[]> center3 = new ArrayList<float[]>();
    static ArrayList<float[]> center4 = new ArrayList<float[]>();

    // Table 1: 16 centres, 3 coordinates each.
    static float[] ct1 = {(float) -0.36549547,(float) -0.19680291,(float) 1.3342463};
    static float[] ct2 = {(float) -0.26852378,(float) -0.03381751,(float) 1.0894369};
    static float[] ct3 = {(float) -0.16302495,(float) -0.19998512,(float) 0.329478};
    static float[] ct4 = {(float) -0.87378883,(float) -0.32445675,(float) 1.2311304};
    static float[] ct5 = {(float) -0.14907424,(float) -0.066931404,(float) 1.442742};
    static float[] ct6 = {(float) -0.11564034,(float) -0.28077325,(float) -0.4544813};
    static float[] ct7 = {(float) -1.2184348,(float) -0.24974133,(float) 1.1926856};
    static float[] ct8 = {(float) -0.59237033,(float) -0.18715477,(float) 1.1560123};
    static float[] ct9 = {(float) -0.052315436,(float) -0.15902035,(float) -1.2516569};
    static float[] ct10 = {(float) -0.715085,(float) -0.34694636,(float) -1.4817164};
    static float[] ct11 = {(float) -0.0319672,(float) 0.123795904,(float) 1.205883};
    static float[] ct12 = {(float) -0.059195306,(float) -0.06899053,(float) -1.4732516};
    static float[] ct13 = {(float) -0.06828567,(float) -0.27861646,(float) -0.9786703};
    static float[] ct14 = {(float) 0.06383231,(float) 0.06881439,(float) 1.4630694};
    static float[] ct15 = {(float) 0.38046,(float) 0.086474285,(float) -1.3731346};
    static float[] ct16 = {(float) -0.6380327,(float) -0.3705011,(float) 1.3980854};

    // Table 2: 14 centres, 5 coordinates each.
    static float[] ctt1 = {(float) 19.124573,(float) 19.942137,(float) 20.352076,(float) 20.683052,(float) 20.818832};
    static float[] ctt2 = {(float) 19.998987,(float) 20.386002,(float) 20.081959,(float) 20.245035,(float) 20.507982};
    static float[] ctt3 = {(float) 18.835457,(float) 18.02001,(float) 18.01416,(float) 18.275656,(float) 18.509668};
    static float[] ctt4 = {(float) 17.78561,(float) 17.379637,(float) 17.538177,(float) 17.822744,(float) 17.819603};
    static float[] ctt5 = {(float) 18.246813,(float) 18.072685,(float) 18.26946,(float) 18.531868,(float) 18.765728};
    static float[] ctt6 = {(float) 18.83908,(float) 18.665567,(float) 18.598078,(float) 18.782398,(float) 19.004646};
    static float[] ctt7 = {(float) 19.299448,(float) 19.514294,(float) 19.550995,(float) 19.709429,(float) 19.876785};
    static float[] ctt8 = {(float) 0.0,(float) 0.0,(float) 0.0,(float) 0.0,(float) 0.0};
    static float[] ctt9 = {(float) 19.147707,(float) 19.63055,(float) 18.349651,(float) 16.903051,(float) 0.0};
    static float[] ctt10= {(float) 18.181501,(float) 0.0,(float) 0.0,(float) 0.0,(float) 0.0};
    static float[] ctt11= {(float) 18.396076,(float) 18.365593,(float) 18.652214,(float) 18.946945,(float) 19.146654};
    static float[] ctt12= {(float) 17.994272,(float) 17.755035,(float) 17.938347,(float) 18.166737,(float) 18.32163};
    static float[] ctt13= {(float) 18.771173,(float) 18.516142,(float) 10.54976,(float) 0.0,(float) 0.0};
    static float[] ctt14= {(float) 18.899158,(float) 19.003944,(float) 19.06351,(float) 19.24987,(float) 19.461609};

    // Table 3: 10 centres, 5 coordinates each.
    static float[] cttt1 = {(float) 1.5396874,(float) 1.5743555,(float) 1.5751604,(float) 1.5739932,(float) 1.5768001};
    static float[] cttt2 = {(float) 0.779205,(float) 1.5462575,(float) 1.5648361,(float) 1.5776663,(float) 1.5919755};
    static float[] cttt3 = {(float) 1.6717223,(float) 1.8396813,(float) 1.8731005,(float) 1.8762144,(float) 1.8320376};
    static float[] cttt4 = {(float) 0.67196554,(float) 0.7107125,(float) 0.70706266,(float) 0.7159178,(float) 0.7500223};
    static float[] cttt5 = {(float) 2.1454394,(float) 2.1112318,(float) 2.0890515,(float) 2.0410147,(float) 1.9534185};
    static float[] cttt6 = {(float) 2.1100385,(float) 1.7329234,(float) 1.639594,(float) 1.5791867,(float) 1.5464628};
    static float[] cttt7 = {(float) 1.1653012,(float) 1.22876,(float) 1.3362818,(float) 1.3627602,(float) 1.4075104};
    static float[] cttt8 = {(float) 0.84546506,(float) 0.8630724,(float) 0.8741382,(float) 0.88455427,(float) 0.91003066};
    static float[] cttt9 = {(float) 0.9648166,(float) 1.0143458,(float) 1.0654656,(float) 1.113567,(float) 1.1450683};
    static float[] cttt10= {(float) 2.333743,(float) 2.3290477,(float) 2.331421,(float) 2.3287027,(float) 2.3180618};

    // Table 4: 10 centres, 2 coordinates each.
    static float[] ctttt1 = {(float) 5.026384,(float) -23.825523};
    static float[] ctttt2 = {(float) 17.161247,(float) 0.46476126};
    static float[] ctttt3 = {(float) 3.3841784,(float) 7.9062724};
    static float[] ctttt4 = {(float) 11.013154,(float) -1.2864549};
    static float[] ctttt5 = {(float) 6.640934,(float) 24.923754};
    static float[] ctttt6 = {(float) 8.692141,(float) -19.800547};
    static float[] ctttt7 = {(float) 12.416968,(float) -23.395496};
    static float[] ctttt8 = {(float) 10.311456,(float) -14.101471};
    static float[] ctttt9 = {(float) 3.5202806,(float) -13.321848};
    static float[] ctttt10= {(float) 5.50634,(float) -4.683569};

    /**
     * Replaces the content of {@code target} with {@code centers}, in order.
     * Clearing first makes the public {@code fuzhiN()} methods idempotent: calling
     * one twice no longer appends a second copy of its table.
     */
    private static void load(ArrayList<float[]> target, float[]... centers) {
        target.clear();
        for (float[] c : centers) {
            target.add(c);
        }
    }

    /** Fills {@link #center1} with its 16 three-coordinate centres. */
    public static void fuzhi1() {
        load(center1, ct1, ct2, ct3, ct4, ct5, ct6, ct7, ct8,
                ct9, ct10, ct11, ct12, ct13, ct14, ct15, ct16);
    }

    /** Fills {@link #center2} with its 14 five-coordinate centres. */
    public static void fuzhi2() {
        load(center2, ctt1, ctt2, ctt3, ctt4, ctt5, ctt6, ctt7,
                ctt8, ctt9, ctt10, ctt11, ctt12, ctt13, ctt14);
    }

    /** Fills {@link #center3} with its 10 five-coordinate centres. */
    public static void fuzhi3() {
        load(center3, cttt1, cttt2, cttt3, cttt4, cttt5,
                cttt6, cttt7, cttt8, cttt9, cttt10);
    }

    /** Fills {@link #center4} with its 10 two-coordinate centres. */
    public static void fuzhi4() {
        load(center4, ctttt1, ctttt2, ctttt3, ctttt4, ctttt5,
                ctttt6, ctttt7, ctttt8, ctttt9, ctttt10);
    }

    /** Empties all four centre tables. */
    public static void clear() {
        center1.clear();
        center2.clear();
        center3.clear();
        center4.clear();
    }

    /**
     * Euclidean distance between two points.
     *
     * @param element first point
     * @param center  second point
     * @param k       number of leading coordinates to compare (the dimension,
     *                not a cluster count)
     * @return the distance over the first {@code k} coordinates
     */
    public static float distance(float[] element, float[] center, int k) {
        float n = 0;
        for (int i = 0; i < k; i++) {
            float x = element[i] - center[i];
            n = n + x * x;
        }
        return (float) Math.sqrt(n);
    }

    /**
     * Index of the smallest value in {@code distance}. When a later entry ties with
     * the current minimum it replaces it with probability one half.
     *
     * @param distance non-empty array of distances
     * @return index of a minimal entry
     */
    public static int minDistance(float[] distance) {
        float minDistance = distance[0];
        int minLocation = 0;
        for (int i = 1; i < distance.length; i++) {
            if (distance[i] < minDistance) {
                minDistance = distance[i];
                minLocation = i;
            } else if (distance[i] == minDistance && RANDOM.nextInt(10) < 5) {
                minLocation = i;
            }
        }
        return minLocation;
    }

    /**
     * Finds the centre closest to {@code data}.
     *
     * @param k      number of centres to consider (the first {@code k} of {@code center})
     * @param kk     number of coordinates per point
     * @param data   point to classify
     * @param center centre table; must hold at least {@code k} entries
     * @return index of the nearest centre
     */
    public static int getkt(int k, int kk, float[] data, ArrayList<float[]> center) {
        float[] distances = new float[k];
        for (int j = 0; j < k; j++) {
            distances[j] = distance(data, center.get(j), kk);
        }
        return minDistance(distances);
    }
}