package com.*.prophet.algorithm.sugar;
import com.*.prophet.algorithm.core.KMeans;
/**
* @Author:langjf
* @Date: 2021/9/14
* @Desc K-Means: determine the optimal K based on how many meters apart the locations are
*/
public class LocationsKMeansHelper {
    /** Earth radius in meters used by {@link #getDistance}. */
    private static final double EARTH_RADIUS_METERS = 6371229;
    /** Column of a sample row that holds the cluster id read by {@link #isClassify}. */
    private static final int LABEL_COLUMN = 2;

    /**
     * Approximates the ground distance between two longitude/latitude positions
     * by projecting the differences onto a plane at the mean latitude.
     *
     * @param lng1 longitude of the first position, in degrees
     * @param lat1 latitude of the first position, in degrees
     * @param lng2 longitude of the second position, in degrees
     * @param lat2 latitude of the second position, in degrees
     * @return approximate distance in meters
     */
    public static double getDistance(double lng1, double lat1, double lng2, double lat2) {
        double deltaLng = lng2 - lng1;
        // Go the short way around the globe when the two positions straddle the
        // antimeridian (e.g. 179 and -179 degrees are 2 degrees apart, not 358).
        if (deltaLng > 180) {
            deltaLng -= 360;
        } else if (deltaLng < -180) {
            deltaLng += 360;
        }
        double meanLatitude = Math.toRadians((lat1 + lat2) / 2);
        double x = Math.toRadians(deltaLng) * EARTH_RADIUS_METERS * Math.cos(meanLatitude);
        double y = Math.toRadians(lat2 - lat1) * EARTH_RADIUS_METERS;
        return Math.hypot(x, y);
    }

    /**
     * Clusters the given positions, adding one cluster at a time until
     * {@link #isClassify} accepts the result, then prints the result.
     *
     * @param datas one row per position: longitude, latitude and a third column
     *              that receives the cluster id
     * @param range maximum accepted distance in meters, see {@link #isClassify}
     */
    public static void KMeans(float[][] datas, double range) {
        // The model starts with its default number of clusters (4).
        com.gwm.prophet.algorithm.core.KMeans model = new KMeans(datas);
        model.cluster();
        // Once there is one cluster per sample no finer split exists, so stop
        // there instead of requesting more clusters than samples.
        while (!isClassify(model, range) && model.getClassCount() < model.getInstanceNumber()) {
            model.setResent(model.getClassCount() + 1, model);
        }
        model.printConsole();
    }

    /**
     * Checks whether the current K is acceptable: every sample must lie within
     * {@code range} of the centroid of the cluster it was assigned to.
     *
     * <p>The centroid is computed here from the samples returned by
     * {@code s.getData()}, so it is in the same coordinate space as the samples.
     *
     * @param s     model whose samples carry their cluster id in the third column
     * @param range maximum accepted distance between a sample and its centroid
     * @return {@code true} if no sample is farther than {@code range} from its centroid
     */
    public static boolean isClassify(KMeans s, double range) {
        float[][] data = s.getData();
        int sampleCount = s.getInstanceNumber();
        int clusterCount = s.getClassCount();

        double[] lngSum = new double[clusterCount];
        double[] latSum = new double[clusterCount];
        int[] members = new int[clusterCount];
        for (int i = 0; i < sampleCount; i++) {
            // The id is stored as a float; a plain cast also works for ids >= 10.
            int cluster = (int) data[i][LABEL_COLUMN];
            lngSum[cluster] += data[i][0];
            latSum[cluster] += data[i][1];
            members[cluster]++;
        }

        for (int i = 0; i < sampleCount; i++) {
            int cluster = (int) data[i][LABEL_COLUMN];
            double centreLng = lngSum[cluster] / members[cluster];
            double centreLat = latSum[cluster] / members[cluster];
            if (getDistance(data[i][0], data[i][1], centreLng, centreLat) > range) {
                return false;
            }
        }
        return true;
    }

    /** Demo entry point: clusters ten sample positions with a 500 meter range. */
    public static void main(String[] args) {
        float[][] data = {
            {99.75346f, 24.09003f, 0},
            {106.75346f, 25.09003f, 0},
            {107.75346f, 27.09003f, 0},
            {106.75346f, 27.09003f, 0},
            {105.75346f, 28.09003f, 0},
            {104.75346f, 25.09003f, 0},
            {104.75346f, 25.09003f, 0},
            {104.75346f, 25.09003f, 0},
            {104.75346f, 21.09003f, 0},
            // This row was previously written to index 8 a second time, which
            // left index 9 as an all-zero sample at (0, 0).
            {104.75346f, 21.09003f, 0},
        };
        KMeans(data, 500);
    }
}
package com.gwm.prophet.algorithm.core;
import lombok.Data;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
/**
* @Author:langjf
* @Date: 2021/9/1
* @Desc
*/
class PPoint {
    /** Horizontal coordinate. */
    public float x;
    /** Vertical coordinate. */
    public float y;
    /** Marker assigned by the constructors; starts at -1. */
    public int flag;

    /** Creates a point at (0, 0). */
    public PPoint() {
        this(0f, 0f);
    }

    /**
     * Creates a point at the given coordinates.
     *
     * @param x horizontal coordinate
     * @param y vertical coordinate
     */
    public PPoint(float x, float y) {
        this.x = x;
        this.y = y;
        this.flag = -1;
    }
}
@Data
public class KMeans {
    // Number of clusters (K).
    int ClassCount = 4;
    // Number of samples; 9 matches the demo set built by the no-arg constructor.
    int InstanceNumber = 9;
    // Number of feature columns per sample; column FieldCount of every row
    // receives the cluster id.
    int FieldCount = 2;
    // Outlier threshold exponent: while searching for initial centres, a cluster
    // with fewer than InstanceNumber / ClassCount^t samples is treated as noise.
    final static double t = 2.0;
    // Sample matrix: FieldCount feature columns followed by the cluster-id column.
    private float[][] data;
    // Centre of each cluster, in the normalized space used while clustering
    // (feature value divided by max of its column).
    private float[][] classData;
    // Indices of samples treated as noise when choosing initial centres.
    private ArrayList<Integer> noises;
    // Sample indices of each cluster.
    private ArrayList<ArrayList<Integer>> resultList;
    // Per-column maximum used as the normalization divisor by the last run.
    float[] max = new float[FieldCount];

    /**
     * Creates a model over the given samples with the default number of clusters.
     *
     * @param data one row per sample: FieldCount feature values followed by one
     *             column that receives the cluster id
     */
    public KMeans(float[][] data) {
        this.data = data;
        this.classData = new float[ClassCount][FieldCount];
        this.InstanceNumber = data.length;
        noises = new ArrayList<Integer>();
        resultList = new ArrayList<ArrayList<Integer>>(ClassCount);
    }

    /** Creates a model over a built-in demo set of nine positions. */
    public KMeans() {
        classData = new float[ClassCount][FieldCount];
        noises = new ArrayList<Integer>();
        resultList = new ArrayList<ArrayList<Integer>>(ClassCount);
        // The last column of each row stores the clustering result.
        data = new float[][] {
            {99.75346f, 24.09003f, 0},
            {106.75346f, 25.09003f, 0},
            {107.75346f, 27.09003f, 0},
            {106.75346f, 27.09003f, 0},
            {105.75346f, 28.09003f, 0},
            {104.75346f, 25.09003f, 0},
            {104.75346f, 25.09003f, 0},
            {104.75346f, 25.09003f, 0},
            {104.75346f, 21.09003f, 0},
        };
    }

    /** Demo entry point: clusters the built-in demo set and prints the result. */
    public static void main(String[] args) {
        KMeans s = new KMeans();
        s.cluster();
        s.printConsole();
    }

    /**
     * Resets {@code s} to use {@code classCount} clusters over this instance's
     * sample matrix and runs the clustering on it again.
     *
     * @param classCount new number of clusters
     * @param s          model to reset and re-cluster
     */
    public void setResent(int classCount, KMeans s) {
        s.ClassCount = classCount;
        s.data = data;
        // Size the per-cluster storage from the requested count, not from this
        // instance's own count, which differs when s is another model.
        s.classData = new float[classCount][s.FieldCount];
        s.InstanceNumber = data.length;
        s.noises = new ArrayList<Integer>();
        s.resultList = new ArrayList<ArrayList<Integer>>(classCount);
        s.cluster();
    }

    /**
     * Runs the clustering: normalizes the features, searches for initial centres
     * (repeating while undersized clusters yield new noise samples), then
     * iterates until the centres stop changing.
     *
     * <p>Afterwards column FieldCount of every sample row holds its cluster id.
     * The feature columns are restored to their original values, so the method
     * can be run repeatedly on the same matrix.
     *
     * @throws IllegalStateException if the configuration or the sample matrix
     *                               cannot be clustered
     */
    public void cluster() {
        prepare();
        float[][] original = snapshotFeatures();
        normalize();
        try {
            // Whether another search for initial centres is needed.
            boolean needFindInitials = true;
            // Number of searches so far.
            int times = 1;
            while (needFindInitials) {
                needFindInitials = false;
                resultList.clear();
                System.out.println("寻找第" + (times++) + "次");
                // One attempt: choose initial centres and assign every sample.
                findInitials();
                firstClassify();
                double minimumSize = InstanceNumber / Math.pow(ClassCount, t);
                for (ArrayList<Integer> members : resultList) {
                    if (members.size() < minimumSize) {
                        for (Integer index : members) {
                            // Retry only when the noise set grew: an unchanged
                            // set would reproduce this exact round forever.
                            if (!noises.contains(index)) {
                                noises.add(index);
                                needFindInitials = true;
                            }
                        }
                    }
                }
            }
            Adjust();
        } finally {
            restoreFeatures(original);
        }
    }

    /** Validates the configuration and makes the centre storage match ClassCount. */
    private void prepare() {
        if (ClassCount < 2) {
            throw new IllegalStateException("ClassCount must be at least 2 but was " + ClassCount);
        }
        if (data == null || InstanceNumber < 1 || data.length < InstanceNumber) {
            throw new IllegalStateException(
                "data must hold at least InstanceNumber=" + InstanceNumber + " samples");
        }
        for (int i = 0; i < InstanceNumber; i++) {
            if (data[i] == null || data[i].length <= FieldCount) {
                throw new IllegalStateException(
                    "sample " + i + " needs " + FieldCount + " feature columns plus one cluster-id column");
            }
        }
        if (classData == null || classData.length != ClassCount) {
            classData = new float[ClassCount][FieldCount];
        }
    }

    /** Copies the feature columns of every sample so they can be restored later. */
    private float[][] snapshotFeatures() {
        float[][] snapshot = new float[InstanceNumber][];
        for (int i = 0; i < InstanceNumber; i++) {
            snapshot[i] = copyFeatures(data[i]);
        }
        return snapshot;
    }

    /** Writes the saved feature columns back, leaving the cluster-id column untouched. */
    private void restoreFeatures(float[][] snapshot) {
        for (int i = 0; i < InstanceNumber; i++) {
            System.arraycopy(snapshot[i], 0, data[i], 0, FieldCount);
        }
    }

    /** Returns a new array holding the first FieldCount values of the row. */
    private float[] copyFeatures(float[] row) {
        float[] copy = new float[FieldCount];
        System.arraycopy(row, 0, copy, 0, FieldCount);
        return copy;
    }

    /**
     * Divides every feature by the maximum of its column. The maximum is
     * recomputed from the samples on every call; a column whose maximum is 0 is
     * left unscaled to avoid dividing by zero.
     */
    private void normalize() {
        float[] columnMax = copyFeatures(data[0]);
        for (int i = 1; i < InstanceNumber; i++) {
            for (int j = 0; j < FieldCount; j++) {
                if (data[i][j] > columnMax[j]) {
                    columnMax[j] = data[i][j];
                }
            }
        }
        max = columnMax;
        for (int i = 0; i < InstanceNumber; i++) {
            for (int j = 0; j < FieldCount; j++) {
                if (max[j] != 0) {
                    data[i][j] = data[i][j] / max[j];
                }
            }
        }
    }

    /**
     * Chooses the initial cluster centres: first the two non-noise samples that
     * are farthest apart, then repeatedly the sample whose distance to its
     * nearest chosen centre is largest.
     *
     * @throws IllegalStateException if no remaining sample differs from the
     *                               centres already chosen
     */
    private void findInitials() {
        int a = 0;
        int b = 0;
        float maxDis = 0;
        for (int i = 0; i < InstanceNumber; i++) {
            // Noise samples do not take part.
            if (noises.contains(i)) {
                continue;
            }
            for (int j = i + 1; j < InstanceNumber; j++) {
                if (noises.contains(j)) {
                    continue;
                }
                float newDis = calDis(data[i], data[j]);
                if (maxDis < newDis) {
                    a = i;
                    b = j;
                    maxDis = newDis;
                }
            }
        }
        ArrayList<Integer> initials = new ArrayList<Integer>();
        registerCentre(0, a, initials);
        registerCentre(1, b, initials);

        for (int chosen = 2; chosen < ClassCount; chosen++) {
            float maxMin = 0;
            int newClass = -1;
            for (int i = 0; i < InstanceNumber; i++) {
                if (initials.contains(i) || noises.contains(i)) {
                    continue;
                }
                // Start from "no distance seen yet"; a real distance of 0 (a
                // duplicate of a centre) must stay 0 and never be overwritten.
                float min = Float.POSITIVE_INFINITY;
                for (int j = 0; j < chosen; j++) {
                    float dis = calDis(data[i], classData[j]);
                    if (dis < min) {
                        min = dis;
                    }
                }
                if (min > maxMin) {
                    maxMin = min;
                    newClass = i;
                }
            }
            if (newClass < 0) {
                throw new IllegalStateException(
                    "cannot choose initial centre " + (chosen + 1) + " of " + ClassCount
                        + ": no remaining sample differs from the centres already chosen");
            }
            registerCentre(chosen, newClass, initials);
        }
    }

    /** Records a sample as the centre of a new cluster whose first member is that sample. */
    private void registerCentre(int slot, int sampleIndex, ArrayList<Integer> initials) {
        initials.add(sampleIndex);
        // Copy the features: the centre must not share storage with the sample row.
        classData[slot] = copyFeatures(data[sampleIndex]);
        ArrayList<Integer> members = new ArrayList<Integer>();
        members.add(sampleIndex);
        resultList.add(members);
    }

    /**
     * First assignment: puts every sample into the cluster whose centre is nearest.
     */
    public void firstClassify() {
        for (int i = 0; i < InstanceNumber; i++) {
            ArrayList<Integer> members = resultList.get(nearestCentre(data[i]));
            // The centre samples were already added when the centres were chosen.
            if (!members.contains(i)) {
                members.add(i);
            }
        }
    }

    /** Returns the index of the centre nearest to the sample; the lowest index wins ties. */
    private int nearestCentre(float[] sample) {
        int nearest = -1;
        float nearestDis = 0f;
        for (int j = 0; j < classData.length; j++) {
            float dis = calDis(classData[j], sample);
            if (nearest == -1 || dis < nearestDis) {
                nearest = j;
                nearestDis = dis;
            }
        }
        return nearest;
    }

    /**
     * Iterates until no centre changes: recomputes every centre as the mean of
     * its members, then reassigns every sample to its nearest centre and stores
     * the cluster id in column FieldCount of the sample row.
     */
    public void Adjust() {
        // Whether any centre changed in the current pass.
        boolean change = true;
        // Number of passes so far.
        int times = 1;
        while (change) {
            change = false;
            System.out.println("迭代" + (times++) + "次");
            for (int i = 0; i < ClassCount; i++) {
                ArrayList<Integer> cls = resultList.get(i);
                if (cls.isEmpty()) {
                    // Keep the previous centre; a mean over no members would be NaN.
                    continue;
                }
                float[] newMean = new float[FieldCount];
                for (Integer index : cls) {
                    for (int j = 0; j < FieldCount; j++) {
                        newMean[j] += data[index][j];
                    }
                }
                for (int j = 0; j < FieldCount; j++) {
                    newMean[j] /= cls.size();
                }
                if (!compareMean(newMean, classData[i])) {
                    classData[i] = newMean;
                    change = true;
                }
            }
            // Drop the previous assignment.
            for (ArrayList<Integer> cls : resultList) {
                cls.clear();
            }
            // Reassign every sample.
            for (int i = 0; i < InstanceNumber; i++) {
                int clsId = nearestCentre(data[i]);
                data[i][FieldCount] = clsId;
                resultList.get(clsId).add(i);
            }
        }
    }

    /**
     * Computes the Euclidean distance between two samples over the FieldCount
     * feature columns; the cluster-id column is not part of the distance.
     *
     * @param aVector sample a
     * @param bVector sample b
     * @return Euclidean distance
     */
    private float calDis(float[] aVector, float[] bVector) {
        double sum = 0;
        for (int i = 0; i < FieldCount; i++) {
            double diff = bVector[i] - aVector[i];
            sum += diff * diff;
        }
        return (float) Math.sqrt(sum);
    }

    /**
     * Tells whether two mean vectors are equal component by component.
     *
     * @param a vector a
     * @param b vector b
     * @return {@code true} if both have the same length and equal components
     */
    private boolean compareMean(float[] a, float[] b) {
        if (a.length != b.length) {
            return false;
        }
        for (int i = 0; i < a.length; i++) {
            // Float.compare treats NaN as equal to NaN, so a NaN mean cannot
            // keep the iteration running forever.
            if (Float.compare(a[i], b[i]) != 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Prints the cluster id of every sample, then the size of every cluster.
     */
    public void printConsole() {
        for (int i = 0; i < InstanceNumber; i++) {
            // Print the whole id, not only its first character.
            System.out.println(String.valueOf((int) data[i][FieldCount]));
        }
        for (int i = 0; i < ClassCount; i++) {
            System.out.println("第" + (i + 1) + "类数目: "
                + resultList.get(i).size());
        }
    }
}