KMapper.java
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URI;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class KMapper extends Mapper<LongWritable, Text, Text, Text> {
private String[] center;
//Read the updated center coordinates from 3.txt and store them in the center[] array
protected void setup(Context context) throws IOException,InterruptedException //read the center list and save it to center[]
{
String centerlist = "hdfs://localhost:9000/home/administrator/hadoop/kmeans/input2/3.txt"; //center文件
Configuration conf1 = new Configuration();
conf1.set("hadoop.job.ugi", "hadoop-user,hadoop-user");
FileSystem fs = FileSystem.get(URI.create(centerlist),conf1);
FSDataInputStream in = null;
ByteArrayOutputStream out = new ByteArrayOutputStream();
try{
in = fs.open( new Path(centerlist) );
IOUtils.copyBytes(in,out,100,false);
center = out.toString().split(" ");
}finally{
IOUtils.closeStream(in);
}
}
//The input points handed to the mapper are stored in 2.txt
public void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException
{
StringTokenizer itr = new StringTokenizer(value.toString());
//Read the data from 2.txt, split on whitespace, and process the tokens one by one
while(itr.hasMoreTokens())//hasMoreTokens() returns true while there are still tokens left in the string, false otherwise
{
//Compute the squared distance from this point to the first center as the initial min
String outValue = itr.nextToken();//get the next whitespace-separated token, e.g. (2,3) (10,30) (34,40) (1,1)
String[] list = outValue.replace("(", "").replace(")", "").split(",");
String[] c = center[0].replace("(", "").replace(")", "").split(",");
float min = 0;
int pos = 0;
for(int i=0;i<list.length;i++)
{
System.out.println(i+"list:"+list[i]);
System.out.println(i+"c:"+c[i]);
min += (float) Math.pow((Float.parseFloat(list[i]) - Float.parseFloat(c[i])),2);//squared Euclidean distance (the square root is omitted)
}
for(int i=0;i<center.length;i++)
{
String[] centerStrings = center[i].replace("(", "").replace(")", "").split(",");
float distance = 0;
for(int j=0;j<list.length;j++)
distance += (float) Math.pow((Float.parseFloat(list[j]) - Float.parseFloat(centerStrings[j])),2);
if(min>distance)
{
min=distance;
pos=i;
}
}
context.write(new Text(center[pos]), new Text(outValue));//output: <nearest center, the point assigned to it>
System.out.println("Center "+center[pos]+" assigned point "+outValue);
System.out.println("Mapper output: "+center[pos]+" "+outValue);
}
}
}
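The mapper's nearest-center logic can be sanity-checked outside of Hadoop. The small class below is a hypothetical helper, not part of the original job; the sample centers and points mirror the values mentioned in the comments above, and 3.txt is assumed to hold the centers as space-separated "(x,y)" strings, which is what setup() splits on.
NearestCenterCheck.java (illustrative only)
public class NearestCenterCheck {
    // Squared Euclidean distance between two points written as "(x,y)" strings.
    static float squaredDistance(String a, String b) {
        String[] p = a.replace("(", "").replace(")", "").split(",");
        String[] q = b.replace("(", "").replace(")", "").split(",");
        float d = 0;
        for (int i = 0; i < p.length; i++)
            d += (float) Math.pow(Float.parseFloat(p[i]) - Float.parseFloat(q[i]), 2);
        return d;
    }
    public static void main(String[] args) {
        String[] centers = {"(2,3)", "(10,30)"};                     // sample contents of 3.txt
        String[] points  = {"(2,3)", "(10,30)", "(34,40)", "(1,1)"}; // sample contents of 2.txt
        for (String point : points) {
            int pos = 0;
            float min = squaredDistance(point, centers[0]);
            for (int i = 1; i < centers.length; i++) {
                float d = squaredDistance(point, centers[i]);
                if (d < min) { min = d; pos = i; }
            }
            System.out.println(centers[pos] + "\t" + point); // the same <center, point> pair the mapper emits
        }
    }
}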
KReducer.java
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class KReducer extends Reducer<Text, Text, Text, Text> {
//Input is <center, set of points assigned to that center>; each group of points yields a new center
public void reduce(Text key,Iterable<Text> value,Context context) throws IOException,InterruptedException
{
String outVal = "";
int count=0;
String center="";
System.out.println("Reduce过程第一次");
System.out.println(key.toString()+"Reduce");
int length = key.toString().replace("(", "").replace(")", "").replace(":", "").split(",").length;
float[] ave = new float[length];//one accumulator per coordinate dimension
for(int i=0;i<length;i++)
ave[i]=0;
for(Text val:value)
{
System.out.println("val:"+val.toString());
System.out.println("values:"+value.toString());
outVal += val.toString()+" ";
String[] tmp = val.toString().replace("(", "").replace(")", "").split(",");
System.out.println("temlength:"+tmp.length);
for(int i=0;i<tmp.length;i++)
ave[i] += Float.parseFloat(tmp[i]);
count ++;
}
System.out.println("count:"+count);
System.out.println("outVal:"+outVal+"/outVal");
for (int i=0;i<length;i++)
{
System.out.println("ave"+i+":"+ave[i]);
}
//ave[0] holds the sum of the X coordinates, ave[1] the sum of the Y coordinates
for(int i=0;i<length;i++)
{
ave[i]=ave[i]/count;
if(i==0)
center += "("+ave[i]+",";
else {
if(i==length-1)
center += ave[i]+")";
else {
center += ave[i]+",";
}
}
}
System.out.println("写入part:"+key+" "+outVal+" "+center);
context.write(key, new Text(outVal+center));
}
}
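With the sample points and initial centers used above, each line the reducer writes to part-r-00000 has the form oldCenter<TAB>point point ... newCenter. For example (the order of points within a group is not guaranteed):
(2,3)	(2,3) (1,1) (1.5,2.0)
(10,30)	(10,30) (34,40) (22.0,35.0)
NewCenter.java below reads these lines, takes the first token as the old center and the last token as the new one, and measures how far each center has moved.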
NewCenter.java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
public class NewCenter {
int k = 2;//number of cluster centers
float shold=Integer.MIN_VALUE;//largest squared shift between old and new centers, used to decide convergence
String[] line;//lines of the previous job's part-r-00000 output
String newcenter = "";//the new center list that will be written back to 3.txt
public float run(String[] args) throws IOException,InterruptedException
{
Configuration conf = new Configuration();
conf.set("hadoop.job.ugi", "hadoop,hadoop");
FileSystem fs = FileSystem.get(URI.create(args[2]+"/part-r-00000"),conf);
FSDataInputStream in = null;
ByteArrayOutputStream out = new ByteArrayOutputStream();
try{
in = fs.open( new Path(args[2]+"/part-r-00000"));
IOUtils.copyBytes(in,out,50,false);
line = out.toString().split("\n");
} finally {
IOUtils.closeStream(in);
}
//System.out.println("上一次的MapReduce结果:"+out.toString());
System.out.println("上一次MapReduce结果:第一行:"+line[0]);
System.out.println("第二行:"+line[1]);
System.out.println("。");
for(int i=0;i<k;i++)
{
String[] l = line[i].replace("\t", " ").split(" ");//replace any tab in the line with a space before splitting
//when a (key, value) pair is written, the key and value are separated by a tab, i.e. '\t'
String[] startCenter = l[0].replace("(", "").replace(")", "").split(",");
//the old center (from the previous round), e.g. (10,30) or (2,3)
String[] finalCenter = l[l.length-1].replace("(", "").replace(")", "").split(",");
//the newly computed center, e.g. (22,35) or (1.5,2.0)
float tmp = 0;
for(int j=0;j<startCenter.length;j++)
tmp += Math.pow(Float.parseFloat(startCenter[j])-Float.parseFloat(finalCenter[j]), 2);
//squared Euclidean distance between the two centers
newcenter = newcenter + l[l.length - 1].replace("\t", "") + " ";
if(shold <= tmp)
shold = tmp;
System.out.println(i+"坐标距离:"+tmp);
}
System.out.println("新中心点:"+newcenter);
OutputStream out2 = fs.create(new Path(args[1]+"/center/3.txt") );
IOUtils.copyBytes(new ByteArrayInputStream(newcenter.getBytes()), out2, 4096,true);
//System.out.println(newcenter);
return shold;
//return 0;
}
}
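The listings above omit the driver that chains the MapReduce job and the convergence check. A minimal sketch of the iterative loop is given below; the class name KDriver, the stopping threshold, and the argument layout (args[0] = the point file 2.txt, args[1] = the base directory whose center/3.txt holds the centers, args[2] = the job output directory read by NewCenter) are assumptions, not taken from the original code. Note that KMapper reads the center file from a hard-coded HDFS path, so args[1]+"/center/3.txt" has to resolve to that same location (or the mapper path has to be made configurable) for the loop to actually iterate on updated centers.
KDriver.java (hypothetical sketch)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class KDriver {
    public static void main(String[] args) throws Exception {
        float threshold = 0.0001f;      // stop once the largest squared center shift drops below this (assumed value)
        float shift = Float.MAX_VALUE;
        while (shift > threshold) {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "kmeans");
            job.setJarByClass(KDriver.class);
            job.setMapperClass(KMapper.class);
            job.setReducerClass(KReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            Path out = new Path(args[2]);
            FileSystem fs = FileSystem.get(conf);
            if (fs.exists(out))
                fs.delete(out, true);   // the output directory must not exist when the job starts
            FileOutputFormat.setOutputPath(job, out);
            if (!job.waitForCompletion(true))
                System.exit(1);
            shift = new NewCenter().run(args);  // compare old/new centers and rewrite 3.txt
        }
        System.out.println("Converged; final centers are in " + args[1] + "/center/3.txt");
    }
}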