hadoop下kmeans算法实现四

最新推荐文章于 2021-07-11 23:27:00 发布

小飞侠-2

最新推荐文章于 2021-07-11 23:27:00 发布

阅读量425

点赞数

分类专栏： hadoop下算法开发

hadoop下算法开发专栏收录该内容

4 篇文章 0 订阅

订阅专栏

KMapper.java

[java]view plaincopy 
   
 import java.io.ByteArrayOutputStream;  
 import java.io.IOException;  
 import java.net.URI;  
 import java.util.StringTokenizer;  
   
 import org.apache.hadoop.conf.Configuration;  
 import org.apache.hadoop.fs.FSDataInputStream;  
 import org.apache.hadoop.fs.FileSystem;  
 import org.apache.hadoop.fs.Path;  
 import org.apache.hadoop.io.IOUtils;  
 import org.apache.hadoop.io.LongWritable;  
 import org.apache.hadoop.io.Text;  
 import org.apache.hadoop.mapreduce.Mapper;  
   
   
 public class KMapper extends Mapper<LongWritable, Text, Text, Text> {  
       
     private String[] center;  
     //读取3.txt中更新的中心点坐标，并将坐标存入center数组中  
     protected void setup(Context context) throws IOException,InterruptedException  //read centerlist, and save to center[]  
     {  
         String centerlist = "hdfs://localhost:9000/home/administrator/hadoop/kmeans/input2/3.txt"; //center文件  
         Configuration conf1 = new Configuration();  
         conf1.set("hadoop.job.ugi", "hadoop-user,hadoop-user");  
        FileSystem fs = FileSystem.get(URI.create(centerlist),conf1);  
        FSDataInputStream in = null;  
        ByteArrayOutputStream out = new ByteArrayOutputStream();  
        try{  
                
            in = fs.open( new Path(centerlist) );  
            IOUtils.copyBytes(in,out,100,false);    
            center = out.toString().split(" ");  
            }finally{  
                 IOUtils.closeStream(in);  
             }  
     }  
     //从hadoop接收的数据在2.txt中保存  
     public void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException  
     {  
         StringTokenizer itr = new StringTokenizer(value.toString());  
         //从2.txt读入数据，以空格为分割符，一个一个处理  
         while(itr.hasMoreTokens())//用于判断所要分析的字符串中，是否还有语言符号，如果有则返回true，反之返回false  
         {  
               
             //计算第一个坐标跟第一个中心的距离min  
             String outValue = new String(itr.nextToken());//逐个获取以空格为分割符的字符串(2,3) (10,30) (34,40) (1,1)  
             String[] list = outValue.replace("(", "").replace(")", "").split(",");  
             String[] c = center[0].replace("(", "").replace(")", "").split(",");  
             float min = 0;  
             int pos = 0;  
             for(int i=0;i<list.length;i++)  
             {  
                 System.out.println(i+"list:"+list[i]);  
                 System.out.println(i+"c:"+c[i]);  
                 min += (float) Math.pow((Float.parseFloat(list[i]) - Float.parseFloat(c[i])),2);//求欧式距离，为加根号  
             }  
               
               
             for(int i=0;i<center.length;i++)  
             {  
                 String[] centerStrings = center[i].replace("(", "").replace(")", "").split(",");  
                 float distance = 0;  
                 for(int j=0;j<list.length;j++)  
                     distance += (float) Math.pow((Float.parseFloat(list[j]) - Float.parseFloat(centerStrings[j])),2);  
                 if(min>distance)  
                 {  
                     min=distance;  
                     pos=i;  
                 }  
             }  
             context.write(new Text(center[pos]), new Text(outValue));//输出：中心点，对应的坐标  
             System.out.println("中心点"+center[pos]+"对应坐标"+outValue);  
             System.out.println("Mapper输出："+center[pos]+" "+outValue);  
         }  
     }  
   
 }  

KReducer.java

[java]view plaincopy 
   
 import java.io.IOException;  
   
 import org.apache.hadoop.io.Text;  
 import org.apache.hadoop.mapreduce.Reducer;  
   
   
 public class KReducer extends Reducer<Text, Text, Text, Text> {  
     //<中心点类别,中心点对应的坐标集合>,每个中心点类别的坐标集合求新的中心点  
       
     public void reduce(Text key,Iterable<Text> value,Context context) throws IOException,InterruptedException  
     {  
         String outVal = "";  
         int count=0;  
         String center="";  
         System.out.println("Reduce过程第一次");  
         System.out.println(key.toString()+"Reduce");  
         int length = key.toString().replace("(", "").replace(")", "").replace(":", "").split(",").length;  
         float[] ave = new float[Float.SIZE*length];  
         for(int i=0;i<length;i++)  
             ave[i]=0;   
         for(Text val:value)  
         {  
             System.out.println("val:"+val.toString());  
             System.out.println("values:"+value.toString());  
             outVal += val.toString()+" ";  
             String[] tmp = val.toString().replace("(", "").replace(")", "").split(",");  
             System.out.println("temlength:"+tmp.length);  
             for(int i=0;i<tmp.length;i++)  
                 ave[i] += Float.parseFloat(tmp[i]);  
             count ++;  
         }  
         System.out.println("count:"+count);  
         System.out.println("outVal:"+outVal+"/outVal");  
         for (int i=0;i<2;i++)  
         {  
             System.out.println("ave"+i+"i"+ave[i]);  
         }  
         //ave[0]存储X坐标之和，ave[1]存储Y坐标之和  
         for(int i=0;i<length;i++)  
         {  
             ave[i]=ave[i]/count;  
             if(i==0)  
                 center += "("+ave[i]+",";  
             else {  
                 if(i==length-1)  
                     center += ave[i]+")";  
                 else {  
                     center += ave[i]+",";  
                 }  
             }  
         }  
         System.out.println("写入part："+key+" "+outVal+" "+center);  
         context.write(key, new Text(outVal+center));  
     }  
   
 }  

NewCenter.java

[java]view plaincopy 
   
 import java.io.ByteArrayInputStream;  
 import java.io.ByteArrayOutputStream;  
 import java.io.IOException;  
 import java.io.OutputStream;  
 import java.net.URI;  
   
 import org.apache.hadoop.conf.Configuration;  
 import org.apache.hadoop.fs.FSDataInputStream;  
 import org.apache.hadoop.fs.FileSystem;  
 import org.apache.hadoop.fs.Path;  
 import org.apache.hadoop.io.IOUtils;  
   
   
 public class NewCenter {  
       
     int k = 2;  
     float shold=Integer.MIN_VALUE;  
     String[] line;  
     String newcenter = new String("");  
       
     public float run(String[] args) throws IOException,InterruptedException  
     {  
         Configuration conf = new Configuration();  
         conf.set("hadoop.job.ugi", "hadoop,hadoop");   
         FileSystem fs = FileSystem.get(URI.create(args[2]+"/part-r-00000"),conf);  
         FSDataInputStream in = null;  
         ByteArrayOutputStream out = new ByteArrayOutputStream();  
         try{   
             in = fs.open( new Path(args[2]+"/part-r-00000"));   
             IOUtils.copyBytes(in,out,50,false);  
             line = out.toString().split("\n");  
             } finally {   
                 IOUtils.closeStream(in);  
             }  
       
         //System.out.println("上一次的MapReduce结果："+out.toString());  
         System.out.println("上一次MapReduce结果：第一行："+line[0]);  
         System.out.println("第二行："+line[1]);  
         System.out.println("。");  
         for(int i=0;i<k;i++)  
         {  
             String[] l = line[i].replace("\t", " ").split(" ");//如果这行有tab的空格，可以替代为空格  
             //(key,values)key和values同时输出是，中间保留一个Tab的距离，即'\t'  
             String[] startCenter = l[0].replace("(", "").replace(")", "").split(",");  
             //上上次的中心点startCenter[0]=(10,30);startCenter[1]=(2,3);  
             String[] finalCenter = l[l.length-1].replace("(", "").replace(")", "").split(",");  
             //上一次的中心点finalCenter[0]=(22,35);finalCenter[1]=(1.5,2.0);  
             float tmp = 0;  
             for(int j=0;j<startCenter.length;j++)  
                 tmp += Math.pow(Float.parseFloat(startCenter[j])-Float.parseFloat(finalCenter[j]), 2);  
             //两个中心点间的欧式距离的平方  
             newcenter = newcenter + l[l.length - 1].replace("\t", "") + " ";  
             if(shold <= tmp)  
                 shold = tmp;  
             System.out.println(i+"坐标距离："+tmp);  
         }  
         System.out.println("新中心点："+newcenter);  
         OutputStream out2 = fs.create(new Path(args[1]+"/center/3.txt") );   
         IOUtils.copyBytes(new ByteArrayInputStream(newcenter.getBytes()), out2, 4096,true);  
         //System.out.println(newcenter);  
         return shold;  
         //return 0;  
     }  
   
 }