OpenJDK 64-Bit Server VM warning: INFO: os::commit_memory(...) failed; error='...' (errno=12)
./bin/hadoop jar ./myapp/WordCount.jar input output
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class Dedup {
//The map copies the input value to the output key and emits it directly
public static class Map extends Mapper<Object,Text,Text,Text>{
private static Text line=new Text();
//Implement the map function
public void map(Object key,Text value,Context context)
throws IOException,InterruptedException{
line=value;
context.write(line, new Text(""));
}
}
//The reduce copies the input key to the output key and emits it directly
public static class Reduce extends Reducer<Text,Text,Text,Text>{
//Implement the reduce function
public void reduce(Text key,Iterable<Text> values,Context context)
throws IOException,InterruptedException{
context.write(key, new Text(""));
}
}
public static void main(String[] args) throws Exception{
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: Data Deduplication <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "Data Deduplication");
job.setJarByClass(Dedup.class);
//Set the Mapper, Combiner and Reducer classes
job.setMapperClass(Map.class);
job.setCombinerClass(Reduce.class);
job.setReducerClass(Reduce.class);
//Set the output key/value types
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
//Set the input and output paths
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
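For illustration only (hypothetical sample data, not from the original text): if the input files contain the lines
2012-3-1 a
2012-3-2 b
2012-3-1 a
the shuffle groups identical lines under one key, so the job writes each distinct line exactly once as the output key (with an empty value):
2012-3-1 a
2012-3-2 b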
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class Sort {
//The map parses the input value into an IntWritable and uses it as the output key
public static class Map extends
Mapper<Object,Text,IntWritable,IntWritable>{
private static IntWritable data=new IntWritable();
//Implement the map function
public void map(Object key,Text value,Context context)
throws IOException,InterruptedException{
String line=value.toString();
data.set(Integer.parseInt(line));
context.write(data, new IntWritable(1));
}
}
//The reduce emits the input key once per element in its value list
//(so duplicate values keep their multiplicity),
//using the global linenum counter as the output key to record the rank
public static class Reduce extends
Reducer<IntWritable,IntWritable,IntWritable,IntWritable>{
private static IntWritable linenum = new IntWritable(1);
//Implement the reduce function
public void reduce(IntWritable key,Iterable<IntWritable> values,Context context)
throws IOException,InterruptedException{
for(IntWritable val:values){
context.write(linenum, key);
linenum = new IntWritable(linenum.get()+1);
}
}
}
public static void main(String[] args) throws Exception{
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: Data Sort <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "Data Sort");
job.setJarByClass(Sort.class);
//Set the Mapper and Reducer classes
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
//A single reduce task is required so that linenum produces a global ranking
job.setNumReduceTasks(1);
//Set the output key/value types
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(IntWritable.class);
//Set the input and output paths
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
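For illustration only (hypothetical sample data): with input values
2
32
654
32
15
the single reducer receives the keys in ascending order and writes "rank value" pairs:
1	2
2	15
3	32
4	32
5	654
The global ranking relies on there being exactly one reduce task; with several reducers each partition would be ranked independently.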
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class Score {
public static class Map extends
Mapper<Object, Text, Text, IntWritable> {
// Implement the map function
public void map(Object key,Text value,Context context)
throws IOException, InterruptedException {
String[] str = value.toString().split(" ");
String name = str[0]; // student name field
String grade = str[1]; // score field
context.write(new Text(name), new IntWritable(Integer.parseInt(grade)));
}
}
public static class Reduce extends
Reducer<Text, IntWritable, Text, IntWritable> {
// Implement the reduce function
public void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
int sum = 0;
int count = 0;
Iterator<IntWritable> iterator = values.iterator();
while (iterator.hasNext()) {
sum += iterator.next().get(); // accumulate the total score
count++; // count the number of scores
}
int average = sum / count; // compute the integer average score
context.write(key, new IntWritable(average));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: Score Average <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "Score Average");
job.setJarByClass(Score.class);
// Set the Mapper and Reducer classes
job.setMapperClass(Map.class);
// Note: the reducer is not reused as a combiner here, because averaging
// partial averages does not give the correct overall average
job.setReducerClass(Reduce.class);
// Set the output key/value types
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// Split the input data set into splits and provide a RecordReader implementation
job.setInputFormatClass(TextInputFormat.class);
// Provide a RecordWriter implementation responsible for writing the output
job.setOutputFormatClass(TextOutputFormat.class);
// Set the input and output paths
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
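For illustration only (hypothetical sample data): with input lines of the form "name score", such as
zhangsan 88
zhangsan 92
lisi 79
lisi 80
the reducer sums each student's scores and divides by the count, producing integer averages (79 for lisi because integer division truncates 79.5):
zhangsan	90
lisi	79
The fragments below come from the patent-citation example: the first Mapper/Reducer pair inverts each "citing, cited" record so that all citing patents are listed under the patent they cite.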
public static class MapClass extends Mapper<LongWritable, Text, Text, Text>
{
// Input key: byte offset of the line; value: a "citing patent, cited patent" pair
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException
{
String[] citation = value.toString().split(",");
context.write(new Text(citation[1]), new Text(citation[0]));
// Output key: cited patent number; value: citing patent number
}
}
public static class ReduceClass extends Reducer<Text, Text, Text, Text>
{
public void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException
{
String csv = "";
for (Text val : values)
{
if (csv.length() > 0) csv += ",";
csv += val.toString();
}
context.write(key, new Text(csv));
// Output key: cited patent number; value: "citing patent 1, citing patent 2, ..."
}
}
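For illustration only (hypothetical data): input records of the form "citing,cited", such as
1000067,992945
1000067,993234
1000070,992945
are inverted by the map and grouped by the reduce, giving one line per cited patent:
992945	1000067,1000070
993234	1000067
The second pair of classes below counts how many times each patent is cited instead of listing the citing patents.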
public static class MapClass extends Mapper<LongWritable, Text, Text, IntWritable>
{
private final static IntWritable one = new IntWritable(1);
// Input key: byte offset of the line; value: a "citing patent, cited patent" pair
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException
{
String[] citation = value.toString().split(",");
context.write(new Text(citation[1]), one);
// Output key: cited patent number; value: 1
}
}
public static class ReduceClass extends Reducer<Text, IntWritable, Text, IntWritable>
{
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException
{
int count = 0;
Iterator<IntWritable> iter = values.iterator();
while (iter.hasNext())
{
count += iter.next().get();
}
context.write(key, new IntWritable(count));
// Output key: cited patent number; value: number of times it is cited
}
}
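Both fragments above omit the enclosing class, imports and driver (they need the same imports shown earlier plus java.util.Iterator). A minimal driver sketch for the citation-count job, assuming the fragments are nested in a class named CitationCount (the class name and job name are illustrative, not from the original text):
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: Citation Count <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "Citation Count");
job.setJarByClass(CitationCount.class);
job.setMapperClass(MapClass.class);
// Summing counts is associative, so the reducer can also serve as the combiner
job.setCombinerClass(ReduceClass.class);
job.setReducerClass(ReduceClass.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}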
Selection sort and bubble sort implemented in Scala (Spark)
object SelectionSort {
def selectionSort(l: List[Int]): List[Int] = l match {
case List() => List()
case list => {
val smallest = list.min
smallest :: selectionSort(list.filter(_ != smallest))
}
}
def main(args: Array[String]) {
val list = List(3, 12, 43, 23, 7, 1, 2, 20)
println(selectionSort(list))
}
}
object BubbleSort {
// Bubble sort
// The outer recursion splits off the head element
def bubbleSort(l: List[Int]): List[Int] = l match {
case List() => List()
case head :: tail => bSort(head, bubbleSort(tail))
}
// The inner recursion bubbles the element into its sorted position
def bSort(data: Int, dataSet: List[Int]): List[Int] = dataSet match {
case List() => List(data)
case head :: tail => if (data <= head) data :: dataSet else head :: bSort(data, tail)
}
def main(args: Array[String]) {
val list = List(3, 12, 43, 23, 7, 1, 2, 20)
println(bubbleSort(list))
}
}
// Reverse the order of the lines in a text file in place
import scala.io.Source
import java.io.PrintWriter
val path = "test.txt"
val source = Source.fromFile(path)
val result = source.getLines().toArray.reverse // read all lines into memory before rewriting the file
source.close()
val pw = new PrintWriter(path)
result.foreach(line => pw.write(line + "\n"))
pw.close()