OpenJDK 64-Bit Server VM warning: INFO: os::commit_memory(...) failed; error='...' (errno=12)
./bin/hadoop jar ./myapp/WordCount.jar input output
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class Dedup {
//The map copies the input value to the output key and emits it directly
public static class Map extends Mapper<Object,Text,Text,Text>{
private static Text line=new Text();
//Implement the map function
public void map(Object key,Text value,Context context)
throws IOException,InterruptedException{
line=value;
context.write(line, new Text(""));
}
}
//The reduce copies the input key to the output key and emits it directly
public static class Reduce extends Reducer<Text,Text,Text,Text>{
//Implement the reduce function
public void reduce(Text key,Iterable<Text> values,Context context)
throws IOException,InterruptedException{
context.write(key, new Text(""));
}
}
public static void main(String[] args) throws Exception{
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: Data Deduplication <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "Data Deduplication");
job.setJarByClass(Dedup.class);
//Set the Mapper, Combiner and Reducer classes
job.setMapperClass(Map.class);
job.setCombinerClass(Reduce.class);
job.setReducerClass(Reduce.class);
//Set the output key/value types
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
//Set the input and output paths
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
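For illustration only (hypothetical sample data, not from the original text): if the input files contain the lines
2012-3-1 a
2012-3-2 b
2012-3-1 a
the shuffle groups identical lines under one key, so the job writes each distinct line exactly once as the output key (with an empty value):
2012-3-1 a
2012-3-2 b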
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class Sort {
//The map parses the input value into an IntWritable and uses it as the output key
public static class Map extends
Mapper<Object,Text,IntWritable,IntWritable>{
private static IntWritable data=new IntWritable();
//Implement the map function
public void map(Object key,Text value,Context context)
throws IOException,InterruptedException{
String line=value.toString();
data.set(Integer.parseInt(line));
context.write(data, new IntWritable(1));
}
}
//The reduce emits the input key once per element in its value list
//(so duplicate values keep their multiplicity),
//using the global linenum counter as the output key to record the rank
public static class Reduce extends
Reducer<IntWritable,IntWritable,IntWritable,IntWritable>{
private static IntWritable linenum = new IntWritable(1);
//Implement the reduce function
public void reduce(IntWritable key,Iterable<IntWritable> values,Context context)
throws IOException,InterruptedException{
for(IntWritable val:values){
context.write(linenum, key);
linenum = new IntWritable(linenum.get()+1);
}
}
}
public static void main(String[] args) throws Exception{
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: Data Sort <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "Data Sort");
job.setJarByClass(Sort.class);
//Set the Mapper and Reducer classes
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
//A single reduce task is required so that linenum produces a global ranking
job.setNumReduceTasks(1);
//Set the output key/value types
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(IntWritable.class);
//Set the input and output paths
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
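For illustration only (hypothetical sample data): with input values
2
32
654
32
15
the single reducer receives the keys in ascending order and writes "rank value" pairs:
1	2
2	15
3	32
4	32
5	654
The global ranking relies on there being exactly one reduce task; with several reducers each partition would be ranked independently.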
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class Score {
public static class Map extends
Mapper<Object, Text, Text, IntWritable> {
// Implement the map function
public void map(Object key,Text value,Context context)
throws IOException, InterruptedException {
String[] str = value.toString().split(" ");
String name = str[0]; // student name field
String grade = str[1]; // score field
context.write(new Text(name), new IntWritable(Integer.parseInt(grade)));
}
}
public static class Reduce extends
Reducer<Text, IntWritable, Text, IntWritable> {
// Implement the reduce function
public void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
int sum = 0;
int count = 0;
Iterator<IntWritable> iterator = values.iterator();
while (iterator.hasNext()) {
sum += iterator.next().get(); // accumulate the total score
count++; // count the number of scores
}
int average = sum / count; // compute the integer average score
context.write(key, new IntWritable(average));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: Score Average <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "Score Average");
job.setJarByClass(Score.class);
// Set the Mapper and Reducer classes
job.setMapperClass(Map.class);
// Note: the reducer is not reused as a combiner here, because averaging
// partial averages does not give the correct overall average
job.setReducerClass(Reduce.class);
// Set the output key/value types
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// Split the input data set into splits and provide a RecordReader implementation
job.setInputFormatClass(TextInputFormat.class);
// Provide a RecordWriter implementation responsible for writing the output
job.setOutputFormatClass(TextOutputFormat.class);
// Set the input and output paths
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
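For illustration only (hypothetical sample data): with input lines of the form "name score", such as
zhangsan 88
zhangsan 92
lisi 79
lisi 80
the reducer sums each student's scores and divides by the count, producing integer averages (79 for lisi because integer division truncates 79.5):
zhangsan	90
lisi	79
The fragments below come from the patent-citation example: the first Mapper/Reducer pair inverts each "citing, cited" record so that all citing patents are listed under the patent they cite.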
public static class MapClass extends Mapper<LongWritable, Text, Text, Text>
{
// Input key: byte offset of the line; value: a "citing patent, cited patent" pair
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException
{
String[] citation = value.toString().split(",");
context.write(new Text(citation[1]), new Text(citation[0]));
// Output key: cited patent number; value: citing patent number
}
}
public static class ReduceClass extends Reducer<Text, Text, Text, Text>
{
public void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException
{
String csv = "";
for (Text val : values)
{
if (csv.length() > 0) csv += ",";
csv += val.toString();
}
context.write(key, new Text(csv));
// Output key: cited patent number; value: "citing patent 1, citing patent 2, ..."
}
}
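For illustration only (hypothetical data): input records of the form "citing,cited", such as
1000067,992945
1000067,993234
1000070,992945
are inverted by the map and grouped by the reduce, giving one line per cited patent:
992945	1000067,1000070
993234	1000067
The second pair of classes below counts how many times each patent is cited instead of listing the citing patents.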
public static class MapClass extends Mapper<LongWritable, Text, Text, IntWritable>
{
private final static IntWritable one = new IntWritable(1);
// Input key: byte offset of the line; value: a "citing patent, cited patent" pair
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException
{
String[] citation = value.toString().split(",");
context.write(new Text(citation[1]), one);
// Output key: cited patent number; value: 1
}
}
public static class ReduceClass extends Reducer<Text, IntWritable, Text, IntWritable>
{
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException
{
int count = 0;
Iterator<IntWritable> iter = values.iterator();
while (iter.hasNext())
{
count += iter.next().get();
}
context.write(key, new IntWritable(count));
// Output key: cited patent number; value: number of times it is cited
}
}
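Both fragments above omit the enclosing class, imports and driver (they need the same imports shown earlier plus java.util.Iterator). A minimal driver sketch for the citation-count job, assuming the fragments are nested in a class named CitationCount (the class name and job name are illustrative, not from the original text):
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: Citation Count <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "Citation Count");
job.setJarByClass(CitationCount.class);
job.setMapperClass(MapClass.class);
// Summing counts is associative, so the reducer can also serve as the combiner
job.setCombinerClass(ReduceClass.class);
job.setReducerClass(ReduceClass.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}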
Selection sort and bubble sort implemented in Scala (Spark)
object SelectionSort {
def selectionSort(l: List[Int]): List[Int] = l match {
case List() => List()
case list => {
val smallest = list.min
smallest :: selectionSort(list.filter(_ != smallest))
}
}
def main(args: Array[String]) {
val list = List(3, 12, 43, 23, 7, 1, 2, 20)
println(selectionSort(list))
}
}
object BubbleSort {
// Bubble sort
// The outer recursion splits off the head element
def bubbleSort(l: List[Int]): List[Int] = l match {
case List() => List()
case head :: tail => bSort(head, bubbleSort(tail))
}
// The inner recursion bubbles the element into its sorted position
def bSort(data: Int, dataSet: List[Int]): List[Int] = dataSet match {
case List() => List(data)
case head :: tail => if (data <= head) data :: dataSet else head :: bSort(data, tail)
}
def main(args: Array[String]) {
val list = List(3, 12, 43, 23, 7, 1, 2, 20)
println(bubbleSort(list))
}
}
// Reverse the order of the lines in a text file in place
import scala.io.Source
import java.io.PrintWriter
val path = "test.txt"
val source = Source.fromFile(path)
val result = source.getLines().toArray.reverse // read all lines into memory before rewriting the file
source.close()
val pw = new PrintWriter(path)
result.foreach(line => pw.write(line + "\n"))
pw.close()