【无标题】

最新推荐文章于 2022-12-20 20:56:26 发布

nknkl20

最新推荐文章于 2022-12-20 20:56:26 发布

阅读量73

点赞数

文章标签： java

本文链接：https://blog.csdn.net/nknkl20/article/details/124967955

版权

import java.io.IOException;

import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

public static class TokenizerMapper

extends Mapper<Object, Text, Text, IntWritable>{

private final static IntWritable one = new IntWritable(1);

private Text word = new Text();

public void map(Object key, Text value, Context context

) throws IOException, InterruptedException {

StringTokenizer itr = new StringTokenizer(value.toString());

while (itr.hasMoreTokens()) {

word.set(itr.nextToken());

context.write(word, one);

}

public static class IntSumReducer

extends Reducer<Text,IntWritable,Text,IntWritable> {

private IntWritable result = new IntWritable();

public void reduce(Text key, Iterable<IntWritable> values,

Context context

) throws IOException, InterruptedException {

int sum = 0;

for (IntWritable val : values) {

sum += val.get();

}

result.set(sum);

context.write(key, result);

}

public static void main(String[] args) throws Exception {

Configuration conf = new Configuration();

String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

if (otherArgs.length != 2) {

System.err.println("Usage: wordcount <in> <out>");

System.exit(2);

}

Job job = new Job(conf, "word count");

job.setJarByClass(WordCount.class);

job.setMapperClass(TokenizerMapper.class);

job.setCombinerClass(IntSumReducer.class);

job.setReducerClass(IntSumReducer.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(IntWritable.class);

FileInputFormat.addInputPath(job, new Path(otherArgs[0]));

FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

System.exit(job.waitForCompletion(true) ? 0 : 1);

}

import java.io.IOException;
import java.sql.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class hdfs {

public static void main(String[] args) throws IOException {

Configuration conf = new Configuration();

FileSystem fs = FileSystem.get(conf);

System.out.println(fs.getUri());

Path file = new Path("/user/hadoop/myfile");

if (fs.exists(file)) {

System.out.println("File exists.");

} else

{

FSDataOutputStream outStream = fs.create(file);

outStream.writeUTF("china cstor cstor cstor china");

outStream.close();

}

FSDataInputStream inStream = fs.open(file);

String data = inStream.readUTF();

FileSystem hdfs = file.getFileSystem(conf);

FileStatus[] fileStatus = hdfs.listStatus(file);

for(FileStatus status:fileStatus)

{

System.out.println("FileOwer:"+status.getOwner());

System.out.println("FileReplication:"+status.getReplication());

System.out.println("FileModificationTime:"+new Date(status.getModificationTime()));

System.out.println("FileBlockSize:"+status.getBlockSize());

}

System.out.println(data);

System.out.println("Filename:"+file.getName());

inStream.close();

fs.close();

}

import java.io.IOException;

import java.util.HashMap;

import java.util.Hashtable;

import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.util.Iterator;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.util.GenericOptionsParser;

public class InvertedIndex {

public static class InvertedIndexMapper extends Mapper<LongWritable, Text, Text, Text>

{

public void map(LongWritable key, Text value, Context context)

throws IOException, InterruptedException

{

FileSplit fileSplit = (FileSplit)context.getInputSplit();

String fileName = fileSplit.getPath().getName();

String word;

IntWritable frequence=new IntWritable();

int one=1;

Hashtable<String,Integer> hashmap=new Hashtable();

StringTokenizer itr = new StringTokenizer(value.toString());

for(;itr.hasMoreTokens(); )

{

word=itr.nextToken();

if(hashmap.containsKey(word)){

hashmap.put(word,hashmap.get(word)+1);

}else{

hashmap.put(word, one);

}

for(Iterator<String> it=hashmap.keySet().iterator();it.hasNext();){

word=it.next();

frequence=new IntWritable(hashmap.get(word));

Text fileName_frequence = new Text(fileName+"@"+frequence.toString());

context.write(new Text(word),fileName_frequence);

}

public static class InvertedIndexCombiner extends Reducer<Text,Text,Text,Text>{

protected void reduce(Text key,Iterable<Text> values,Context context)

throws IOException ,InterruptedException{

String fileName="";

int sum=0;

String num;

String s;

for (Text val : values) {

s= val.toString();

fileName=s.substring(0, val.find("@"));

num=s.substring(val.find("@")+1, val.getLength());

sum+=Integer.parseInt(num);

}

IntWritable frequence=new IntWritable(sum);

context.write(key,new Text(fileName+"@"+frequence.toString()));

}

public static class InvertedIndexReducer extends Reducer<Text, Text, Text, Text>

{ @Override

protected void reduce(Text key, Iterable<Text> values, Context context)

throws IOException, InterruptedException

{ Iterator<Text> it = values.iterator();

StringBuilder all = new StringBuilder();

if(it.hasNext()) all.append(it.next().toString());

for(;it.hasNext();) {

all.append(";");

all.append(it.next().toString());

}

context.write(key, new Text(all.toString()));

}

public static void main(String[] args)

{

if(args.length!=2){

System.err.println("Usage: InvertedIndex <in> <out>");

System.exit(2);

}

try {

Configuration conf = new Configuration();

String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

Job job = new Job(conf, "invertedindex");

job.setJarByClass(InvertedIndex.class);

job.setMapperClass(InvertedIndexMapper.class);

job.setCombinerClass(InvertedIndexCombiner.class);

job.setReducerClass(InvertedIndexReducer.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(Text.class);

FileInputFormat.addInputPath(job, new Path(otherArgs[0]));

FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

System.exit(job.waitForCompletion(true) ? 0 : 1);

} catch (Exception e) {

e.printStackTrace();

}

nknkl20

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫