package dshuju1;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
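/*
 * dashuju1 -- de-duplication job, apparently step 1 of a small Naive Bayes
 * sentiment pipeline (see dashuju2-4). The mapper emits every input line as a
 * key; the reducer writes each distinct line once, prefixed with a constant
 * "1" (presumably the positive-class label), so duplicate review lines
 * collapse to a single record. Paths are hard-coded: C:/good.txt -> C:/goodmodel.
 */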
public class dashuju1 {
public static class amapper extends Mapper<LongWritable, Text, Text, Text>{
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
String line = value.toString();
// Emit the whole line as the key so identical lines are grouped in the reducer.
context.write(new Text(line), new Text("1"));
}
}
public static class amreducer extends Reducer<Text, Text, Text, Text>{
@Override
protected void reduce(Text key, Iterable<Text> values,Context context)
throws IOException, InterruptedException {
// context.write(new Text("-1"),key);
context.write(new Text("1"),key);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(dashuju1.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setMapperClass(amapper.class);
job.setReducerClass(amreducer.class);
// Hard-coded local paths.
Path in = new Path("C:/good.txt");
Path out = new Path("C:/goodmodel");
// Remove a stale output directory so reruns do not fail.
FileSystem hdfs = FileSystem.get(conf);
if (hdfs.exists(out)) hdfs.delete(out, true);
FileInputFormat.setInputPaths(job, in);
FileOutputFormat.setOutputPath(job, out);
job.waitForCompletion(true);
}
}
package dshuju1;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
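/*
 * dashuju2 -- per-label word-count job. Input lines look like
 * "label<TAB>word-segmented text"; the mapper emits "label:word -> 1" for every
 * word that contains CJK characters, and the reducer sums the ones, producing
 * "label:word<TAB>count".
 */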
public class dashuju2 {
static class fenleiMapper extends Mapper<LongWritable, Text, Text, Text>{
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// Each input line is "label<TAB>segmented text"; words are separated by spaces.
String[] line = value.toString().split("\t");
String[] line1 = line[1].split(" ");
for(int i = 0; i < line1.length; i++){
// Emit "label:word -> 1" once per word that contains at least one CJK character
// (19968 <= n < 40869, roughly U+4E00..U+9FA5); this skips punctuation and
// Latin tokens left over from word segmentation.
for(int j = 0; j < line1[i].length(); j++){
int n = line1[i].charAt(j);
if(19968 <= n && n < 40869){
context.write(new Text(line[0] + ":" + line1[i]), new Text("1"));
break;
}
}
}
}
static class fenleiReducer extends Reducer<Text, Text, Text, IntWritable>{
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
int sum = 0;
for (Text val : values){
// Each value is a "1" emitted by the mapper, so summing them counts occurrences.
sum += Integer.parseInt(val.toString());
}
// Output "label:word" together with how many times that word appeared under that label.
context.write(key, new IntWritable(sum));
}
}
// Driver (main method).
public static void main(String[] args) throws Exception
{
Configuration conf = new Configuration();
Path in = new Path("C:/model.txt");
Path out = new Path("C:/wwwwwwwwwwwwwwwwwwwwwww");
// String input = "C:\fenlei.txt";
// String output = "C:\fenlei";
//细节
FileSystem hdfs = FileSystem.get(conf);
if(hdfs.exists(out)) hdfs.delete(out);
Job job = Job.getInstance(conf,"fenlei");
job.setJarByClass(dashuju2.class);
job.setMapperClass(fenleiMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setReducerClass(fenleiReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.setInputPaths(job, in);
FileOutputFormat.setOutputPath(job, out);
job.waitForCompletion(true);
}
}
package dshuju1;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
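/*
 * dashuju3 -- conditional-probability job. It reads the "label:word<TAB>count"
 * table produced by dashuju2 (also loaded wholesale into memory in the
 * reducer's setup()) and, for each label:word pair, writes a Laplace-smoothed
 * estimate of P(word | label) = (count + 1) / (count_thisLabel + count_otherLabel + 2).
 */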
import org.apache.hadoop.util.LineReader;
public class dashuju3 {
public static class amapper extends Mapper<LongWritable, Text, Text, Text>{
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
String line = value.toString();
String[] line1 = line.split("\t");
// The key is "label:word"; the count itself is re-read from HDFS in the reducer's setup().
context.write(new Text(line1[0]), new Text("1"));
}
}
public static class amreducer extends Reducer<Text, Text, Text, Text>{
public Map<String,Integer> map;
@Override
protected void setup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
// Map<String,Integer> map = null ;
// String sss = "C:/goodbadfenlei.txt";
Configuration conf = context.getConfiguration();
String ss = conf.get("ss");
try {
map=Utilsss.getMapFormHDFS(ss);
} catch (Exception e) {
// // TODO Auto-generated catch block
e.printStackTrace();
}
}
@Override
protected void reduce(Text key, Iterable<Text> values,Context context)
throws IOException, InterruptedException {
String[] label = key.toString().split(":");
// The table loaded in setup() is keyed by String, so look the key up as a String.
if(map.containsKey(key.toString())){
// a = how often this word appeared under this label.
Integer a = map.get(key.toString());
// b = how often the same word appeared under the opposite label (0 if it never did).
String otherLabel = label[0].equals("1") ? "-1" : "1";
String labelb = otherLabel + ":" + label[1];
Integer b = 0;
if(map.containsKey(labelb)){
b = map.get(labelb);
}
double a1 = a.doubleValue();
double b1 = b.doubleValue();
// Laplace-smoothed conditional probability: P(word | label) = (a + 1) / (a + b + 2).
double tiaojian = (a1 + 1) / (a1 + b1 + 2);
context.write(key, new Text(String.valueOf(tiaojian)));
}
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String sss = "C:/goodbadfenlei.txt";
conf.set("ss", sss);
Job job = Job.getInstance(conf);
job.setJarByClass(dashuju3.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setMapperClass(amapper.class);
job.setReducerClass(amreducer.class);
// Input: the per-label word counts; output: the smoothed conditional probabilities.
Path in = new Path("C:/goodbadfenlei.txt");
Path out = new Path("C:/goodbadtiaojian");
// Remove a stale output directory so reruns do not fail.
FileSystem hdfs = FileSystem.get(conf);
if (hdfs.exists(out)) hdfs.delete(out, true);
FileInputFormat.setInputPaths(job, in);
FileOutputFormat.setOutputPath(job, out);
job.waitForCompletion(true);
}
}
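/*
 * Utilsss -- helper that loads a tab-separated "key<TAB>count" file (or every
 * file under the given path) into an in-memory HashMap, parsing the counts as integers.
 */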
class Utilsss{
public static Map<String,Integer> getMapFormHDFS(String input) throws Exception{
Configuration conf = new Configuration();
Path path = new Path(input);
FileSystem fs = path.getFileSystem(conf);
FileStatus[] status = fs.listStatus(path);
Map<String,Integer> map = new HashMap();
for(int i= 0;i < status.length;i++){
if(status[i].isFile()){
FSDataInputStream infs = fs.open(status[i].getPath());
LineReader reader = new LineReader(infs,conf);
Text line = new Text();
while (reader.readLine(line) > 0){
// Each line is "label:word<TAB>count".
String[] temp = line.toString().split("\t");
map.put(temp[0], Integer.parseInt(temp[1]));
}
reader.close();
}
}
return map;
}
}
package dshuju1;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.LineReader;
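/*
 * dashuju4 -- classification job. For each word-segmented input sentence it
 * multiplies the per-word conditional probabilities of the "1" (positive) and
 * "-1" (negative) classes, looked up in the dashuju3 output, and labels the
 * sentence with whichever class has the larger product (effectively a Naive
 * Bayes decision with uniform class priors).
 */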
public class dashuju4 {
public static class amapper extends Mapper<LongWritable, Text, Text, Text>{
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
String line = value.toString();
// Emit the whole (already word-segmented) sentence as the key.
context.write(new Text(line), new Text("1"));
}
}
public static class amreducer extends Reducer<Text, Text, Text, Text>{
public Map<String,String> map;
@Override
protected void setup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
String ss = conf.get("sss");
try {
map=Utilss.getMapFormHDFS(ss);
// System.out.println(map);
} catch (Exception e) {
//
e.printStackTrace();
}
}
@Override
protected void reduce(Text key, Iterable<Text> values,Context context)
throws IOException, InterruptedException {
// The key is one word-segmented sentence, e.g. "一下 一切 一室".
String[] str = key.toString().split(" ");
double sum1 = 1.0;
double sum2 = 1.0;
ArrayList<Double> sum_good = new ArrayList<Double>();
ArrayList<Double> sum_bad = new ArrayList<Double>();
for(int i = 0; i < str.length; i++){
// Look the word up under both labels ("-1" = negative, "1" = positive).
String label1 = "-1" + ":" + str[i];
String label2 = "1" + ":" + str[i];
if(map.containsKey(label1)){
// Word seen in negative reviews: use its conditional probability.
// (A word present under both labels is counted only on the negative side,
// because the "-1" entry is checked first.)
try {
sum_bad.add(Double.parseDouble(map.get(label1)));
} catch (NumberFormatException e) {
sum_bad.add(1.0);
}
}else if(map.containsKey(label2)){
// Word seen in positive reviews.
try {
sum_good.add(Double.parseDouble(map.get(label2)));
} catch (NumberFormatException e) {
sum_good.add(1.0);
}
}else{
// Unseen word: contributes a neutral factor of 1 to both products.
sum_good.add(1.0);
sum_bad.add(1.0);
}
}
// Multiply the per-word probabilities only after every word has been processed.
for(int j = 0; j < sum_good.size(); j++){
sum1 = sum_good.get(j) * sum1;
}
for(int j = 0; j < sum_bad.size(); j++){
sum2 = sum_bad.get(j) * sum2;
}
// "这是好的话" = "this is a good (positive) comment", "这是坏的话" = "this is a bad (negative) comment".
if(sum1 > sum2){
context.write(key, new Text("这是好的话"));
}else{
context.write(key, new Text("这是坏的话"));
}
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String sss = "C:/goodbadtiaojian.txt";
conf.set("sss", sss);
Job job = Job.getInstance(conf);
job.setJarByClass(dashuju4.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setMapperClass(amapper.class);
job.setReducerClass(amreducer.class);
// Input: word-segmented sentences to classify; output: one label per sentence.
Path in = new Path("C:/nishi.txt");
Path out = new Path("C:/nnnnnnnnnnnnsssssssssssss");
// Remove a stale output directory so reruns do not fail.
FileSystem hdfs = FileSystem.get(conf);
if (hdfs.exists(out)) hdfs.delete(out, true);
FileInputFormat.setInputPaths(job, in);
FileOutputFormat.setOutputPath(job, out);
job.waitForCompletion(true);
}
}
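/*
 * Utilss -- same loader as Utilsss, but the values are kept as Strings
 * (the probabilities written by dashuju3) instead of being parsed as integers.
 */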
class Utilss{
public static Map<String,String> getMapFormHDFS(String input) throws Exception{
Configuration conf = new Configuration();
Path path = new Path(input);
FileSystem fs = path.getFileSystem(conf);
FileStatus[] status = fs.listStatus(path);
Map<String,String> map = new HashMap();
for(int i= 0;i < status.length;i++){
if(status[i].isFile()){
FSDataInputStream infs = fs.open(status[i].getPath());
LineReader reader = new LineReader(infs,conf);
Text line = new Text();
while (reader.readLine(line) > 0){
// Each line is "label:word<TAB>probability".
String[] temp = line.toString().split("\t");
map.put(temp[0], temp[1]);
}
reader.close();
}
}
return map;
}
}