package dshuju1;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
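/*
 * dashuju1 -- de-duplication job, apparently step 1 of a small Naive Bayes
 * sentiment pipeline (see dashuju2-4). The mapper emits every input line as a
 * key; the reducer writes each distinct line once, prefixed with a constant
 * "1" (presumably the positive-class label), so duplicate review lines
 * collapse to a single record. Paths are hard-coded: C:/good.txt -> C:/goodmodel.
 */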
public class dashuju1 {
public static class amapper extends Mapper<LongWritable, Text, Text, Text>{
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
String line = value.toString();
// Emit the whole line as the key so identical lines are grouped in the reducer.
context.write(new Text(line), new Text("1"));
}
}
public static class amreducer extends Reducer<Text, Text, Text, Text>{
@Override
protected void reduce(Text key, Iterable<Text> values,Context context)
throws IOException, InterruptedException {
// context.write(new Text("-1"),key);
context.write(new Text("1"),key);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(dashuju1.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setMapperClass(amapper.class);
job.setReducerClass(amreducer.class);
// Hard-coded local paths.
Path in = new Path("C:/good.txt");
Path out = new Path("C:/goodmodel");
// Remove a stale output directory so reruns do not fail.
FileSystem hdfs = FileSystem.get(conf);
if (hdfs.exists(out)) hdfs.delete(out, true);
FileInputFormat.setInputPaths(job, in);
FileOutputFormat.setOutputPath(job, out);
job.waitForCompletion(true);
}
}
package dshuju1;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
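/*
 * dashuju2 -- per-label word-count job. Input lines look like
 * "label<TAB>word-segmented text"; the mapper emits "label:word -> 1" for every
 * word that contains CJK characters, and the reducer sums the ones, producing
 * "label:word<TAB>count".
 */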
public class dashuju2 {
static class fenleiMapper extends Mapper<LongWritable, Text, Text, Text>{
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// Each input line is "label<TAB>segmented text"; words are separated by spaces.
String[] line = value.toString().split("\t");
String[] line1 = line[1].split(" ");
for(int i = 0; i < line1.length; i++){
// Emit "label:word -> 1" once per word that contains at least one CJK character
// (19968 <= n < 40869, roughly U+4E00..U+9FA5); this skips punctuation and
// Latin tokens left over from word segmentation.
for(int j = 0; j < line1[i].length(); j++){
int n = line1[i].charAt(j);
if(19968 <= n && n < 40869){
context.write(new Text(line[0] + ":" + line1[i]), new Text("1"));
break;
}
}
}
}
static class fenleiReducer extends Reducer<Text, Text, Text, IntWritable>{
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
int sum = 0;
for (Text val : values){
// Each value is a "1" emitted by the mapper, so summing them counts occurrences.
sum += Integer.parseInt(val.toString());
}
// Output "label:word" together with how many times that word appeared under that label.
context.write(key, new IntWritable(sum));
}
}
// Driver (main method).
public static void main(String[] args) throws Exception
{
Configuration conf = new Configuration();
Path in = new Path("C:/model.txt");
Path out = new Path("C:/wwwwwwwwwwwwwwwwwwwwwww");
// String input = "C:\fenlei.txt";
// String output = "C:\fenlei";
//细节
FileSystem hdfs = FileSystem.get(conf);
if(hdfs.exists(out)) hdfs.delete(out);
Job job = Job.getInstance(conf,"fenlei");
job.setJarByClass(dashuju2.class);
job.setMapperClass(fenleiMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setReducerClass(fenleiReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.setInputPaths(job, in);
FileOutputFormat.setOutputPath(job, out);
job.waitForCompletion(true);
}
}
package dshuju1;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
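/*
 * dashuju3 -- conditional-probability job. It reads the "label:word<TAB>count"
 * table produced by dashuju2 (also loaded wholesale into memory in the
 * reducer's setup()) and, for each label:word pair, writes a Laplace-smoothed
 * estimate of P(word | label) = (count + 1) / (count_thisLabel + count_otherLabel + 2).
 */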
import org.apache.hadoop.util.LineReader;
public class dashuju3 {
public static class amapper extends Mapper<LongWritable, Text, Text, Text>{
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
String line = value.toString();
String[] line1 = line.split("\t");
// The key is "label:word"; the count itself is re-read from HDFS in the reducer's setup().
context.write(new Text(line1[0]), new Text("1"));
}
}
public static class amreducer extends Reducer<Text, Text, Text, Text>{
public Map<String,Integer> map;
@Override
protected void setup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
// Map<String,Integer> map = null ;
// String sss = "C:/goodbadfenlei.txt";
Configuration conf = context.getConfiguration();
String ss = conf.get("ss");
try {
map=Utilsss.getMapFormHDFS(ss);
} catch (Exception e) {
// // TODO Auto-generated catch block
e.printStackTrace();
}
}
@Override
protected void reduce(Text key, Iterable<Text> values,Context context)
throws IOException, InterruptedException {
String[] label = key.toString().split(":");
// The table loaded in setup() is keyed by String, so look the key up as a String.
if(map.containsKey(key.toString())){
// a = how often this word appeared under this label.
Integer a = map.get(key.toString());
// b = how often the same word appeared under the opposite label (0 if it never did).
String otherLabel = label[0].equals("1") ? "-1" : "1";
String labelb = otherLabel + ":" + label[1];
Integer b = 0;
if(map.containsKey(labelb)){
b = map.get(labelb);
}
double a1 = a.doubleValue();
double b1 = b.doubleValue();
// Laplace-smoothed conditional probability: P(word | label) = (a + 1) / (a + b + 2).
double tiaojian = (a1 + 1) / (a1 + b1 + 2);
context.write(key, new Text(String.valueOf(tiaojian)));
}
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String sss = "C:/goodbadfenlei.txt";
conf.set("ss", sss);
Job job = Job.getInstance(conf);
job.setJarByClass(dashuju3.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setMapperClass(amapper.class);
job.setReducerClass(amreducer.class);
// Input: the per-label word counts; output: the smoothed conditional probabilities.
Path in = new Path("C:/goodbadfenlei.txt");
Path out = new Path("C:/goodbadtiaojian");
// Remove a stale output directory so reruns do not fail.
FileSystem hdfs = FileSystem.get(conf);
if (hdfs.exists(out)) hdfs.delete(out, true);
FileInputFormat.setInputPaths(job, in);
FileOutputFormat.setOutputPath(job, out);
job.waitForCompletion(true);
}
}
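/*
 * Utilsss -- helper that loads a tab-separated "key<TAB>count" file (or every
 * file under the given path) into an in-memory HashMap, parsing the counts as integers.
 */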
class Utilsss{
public static Map<String,Integer> getMapFormHDFS(String input) throws Exception{
Configuration conf = new Configuration();
Path path = new Path(input);
FileSystem fs = path.getFileSystem(conf);
FileStatus[] status = fs.listStatus(path);
Map<String,Integer> map = new HashMap();
for(int i= 0;i < status.length;i++){
if(status[i].isFile()){
FSDataInputStream infs = fs.open(status[i].getPath());
LineReader reader = new LineReader(infs,conf);
Text line = new Text();
while (reader.readLine(line) > 0){
// Each line is "label:word<TAB>count".
String[] temp = line.toString().split("\t");
map.put(temp[0], Integer.parseInt(temp[1]));
}
reader.close();
}
}
return map;
}
}
package dshuju1;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.LineReader;
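/*
 * dashuju4 -- classification job. For each word-segmented input sentence it
 * multiplies the per-word conditional probabilities of the "1" (positive) and
 * "-1" (negative) classes, looked up in the dashuju3 output, and labels the
 * sentence with whichever class has the larger product (effectively a Naive
 * Bayes decision with uniform class priors).
 */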
public class dashuju4 {
public static class amapper extends Mapper<LongWritable, Text, Text, Text>{
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
String line = value.toString();
// Emit the whole (already word-segmented) sentence as the key.
context.write(new Text(line), new Text("1"));
}
}
public static class amreducer extends Reducer<Text, Text, Text, Text>{
public Map<String,String> map;
@Override
protected void setup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
String ss = conf.get("sss");
try {
map=Utilss.getMapFormHDFS(ss);
// System.out.println(map);
} catch (Exception e) {
//
e.printStackTrace();
}
}
@Override
protected void reduce(Text key, Iterable<Text> values,Context context)
throws IOException, InterruptedException {
// The key is one word-segmented sentence, e.g. "一下 一切 一室".
String[] str = key.toString().split(" ");
double sum1 = 1.0;
double sum2 = 1.0;
ArrayList<Double> sum_good = new ArrayList<Double>();
ArrayList<Double> sum_bad = new ArrayList<Double>();
for(int i = 0; i < str.length; i++){
// Look the word up under both labels ("-1" = negative, "1" = positive).
String label1 = "-1" + ":" + str[i];
String label2 = "1" + ":" + str[i];
if(map.containsKey(label1)){
// Word seen in negative reviews: use its conditional probability.
// (A word present under both labels is counted only on the negative side,
// because the "-1" entry is checked first.)
try {
sum_bad.add(Double.parseDouble(map.get(label1)));
} catch (NumberFormatException e) {
sum_bad.add(1.0);
}
}else if(map.containsKey(label2)){
// Word seen in positive reviews.
try {
sum_good.add(Double.parseDouble(map.get(label2)));
} catch (NumberFormatException e) {
sum_good.add(1.0);
}
}else{
// Unseen word: contributes a neutral factor of 1 to both products.
sum_good.add(1.0);
sum_bad.add(1.0);
}
}
// Multiply the per-word probabilities only after every word has been processed.
for(int j = 0; j < sum_good.size(); j++){
sum1 = sum_good.get(j) * sum1;
}
for(int j = 0; j < sum_bad.size(); j++){
sum2 = sum_bad.get(j) * sum2;
}
// "这是好的话" = "this is a good (positive) comment", "这是坏的话" = "this is a bad (negative) comment".
if(sum1 > sum2){
context.write(key, new Text("这是好的话"));
}else{
context.write(key, new Text("这是坏的话"));
}
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String sss = "C:/goodbadtiaojian.txt";
conf.set("sss", sss);
Job job = Job.getInstance(conf);
job.setJarByClass(dashuju4.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setMapperClass(amapper.class);
job.setReducerClass(amreducer.class);
// Input: word-segmented sentences to classify; output: one label per sentence.
Path in = new Path("C:/nishi.txt");
Path out = new Path("C:/nnnnnnnnnnnnsssssssssssss");
// Remove a stale output directory so reruns do not fail.
FileSystem hdfs = FileSystem.get(conf);
if (hdfs.exists(out)) hdfs.delete(out, true);
FileInputFormat.setInputPaths(job, in);
FileOutputFormat.setOutputPath(job, out);
job.waitForCompletion(true);
}
}
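/*
 * Utilss -- same loader as Utilsss, but the values are kept as Strings
 * (the probabilities written by dashuju3) instead of being parsed as integers.
 */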
class Utilss{
public static Map<String,String> getMapFormHDFS(String input) throws Exception{
Configuration conf = new Configuration();
Path path = new Path(input);
FileSystem fs = path.getFileSystem(conf);
FileStatus[] status = fs.listStatus(path);
Map<String,String> map = new HashMap();
for(int i= 0;i < status.length;i++){
if(status[i].isFile()){
FSDataInputStream infs = fs.open(status[i].getPath());
LineReader reader = new LineReader(infs,conf);
Text line = new Text();
while (reader.readLine(line) > 0){
// Each line is "label:word<TAB>probability".
String[] temp = line.toString().split("\t");
map.put(temp[0], temp[1]);
}
reader.close();
}
}
return map;
}
}