// Reads /user/lvxinjian/negative.txt line by line (decoded as UTF-8) and adds every
// line to wordSet, then prints the resulting size of wordSet to stdout.
// NOTE(review): `context` and `wordSet` are declared outside this fragment; presumably
// `context` is a Hadoop task context and `wordSet` a collection of strings — confirm.
Configuration conf = context.getConfiguration();
FileSystem fs = FileSystem.get(conf);
// Earlier variant that took the file location from the "emotionPath" configuration key
// instead of the hard-coded path below:
// FSDataInputStream fin = fs.open(new Path(conf.get("emotionPath")));
FSDataInputStream fin = fs.open(new Path("/user/lvxinjian/negative.txt"));
BufferedReader in = null;
String line;
try {
in = new BufferedReader(new InputStreamReader(fin, "UTF-8"));
// Each line is added verbatim: no trimming and no skipping of empty lines.
while ((line = in.readLine()) != null) {
wordSet.add(line);
}
System.out.println(wordSet.size());
} finally {
// Closing the reader also closes the wrapped stream `fin`. If the reader was never
// created, `in` is still null and `fin` is not closed here.
if(in != null)
in.close();
}
/**
 * Joins a word dictionary stored in Hadoop sequence files with a tab-separated
 * "wordIndex&lt;TAB&gt;value" listing and writes the result as "word&lt;TAB&gt;value"
 * lines to a local text file.
 *
 * <p>All input and output locations are hard-coded in the constants below.
 */
public class GetSentenceWithPos {

    /** Common prefix of the dictionary sequence files; the part number is appended. */
    private static final String DICTIONARY_PREFIX =
            "/user/lvxinjian/tfidf/mediafile/dictionary.file-";

    /** Number of dictionary parts to load (dictionary.file-0 .. dictionary.file-2). */
    private static final int DICTIONARY_PARTS = 3;

    /** Text file whose lines are expected to look like "wordIndex&lt;TAB&gt;value". */
    private static final String WORD_INFO_PATH =
            "/user/lvxinjian/showTfidf49AllData/part-r-00000";

    /** Local file the joined result is written to. */
    private static final String OUTPUT_PATH = "./2013052802.txt";

    /**
     * Loads the dictionary parts, reads the word-index listing, replaces every known
     * word index by its word and saves the resulting lines to {@link #OUTPUT_PATH}.
     *
     * @throws IOException if any of the input files cannot be opened or read
     * @throws NumberFormatException if a two-column line has a non-integer first column
     */
    public void read() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Dictionary: word index -> word.
        HashMap<Integer, String> wordList = new HashMap<Integer, String>();
        for (int part = 0; part < DICTIONARY_PARTS; part++) {
            System.out.println("load dictionary " + part + "...");
            loadDictionary(fs, conf, new Path(DICTIONARY_PREFIX + part), wordList);
        }

        System.out.println("load wordindex_count...");
        // Raw lines of the MapReduce result.
        ArrayList<String> wordInfo = readLines(fs, new Path(WORD_INFO_PATH));
        System.out.println("sizef:\t" + wordInfo.size());

        System.out.println("get word ...");
        ArrayList<String> lstResult = resolveWords(wordList, wordInfo);

        System.out.println("saving....");
        FileTool.SaveListToFile(lstResult, OUTPUT_PATH, false, Charset.forName("utf-8"));
    }

    /**
     * Reads one dictionary sequence file and adds its entries to {@code target},
     * keyed by the integer value and mapped to the string form of the key.
     * The reader is always closed, even when reading fails part-way.
     */
    private static void loadDictionary(FileSystem fs, Configuration conf, Path path,
            HashMap<Integer, String> target) throws IOException {
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, path, conf);
            // The key and value types must match those stored in the file being read.
            Text key = new Text();
            IntWritable val = new IntWritable();
            while (reader.next(key, val)) {
                target.put(val.get(), key.toString());
            }
        } finally {
            IOUtils.closeStream(reader);
        }
    }

    /**
     * Reads all lines of a UTF-8 text file from {@code fs} and closes the stream afterwards.
     */
    private static ArrayList<String> readLines(FileSystem fs, Path path) throws IOException {
        ArrayList<String> lines = new ArrayList<String>();
        FSDataInputStream fin = fs.open(path);
        BufferedReader in = null;
        try {
            in = new BufferedReader(new InputStreamReader(fin, "UTF-8"));
            String line;
            while ((line = in.readLine()) != null) {
                lines.add(line);
            }
        } finally {
            if (in != null) {
                // Closing the reader also closes the wrapped stream.
                IOUtils.closeStream(in);
            } else {
                IOUtils.closeStream(fin);
            }
        }
        return lines;
    }

    /**
     * Turns "wordIndex&lt;TAB&gt;value" lines into "word&lt;TAB&gt;value" lines.
     * Lines that do not have exactly two tab-separated columns, or whose index is not
     * in the dictionary, are skipped. Progress is printed every 1000 input lines.
     */
    private static ArrayList<String> resolveWords(HashMap<Integer, String> wordList,
            ArrayList<String> wordInfo) {
        ArrayList<String> result = new ArrayList<String>();
        int count = 0;
        for (String str : wordInfo) {
            if (count % 1000 == 0) {
                System.out.println(count);
            }
            count++;
            String[] arr = str.split("\t");
            if (arr.length != 2) {
                continue;
            }
            // Dictionary values are never null, so a null lookup means "unknown index".
            String word = wordList.get(Integer.parseInt(arr[0]));
            if (word != null) {
                result.add(word + "\t" + arr[1]);
            }
        }
        return result;
    }

    /**
     * Command-line entry point: runs {@link #read()} and prints the stack trace of any
     * {@link IOException} instead of propagating it.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            GetSentenceWithPos getSentenceWithPos = new GetSentenceWithPos();
            getSentenceWithPos.read();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}