/**
* First assignment for Big data systems and big data analysis
* @author LMC
* @version V2.0
* @date: 2018.04.23
*/
import java.io.*;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
/**
 * Hash-based GROUP BY over a '|'-delimited HDFS file; the aggregates are written to HBase.
 *
 * <p>Usage: {@code java Hw1Grp2 R=<file> groupby:R<n> 'res:<agg>[,<agg>...]'}, where each
 * {@code <agg>} is {@code count}, {@code avg(R<n>)}, {@code max(R<n>)} or {@code sum(R<n>)}.
 * Example: {@code java Hw1Grp2 R=/hw1/lineitem.tbl groupby:R0 'res:count,avg(R2),max(R3)'}.
 *
 * <p>Results are stored in table {@code Result}, column family {@code res}: one row per
 * distinct group-by value and one column per requested aggregate.
 *
 * @author LMC
 */
public class Hw1Grp2 {

    /** Name of the HBase table that receives the results. */
    private static final String TABLE_NAME = "Result";

    /** Column family that holds every aggregate column. */
    private static final String COLUMN_FAMILY = "res";

    /** Charset used for reading the input and for every String-to-bytes conversion. */
    private static final String CHARSET = "UTF-8";

    /** Marker meaning "this aggregate was not requested". */
    private static final int NOT_REQUESTED = -1;

    /** Running aggregates of one group. */
    private static final class GroupStats {
        long count;
        // long, so that summing many int-sized values cannot silently overflow.
        long sum;
        // double, so that precision is not lost while accumulating many rows.
        double avgSum;
        long max = Long.MIN_VALUE;
    }

    private HTable table;

    /**
     * Sets the HBase table that {@link #mainProcess} writes to.
     *
     * @param table an open table; the caller remains responsible for closing it
     */
    public void setTable(HTable table) {
        this.table = table;
    }

    /**
     * Parses the command line, makes sure the result table exists and runs the group-by.
     *
     * @param args {@code R=<file>}, {@code groupby:R<n>} and {@code res:<agg>[,<agg>...]}
     * @throws IllegalArgumentException if fewer than three arguments are given or an
     *         aggregate is malformed
     * @throws IOException if HDFS or HBase cannot be accessed
     */
    public static void main(String[] args) throws MasterNotRunningException, ZooKeeperConnectionException, IOException, URISyntaxException {
        // Validate the command line before touching HBase or HDFS.
        if (args == null || args.length < 3) {
            throw new IllegalArgumentException(
                    "Usage: java Hw1Grp2 R=<file> groupby:R<n> 'res:count,avg(R<n>),max(R<n>),sum(R<n>)'");
        }
        String file = StringUtils.substringAfter(args[0], "=");
        int rowKey = Integer.parseInt(StringUtils.substringAfter(args[1], "R"));
        Map<String, Integer> aggregates =
                parseAggregates(StringUtils.substringAfter(args[2], ":"), rowKey);

        Configuration configuration = HBaseConfiguration.create();
        ensureResultTable(configuration);

        System.out.println("file:" + file);
        for (Map.Entry<String, Integer> aggregate : aggregates.entrySet()) {
            System.out.println(aggregate.getKey() + ":" + aggregate.getValue());
        }

        HTable resultTable = new HTable(configuration, TABLE_NAME);
        try {
            Hw1Grp2 h = new Hw1Grp2();
            h.setTable(resultTable);
            h.mainProcess(file, rowKey, aggregates);
        } finally {
            // Release the table's resources even when processing fails.
            resultTable.close();
        }
        System.out.println("sucessfully");
    }

    /**
     * Creates the result table with its single column family unless it already exists.
     *
     * <p>NOTE(review): an existing table is reused as-is, so cells written by earlier runs
     * remain in it. Confirm whether the table should be recreated for each run.
     */
    private static void ensureResultTable(Configuration configuration)
            throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
        HBaseAdmin hAdmin = new HBaseAdmin(configuration);
        try {
            if (hAdmin.tableExists(TABLE_NAME)) {
                System.out.println("Table already exists");
            } else {
                HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE_NAME));
                htd.addFamily(new HColumnDescriptor(COLUMN_FAMILY));
                hAdmin.createTable(htd);
                System.out.println("table " + TABLE_NAME + " created successfully");
            }
        } finally {
            hAdmin.close();
        }
    }

    /**
     * Turns {@code "count,avg(R2),max(R3)"} into {@code {count=<rowKey>, avg=2, max=3}}.
     * The value stored for {@code count} only signals that it was requested.
     */
    private static Map<String, Integer> parseAggregates(String colsName, int rowKey) {
        Map<String, Integer> aggregates = new HashMap<String, Integer>();
        for (String spec : colsName.split(",")) {
            if (spec.equals("count")) {
                aggregates.put(spec, rowKey);
                continue;
            }
            String column = StringUtils.substringBetween(spec, "R", ")");
            if (column == null) {
                throw new IllegalArgumentException("Malformed aggregate '" + spec
                        + "'; expected count, avg(R<n>), max(R<n>) or sum(R<n>)");
            }
            aggregates.put(StringUtils.substringBefore(spec, "("), Integer.parseInt(column));
        }
        return aggregates;
    }

    /**
     * Reads {@code file} line by line, groups the rows by column {@code rowKey} in a hash
     * map and writes the requested aggregates of every group to the table set through
     * {@link #setTable}.
     *
     * <p>Each non-empty line is split on {@code '|'}. {@code sum} and {@code max} parse
     * their column as an integer, {@code avg} parses it as a decimal and is rounded to two
     * decimal places. Empty lines are skipped.
     *
     * @param file   path or URI of the input file
     * @param rowKey zero-based index of the group-by column
     * @param args   aggregate name ({@code count}, {@code avg}, {@code max}, {@code sum})
     *               mapped to its zero-based column index; absent names are not computed
     * @throws IOException if the file cannot be read or a put fails
     * @throws NumberFormatException if an aggregated column does not hold a number
     * @throws ArrayIndexOutOfBoundsException if a line has fewer columns than referenced
     */
    public void mainProcess(String file, int rowKey, Map<String, Integer> args) throws IOException, URISyntaxException {
        int countColnum = columnOf(args, "count");
        int avgColnum = columnOf(args, "avg");
        int maxColnum = columnOf(args, "max");
        int sumColnum = columnOf(args, "sum");

        Map<String, GroupStats> groups = new HashMap<String, GroupStats>();

        // The FileSystem itself is deliberately not closed: FileSystem.get may hand out a
        // shared, cached instance. Only the stream opened here is closed.
        FileSystem fs = FileSystem.get(URI.create(file), new Configuration());
        FSDataInputStream inStream = fs.open(new Path(file));
        try {
            BufferedReader in = new BufferedReader(new InputStreamReader(inStream, CHARSET));
            String line;
            while ((line = in.readLine()) != null) {
                if (line.length() == 0) {
                    // An empty line carries no row; it would otherwise yield an empty row key.
                    continue;
                }
                String[] columns = line.split("\\|");
                String groupKey = columns[rowKey];
                GroupStats stats = groups.get(groupKey);
                if (stats == null) {
                    stats = new GroupStats();
                    groups.put(groupKey, stats);
                }
                stats.count++;
                if (sumColnum != NOT_REQUESTED) {
                    stats.sum += Long.parseLong(columns[sumColnum]);
                }
                if (avgColnum != NOT_REQUESTED) {
                    stats.avgSum += Double.parseDouble(columns[avgColnum]);
                }
                if (maxColnum != NOT_REQUESTED) {
                    stats.max = Math.max(stats.max, Long.parseLong(columns[maxColnum]));
                }
            }
        } finally {
            inStream.close();
        }

        boolean anyRequested = countColnum != NOT_REQUESTED || avgColnum != NOT_REQUESTED
                || maxColnum != NOT_REQUESTED || sumColnum != NOT_REQUESTED;
        if (anyRequested) {
            for (Map.Entry<String, GroupStats> group : groups.entrySet()) {
                GroupStats stats = group.getValue();
                // One Put per group carries all of its aggregate columns.
                Put put = new Put(group.getKey().getBytes(CHARSET));
                if (countColnum != NOT_REQUESTED) {
                    addCell(put, "count", String.valueOf(stats.count));
                }
                if (avgColnum != NOT_REQUESTED) {
                    // Round to two decimal places.
                    float avg = (float) Math.round(stats.avgSum / stats.count * 100) / 100;
                    addCell(put, "avg(R" + avgColnum + ")", String.valueOf(avg));
                }
                if (maxColnum != NOT_REQUESTED) {
                    addCell(put, "max(R" + maxColnum + ")", String.valueOf(stats.max));
                }
                if (sumColnum != NOT_REQUESTED) {
                    addCell(put, "sum(R" + sumColnum + ")", String.valueOf(stats.sum));
                }
                table.put(put);
            }
        }
        System.out.println("Main process success");
    }

    /** Returns the column index registered for {@code name}, or -1 when it is absent. */
    private static int columnOf(Map<String, Integer> aggregates, String name) {
        Integer column = aggregates.get(name);
        return column == null ? NOT_REQUESTED : column.intValue();
    }

    /** Adds {@code value} under {@code qualifier} in the result column family. */
    private static void addCell(Put put, String qualifier, String value) throws IOException {
        put.add(COLUMN_FAMILY.getBytes(CHARSET), qualifier.getBytes(CHARSET),
                value.getBytes(CHARSET));
    }

    /**
     * Tells whether {@code colStr} consists solely of the ASCII digits 0-9.
     *
     * @param colStr the text to check; may be {@code null}
     * @return {@code true} for a non-empty, digits-only string; {@code false} otherwise,
     *         including for {@code null}, the empty string and signed numbers
     */
    public static boolean isInteger(String colStr) {
        if (colStr == null || colStr.length() == 0) {
            return false;
        }
        for (int i = 0; i < colStr.length(); i++) {
            char chr = colStr.charAt(i);
            if (chr < '0' || chr > '9') {
                return false;
            }
        }
        return true;
    }
}
readme
--------------------------------------------------------------------------------
README
--------------------------------------------------------------------------------
PLEASE save your code and data to a portable drive!!!
WARNING: this VM will be cleaned without notice after you log out.
Your code and data on the VM will get lost!!!
1. start hdfs and hbase
$ start-dfs.sh
$ start-hbase.sh
2. stop hdfs and hbase
$ stop-hbase.sh
$ stop-dfs.sh
3. hdfs directory is ~/work/hdfs
4. To compile your java code MyCode.java (implementing class MyCode)
$ javac MyCode.java
then to run it
$ java MyCode <args>
5. compile and run HDFSTest.java
$ javac HDFSTest.java
$ java HDFSTest
6. compile and run HBaseTest.java
$ javac HBaseTest.java
$ java HBaseTest
Check that mytable was created successfully and that the new row was inserted:
start the HBase shell and run the following commands in it
$ hbase shell
hbase(main):001:0> scan 'mytable'
ROW COLUMN+CELL
abc column=mycf:a, timestamp=1428459927307, value=789
1 row(s) in 1.8950 seconds
hbase(main):002:0> disable 'mytable'
0 row(s) in 1.9050 seconds
hbase(main):003:0> drop 'mytable'
0 row(s) in 1.2320 seconds
hbase(main):004:0> exit
--------------------------------------------------------------------------------