一、Hadoop
1.创建hadoop用户
sudo useradd -m username -s /bin/bash #创建用户,以/bin/bash作为登录shell
sudo passwd username #为username设置密码,如:123(passwd后面接用户名,而不是密码)
sudo adduser username sudo #将用户加入sudo组,为用户增加管理员权限
su - lln #切换当前用户为lln
su root #进入超级用户
2.文件操作
sudo tar zxvf /existpath/filename.tar.gz -C /topath #解压文件到指定目录
vim ~/.bashrc #修改配置文件
source ~/.bashrc #生效配置文件
sudo apt-get install vim #apt-get安装
sudo chown lln: ./eclipse.desktop #修改所有者
sudo chmod u+x eclipse.desktop #修改权限
3.启动hadoop
NameNode 的格式化: ./bin/hdfs namenode -format
启动hadoop:./sbin/start-all.sh
查看java进程:jps
Web 界面查看: http://localhost:50070
离开安全模式:hdfs dfsadmin -safemode leave
4.hadoop shell命令
以运行wordcount为例
找到运行路径:cd /usr/local/hadoop-2.6.5/share/hadoop/mapreduce
创建目录: hadoop fs -mkdir -p /data/wordcount
hadoop fs -mkdir -p /output/
本地创建输入文件:vim /usr/inputWord
上传至HDFS:hadoop fs -put /usr/inputWord /data/wordcount
查看:hadoop fs -ls /data/wordcount
hadoop fs -text /data/wordcount/inputWord
运行:
hadoop jar hadoop-mapreduce-examples-2.6.5.jar wordcount /data/wordcount /output/wordcountresult
shell下java程序的编译与运行
1.查看java路径:which java
2.编译(.java—->.class):javac Filename.java
3.生成jar包:jar cvf Filename.jar Filename*.class
4.hadoop下运行: /usr/local/hadoop-2.6.5/bin/hadoop jar Filename.jar Filename
Hadoop fs官网: http://hadoop.apache.org/docs/r2.6.5/hadoop-project-dist/hadoop-common/FileSystemShell.html
hdfs dfs -appendToFile <localsrc> ... <dst>
hdfs dfs -cat URI [URI ...]
hdfs dfs -copyFromLocal <localsrc> URI
hdfs dfs -copyToLocal [-ignorecrc] [-crc] URI <localdst>
hdfs dfs -count [-q] [-h] <paths>
hdfs dfs -cp [-f] [-p | -p[topax]] URI [URI ...] <dest>
hdfs dfs -get [-ignorecrc] [-crc] <src> <localdst>
hdfs dfs -ls [-R] <args>
hdfs dfs -mkdir [-p] <paths>
hdfs dfs -moveFromLocal <localsrc> <dst>
hdfs dfs -moveToLocal [-crc] <src> <dst>
hdfs dfs -mv URI [URI ...] <dest>
hdfs dfs -put <localsrc> ... <dst>
hdfs dfs -rm [-f] [-r|-R] [-skipTrash] URI [URI ...]
hdfs dfs -text <src>
hdfs dfs -touchz pathname
5.hadoop java API 编程
Hadoop API官网:
http://hadoop.apache.org/docs/r2.6.5/api/index.html
控制台打印
System.out.println("append or overwrite?" );
Scanner sc = new Scanner(System.in);
String s = sc.next();
if(s.equals("append")) {
fs.append(path);
System.out.println("文件已追加" );
}else {
fs.create(path, true);
System.out.println("文件已覆盖" );
}
判断文件是否存在
fs.exists(path)
判断要删除的文件是否存在
fs.deleteOnExit(path)
编程顺序
private static final String HDFS = "hdfs://localhost:9000";
String remote= HDFS + "/user/upload"; //HDFS文件path
String local = "/home/lln/下载/test"; //本地文件path
private Configuration conf = new Configuration() ;
FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration());
fs.close();
//上传文件
FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(HDFS + "/test");
fs.append(path);//文件已存在,追加至文件末尾
fs.create(path, true);//文件已存在,覆盖文件
fs.copyFromLocalFile(new Path(local), new Path(remote));//文件不存在,上传
fs.close();
//下载文件
FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(local);
fs.rename(new Path(local), new Path("/home/lln/文档/test1"));//文件已存在,重命名为"/home/lln/文档/test1"
fs.copyToLocalFile(new Path(remote), new Path(local));//下载文件至本地
fs.close();
//显示 HDFS 中指定的文件的读写权限、大小、创建时间、路径等信息
FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(remote);
FileStatus fileStatus = fs.getFileStatus(path);
System.out.println("路径:"+fileStatus.getPath());
System.out.println("大小:"+fileStatus.getBlockSize());
fs.close();
//递归列出所有文件夹
FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(remote);
FileStatus[] fileList = fs.listStatus(path);
for (FileStatus f : fileList) {
System.out.printf("name: %s | folder: %s | size: %d\n", f.getPath(), f.isDir() , f.getLen());
try{
FileStatus[] fileListR = fs.listStatus(f.getPath());
for(FileStatus fr:fileListR){
System.out.printf("name: %s | folder: %s | size: %d\n", fr.getPath(), fr.isDir() , fr.getLen());
}
}catch(IOException e){
continue; // skip unreadable entries; the original finally{continue;} silently discarded the exception
}
}
fs.close();
//创建和删除文件
FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(remote);
fs.create(path);//创建
fs.delete(path);//删除
fs.close();
//创建和删除目录
FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(remote);
fs.mkdirs(path);//创建
fs.listStatus(path);
fs.delete(path);//删除
fs.close();
//向 HDFS 中指定的文件追加内容(将本地文件local的内容追加到HDFS文件末尾)
FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(remote);
FSDataOutputStream out = fs.append(path);
FileInputStream in = new FileInputStream(local);
IOUtils.copyBytes(in, out, 4096, true);//复制后关闭输入输出流
fs.close();
//删除 HDFS 中指定的文件
FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(remote);
fs.delete(path);
fs.close();
//将文件从源路径移动到目的路径
FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(remote);
Path path1 = new Path(local);
fs.moveFromLocalFile(path1,path);
fs.close();
//显示指定文件内容1
FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(remote);
FSDataInputStream fsdis = null;
System.out.println("cat: " + path);
try {
fsdis =fs.open(path);
IOUtils.copyBytes(fsdis, System.out, 4096, false);
} finally {
IOUtils.closeStream(fsdis);
fs.close();
}
//显示指定文件内容2
FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration());
Path path = new Path(remote);
FSDataInputStream fp = fs.open(path) ;
InputStreamReader isr = new InputStreamReader(fp) ;
BufferedReader br = new BufferedReader(isr) ;
String line = br.readLine() ;
while(line !=null){
System.out.println(line);
line = br.readLine() ;
}
//显示指定文件内容3
FileSystem fs = FileSystem. get(URI.create (uri), conf);
InputStream in = null;
try {
in = fs.open( new Path(uri));
IOUtils.copyBytes(in, System.out, 4096, false);
} finally {
IOUtils.closeStream(in);
}
二、Hbase
运行:
./bin/hbase shell
Shell命令
create 'test', 'cf'
list 'test'
describe 'test'
put 'test', 'row1', 'cf:a', 'value1'
scan 'test'
get 'test', 'row1'
disable 'test'
drop 'test'
quit
停止:
./bin/stop-hbase.sh
// List every table in HBase and print its name to the console.
public static void listTables() throws IOException {
    init();
    // admin.listTables() returns one descriptor per existing table.
    HTableDescriptor[] tables = admin.listTables();
    for (int i = 0; i < tables.length; i++) {
        System.out.println("表名:" + tables[i].getNameAsString());
    }
    close();
}
// Print every record of the given table to the console.
public static void scanData(String tableName) throws IOException {
    init();
    Table table = connection.getTable(TableName.valueOf(tableName));
    // An unfiltered Scan walks the whole table.
    ResultScanner rows = table.getScanner(new Scan());
    for (Result row : rows) {
        showCell(row);
    }
    close();
}
// Write one cell into an existing table: table[rowkey][colFamily:col] = val.
public static void insertRow(String tableName,String rowkey,String colFamily,String col,String val) throws IOException {
    init();
    Table table = connection.getTable(TableName.valueOf(tableName));
    Put cell = new Put(rowkey.getBytes());
    cell.addColumn(colFamily.getBytes(), col.getBytes(), val.getBytes());
    table.put(cell);
    table.close();
    close();
}
// Delete the single cell colFamily:col from the given row.
public static void deleteRow(String tableName,String rowkey,String colFamily,String col) throws IOException {
    init();
    Table table = connection.getTable(TableName.valueOf(tableName));
    Delete d = new Delete(rowkey.getBytes());
    // To remove the whole column family instead, use:
    //delete.addFamily(colFamily.getBytes());
    // Delete only the named column within the family.
    d.addColumn(colFamily.getBytes(), col.getBytes());
    table.delete(d);
    table.close();
    close();
}
// Clear all records of a table by dropping and recreating it.
public static void clearRows(String tableName) throws IOException {
    init();
    TableName tn = TableName.valueOf(tableName);
    // BUG FIX: capture the existing schema BEFORE deleting. The original
    // recreated the table with a bare new HTableDescriptor(tableName),
    // which silently dropped every column family, so later puts failed.
    HTableDescriptor hTableDescriptor = admin.getTableDescriptor(tn);
    // A table must be disabled before it can be deleted.
    admin.disableTable(tn);
    admin.deleteTable(tn);
    admin.createTable(hTableDescriptor);
    close();
}
// Count and print the number of rows in the given table.
public static void countRows(String tableName) throws IOException {
    init();
    Table table = connection.getTable(TableName.valueOf(tableName));
    ResultScanner scanner = table.getScanner(new Scan());
    int total = 0;
    // ResultScanner is Iterable; each element is one row.
    for (Result ignored : scanner) {
        total++;
    }
    System.out.println("行数:" + total);
    scanner.close();
    close();
}
// Create a table with the given column families; an existing table of the
// same name is dropped first.
public static void createTable(String myTableName,String[] colFamily) throws IOException {
    init();
    TableName tableName = TableName.valueOf(myTableName);
    if (admin.tableExists(tableName)) {
        System.out.println("table is exists!");
        // Must disable before delete.
        admin.disableTable(tableName);
        admin.deleteTable(tableName);
    }
    HTableDescriptor descriptor = new HTableDescriptor(tableName);
    for (String family : colFamily) {
        descriptor.addFamily(new HColumnDescriptor(family));
    }
    admin.createTable(descriptor);
    System.out.println("create table success");
    close();
}
// Write one row: fields[i] is a "family:qualifier" pair matched with values[i].
public static void addRecord(String tableName,String row,String[] fields,String[] values) throws IOException {
    init();
    Table table = connection.getTable(TableName.valueOf(tableName));
    for (int i = 0; i != fields.length; i++) {
        Put put = new Put(row.getBytes());
        String[] cols = fields[i].split(":");
        // BUG FIX: original used values[1], which wrote the second value
        // into every column; each field must take its own values[i].
        put.addColumn(cols[0].getBytes(), cols[1].getBytes(), values[i].getBytes());
        table.put(put);
    }
    table.close();
    close();
}
// Print the data of one column (or one whole column family) of tableName.
public static void scanColumn(String tableName,String column) throws IOException {
    init();
    Table table = connection.getTable(TableName.valueOf(tableName));
    Scan scan = new Scan();
    // BUG FIX: accept both "family" and "family:qualifier". The original
    // passed the whole string to addFamily; ':' is illegal inside an HBase
    // family name, so "cf:a" could never match a real family.
    String[] parts = column.split(":", 2);
    if (parts.length == 2) {
        scan.addColumn(Bytes.toBytes(parts[0]), Bytes.toBytes(parts[1]));
    } else {
        scan.addFamily(Bytes.toBytes(column));
    }
    ResultScanner scanner = table.getScanner(scan);
    try {
        for (Result result = scanner.next(); result != null; result = scanner.next()) {
            showCell(result);
        }
    } finally {
        // Original leaked the scanner; always release it.
        scanner.close();
        table.close();
    }
    close();
}
// Overwrite a cell of table[row]: writes val under family `column` with a
// null qualifier (stored by HBase under the empty qualifier).
// NOTE(review): if callers pass "family:qualifier" here, the whole string is
// used as the family name — confirm intended usage against the callers.
public static void modifyData(String tableName,String row,String column,String val) throws IOException {
init();
Table table = connection.getTable(TableName.valueOf(tableName));
Put put=new Put(row.getBytes());
// null qualifier: the value lands in the empty qualifier of the family.
put.addColumn(column.getBytes(), null, val.getBytes());
table.put(put);
table.close();
close();
}
// Delete a row from the table.
// NOTE(review): the original header said "delete table" but this removes one
// ROW. With both addFamily/addColumn lines commented out, the bare Delete
// removes the ENTIRE row. This also duplicates the signature of the earlier
// deleteRow(String,String,String,String) — the two cannot coexist in one class.
public static void deleteRow(String tableName,String rowkey,String colFamily,String col) throws IOException {
init();
Table table = connection.getTable(TableName.valueOf(tableName));
Delete delete = new Delete(rowkey.getBytes());
// Option: delete all data of one column family.
//delete.addFamily(colFamily.getBytes());
// Option: delete a single column.
//delete.addColumn(colFamily.getBytes(), col.getBytes());
table.delete(delete);
table.close();
close();
}
三、文件操作
// De-duplicating merge: every distinct input line appears exactly once in
// the output.
public class Merge {

    /** Mapper: emit the whole line as the key with an empty value. */
    public static class Map extends Mapper<Object, Text, Text, Text> {
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // Shuffling on the line itself groups duplicates under one key.
            context.write(value, new Text(""));
        }
    }

    /** Reducer: write each distinct key once, discarding duplicates. */
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, new Text(""));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000/user/lln");
        // Input/output paths are hard-wired instead of read from args.
        String[] otherArgs = new String[] { "input", "output" };
        if (otherArgs.length != 2) {
            System.err.println("Usage: Merge and duplicate removal <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "Merge and duplicate removal");
        job.setJarByClass(Merge.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
MergeSort.java
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
// Sorts integers (one per line) and emits "rank<TAB>value" in ascending
// order, relying on MapReduce's sort-by-key shuffle for the ordering.
public class MergeSort {

    /** Mapper: parse each line as an int key; the value 1 is a placeholder. */
    public static class Map extends
            Mapper<Object, Text, IntWritable, IntWritable> {
        private static IntWritable data = new IntWritable();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // The framework sorts by key, which yields the global order.
            data.set(Integer.parseInt(value.toString()));
            context.write(data, new IntWritable(1));
        }
    }

    /** Reducer: assign consecutive line numbers; each duplicate gets a line. */
    public static class Reduce extends
            Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        // Running rank shared across reduce() calls within one reducer.
        private static IntWritable linenum = new IntWritable(1);

        public void reduce(IntWritable key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            for (IntWritable ignored : values) {
                context.write(linenum, key);
                linenum = new IntWritable(linenum.get() + 1);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        // Input/output paths are hard-wired instead of read from args.
        String[] otherArgs = new String[] { "input2", "output2" };
        if (otherArgs.length != 2) {
            System.err.println("Usage: mergesort <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "mergesort");
        job.setJarByClass(MergeSort.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
STjoin.java
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
// Single-table self-join: from (child, parent) pairs, derive all
// (grandchild, grandparent) pairs.
public class STjoin {
    // Guards the header row so it is emitted only once per reducer.
    public static int time = 0;

    /**
     * Mapper: for each space-separated "child parent" line (the header row
     * starting with "child" is skipped), emit two tagged records:
     *   key=parent, value="1+child+parent"  (key appearing as a parent)
     *   key=child,  value="2+child+parent"  (key appearing as a child)
     */
    public static class Map extends Mapper<Object, Text, Text, Text> {
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            // Split on the first space: column 1 = child, column 2 = parent.
            int sep = line.indexOf(' ');
            String childName = line.substring(0, sep);
            String parentName = line.substring(sep + 1);
            if (!childName.equals("child")) {
                context.write(new Text(parentName),
                        new Text("1+" + childName + "+" + parentName));
                context.write(new Text(childName),
                        new Text("2+" + childName + "+" + parentName));
            }
        }
    }

    /**
     * Reducer: for one person (the key), type-1 records carry that person's
     * children (grandchild candidates) and type-2 records carry that
     * person's parents (grandparent candidates); their cross product is the
     * grandchild/grandparent relation.
     */
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            if (time == 0) {
                context.write(new Text("grand_child"), new Text("grand_parent"));
                time++;
            }
            // FIX: the original used fixed String[10] arrays, which throw
            // ArrayIndexOutOfBoundsException past ten entries per key.
            List<String> grandChildren = new ArrayList<String>();
            List<String> grandParents = new ArrayList<String>();
            for (Text value : values) {
                String record = value.toString();
                if (record.length() == 0)
                    continue;
                // Record layout: "<type>+<child>+<parent>".
                char relationType = record.charAt(0);
                int firstPlus = record.indexOf('+');
                int secondPlus = record.indexOf('+', firstPlus + 1);
                String childName = record.substring(firstPlus + 1, secondPlus);
                String parentName = record.substring(secondPlus + 1);
                if (relationType == '1') {
                    grandChildren.add(childName);
                } else {
                    grandParents.add(parentName);
                }
            }
            // Cross product; empty lists naturally produce no output.
            for (String grandChild : grandChildren) {
                for (String grandParent : grandParents) {
                    context.write(new Text(grandChild), new Text(grandParent));
                }
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        // Input/output paths are hard-wired instead of read from args.
        String[] otherArgs = new String[] { "input3", "output3" };
        if (otherArgs.length != 2) {
            System.err.println("Usage: Single Table Join <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "Single table join ");
        job.setJarByClass(STjoin.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}