A.hadoop rpc使用
a.RPC 服务器端
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.ipc.VersionedProtocol;
public class TestIPC {

	/** Entry point: starts two RPC servers (ports 16000 and 16008) on localhost. */
	public static void main(String[] args) {
		new TestIPC().start();
	}

	/**
	 * Starts two independent Hadoop RPC servers bound to localhost, each backed
	 * by its own {@link ServerImpl1} instance. The ports (16000, 16008) must
	 * match the addresses the client side connects to.
	 */
	public void start() {
		try {
			Configuration conf = new Configuration();
			ServerImpl1 s1 = new ServerImpl1();
			Server server = RPC.getServer(s1, "localhost", 16000, conf);
			server.start();
			ServerImpl1 s2 = new ServerImpl1();
			Server server2 = RPC.getServer(s2, "localhost", 16008, conf);
			server2.start();
		} catch (Exception e) {
			// NOTE(review): demo-only handling; real code should log and/or rethrow.
			e.printStackTrace();
		}
	}

	/**
	 * RPC contract exposed to clients. The protocol version id must agree on
	 * both the server and the client side of the connection.
	 */
	public interface Serverif1 extends VersionedProtocol {
		/**
		 * Shared protocol version id. Previously the magic literal 123456l was
		 * duplicated on both sides; the uppercase 'L' suffix avoids confusing
		 * lowercase 'l' with the digit '1'.
		 */
		long VERSION_ID = 123456L;

		/** Remote business method: returns a greeting built from {@code args}. */
		public String mymethod(String args);
	}

	/** Server-side implementation of the {@link Serverif1} business logic. */
	public static class ServerImpl1 implements Serverif1 {
		// Business logic: echo a greeting back to the caller.
		public String mymethod(String args) {
			return "hello," + args + " from Serverif1";
		}

		@Override
		public long getProtocolVersion(String protocol, long clientVersion)
				throws IOException {
			// Fixed version id shared with clients via the interface constant.
			return VERSION_ID;
		}
	}
}
b.RPC 客户端
import java.lang.reflect.Method;
import java.net.InetSocketAddress;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ipc.RPC;
public class TestIPCClient {

	/** Hadoop configuration used for every RPC connection made by this client. */
	private Configuration conf = new Configuration();

	/**
	 * Calls {@code mymethod} on the server at localhost:16000 through a
	 * dynamic RPC proxy.
	 *
	 * @param args argument forwarded to the remote {@code mymethod}
	 * @return the server's reply string
	 * @throws Exception if the proxy cannot be created or the remote call fails
	 */
	public String method_proxy(String args) throws Exception {
		InetSocketAddress sa = new InetSocketAddress("localhost", 16000);
		// 123456L must match ServerImpl1.getProtocolVersion on the server side
		// (uppercase 'L' suffix instead of the error-prone lowercase 'l').
		TestIPC.Serverif1 si =
				(TestIPC.Serverif1) RPC.getProxy(TestIPC.Serverif1.class, 123456L, sa, conf);
		return si.mymethod(args);
	}

	/**
	 * Invokes {@code mymethod} on both servers (ports 16000 and 16008) in one
	 * parallel RPC call.
	 *
	 * @param args currently unused — the per-server parameters are hard-coded
	 *        below; kept for interface compatibility. TODO: forward args.
	 * @return one result object per contacted server
	 * @throws Exception if the reflective method lookup or the RPC call fails
	 */
	public Object[] method_call(String args) throws Exception {
		InetSocketAddress[] sas = {
				new InetSocketAddress("localhost", 16000),
				new InetSocketAddress("localhost", 16008)
		};
		Method m1 = TestIPC.Serverif1.class.getMethod("mymethod", new Class[] { String.class });
		String[][] params = new String[][] { { "param1" }, { "param2" } };
		return (Object[]) RPC.call(m1, params, sas, conf);
	}

	/** Demo driver: exercises both the proxy style and the parallel-call style. */
	public static void main(String args[]) throws Exception {
		// Removed unused local 'remoteIP' — it was assigned but never referenced.
		TestIPCClient c = new TestIPCClient();
		System.out.println(c.method_proxy("YaoLei"));
		Object[] result = c.method_call("Hadoop");
		for (Object rs : result) {
			System.out.println(rs.toString());
		}
	}
}
B.hadoop启动与关闭
安装:http://book.douban.com/people/48778472/annotation/4817792/
参考:http://hadoop.apache.org/common/docs/stable/single_node_setup.html
启动与关闭选项包括五个部分:namenode,datanode,secondarynamenode,jobtracker,tasktracker.
Administrator@NB-LEI-YAO /cygdrive/e/d-tools/hadoop-0.21.0
$ bin/start-all.sh
This script is Deprecated. Instead use start-dfs.sh and start-mapred.sh
namenode running as process 3452. Stop it first.
localhost: datanode running as process 3056. Stop it first.
localhost: secondarynamenode running as process 4664. Stop it first.
jobtracker running as process 4880. Stop it first.
localhost: tasktracker running as process 2420. Stop it first.
Administrator@NB-LEI-YAO /cygdrive/e/d-tools/hadoop-0.21.0
$ bin/stop-all.sh
This script is Deprecated. Instead use stop-dfs.sh and stop-mapred.sh
stopping namenode
localhost: stopping datanode
localhost: stopping secondarynamenode
stopping jobtracker
localhost: stopping tasktracker
Administrator@NB-LEI-YAO /cygdrive/e/d-tools/hadoop-0.21.0
C.一些命令
1.bin/start-all.sh --启动
2.mkdir ty_input --创建目录ty_input
cp conf/* ty_input
3.bin/hadoop fs -put ./ty_input ty_input --把本地文件添加到hdfs中
4.bin/hadoop fs -ls
5.bin/hadoop fs -rmr output* --删除以output开头的文件夹
6.bin/hadoop jar hadoop*examples*.jar wordcount ty_input ty_output
--统计ty_input下文件夹里文件的字数 输出到ty_output文件夹
7.bin/hadoop fs -ls --可以看到两个ty_input 和 ty_output文件夹
8.bin/hadoop fs -ls ty_output/* --可以看到ty_output下的文件结构
9.bin/hadoop fs -cat ty_output/* --可以看到ty_output下的文件内容
10.bin/hadoop fs -put ./ty_input/*.sql ty_input
11.bin/hadoop jar hadoop*examples*.jar wordcount ty_input/misc.sql ty_output2
12.bin/hadoop jar hadoop*examples*jar grep ty_input/misc.sql ty_output3 'iqcar*'
13.bin/hadoop fs -cat ty_output3/*
14.jps --对应ps命令,java自带工具
3964 Jps
3172 Program
jstat -- java 自带工具
jstat -gcutil -t 3172 200 10 --即可每200毫秒连续打印10次带有时间戳的GC统计信息
15.bin/hadoop dfsadmin -report
D.阅读笔记
1.RPC
a.JobClient通过RPC机制调用JobTracker的submitJob方法来提交job.
Ps.即JobClient端生成JobTracker的代理对象
b.TaskTracker通过RPC机制调用JobTracker的heartbeat来检测心跳
c.JobTracker启动时根据mapreduce.jobtracker.address参数的地址来启动一个RPC Server
TaskTracker会启动一个taskReportServer的IPC
d.NameNode也会启动一个RPC Server,用来给DataNode提供服务
e.DataNode会启动一个RPC Server,用来在DataNode之间通信
f.DataNode会向NameNode发送心跳
g.DataNode和TaskTracker均实现了Runnable接口
2.命令 whoami 可以查看当前用户
3.数据以数据块形式分布在多个DataNode上,每个DataNode只有一个数据块,一个数据块在多个DataNode上有备份.
4.DataNode之间的数据块复制由DataXceiverServer完成,通过socket读写数据流形式