Hadoop Notes

A. Using Hadoop RPC

a. RPC server side

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.ipc.VersionedProtocol;

public class TestIPC {
	public static void main(String[] args) {
		new TestIPC().start();
	}
	public void start(){
		try {
			Configuration conf = new Configuration();
			// Start two RPC servers that expose the same protocol, on ports 16000 and 16008.
			ServerImpl1 s1 = new ServerImpl1();
			Server server = RPC.getServer(s1, "localhost", 16000, conf);
			server.start();

			ServerImpl1 s2 = new ServerImpl1();
			Server server2 = RPC.getServer(s2, "localhost", 16008, conf);
			server2.start();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
	//Define the RPC protocol interface
	public interface Serverif1 extends VersionedProtocol{
	    public String mymethod(String args);
	}
	//Server-side implementation of the protocol
	public static class ServerImpl1 implements Serverif1{
	    //Business logic
	    public String mymethod(String args){
	        return "hello,"+args+" from Serverif1";
	    }

		@Override
		public long getProtocolVersion(String protocol, long clientVersion)
				throws IOException {
			// Must match the version the client requests in RPC.getProxy().
			return 123456L;
		}
	}
}
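Run TestIPC first so both servers are listening. Note that the version returned by getProtocolVersion (123456L) must match the clientVersion the client passes to RPC.getProxy in the client code below.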

b. RPC client

import java.lang.reflect.Method;
import java.net.InetSocketAddress;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ipc.RPC;

public class TestIPCClient {
	 private Configuration conf = new Configuration();
     
     public String method_proxy(String args) throws Exception{
        // Obtain a client-side proxy for Serverif1 and call it like a local object.
        InetSocketAddress sa = new InetSocketAddress("localhost", 16000);
        TestIPC.Serverif1 si = (TestIPC.Serverif1) RPC.getProxy(TestIPC.Serverif1.class, 123456L, sa, conf);
        return si.mymethod(args);
     }
     
     public Object[] method_call(String args) throws Exception{
    	 // Invoke the same method on several servers in one parallel RPC.call();
    	 // the parameters here are hard-coded, so the args argument is not used.
    	 InetSocketAddress[] sas = {
    			  new InetSocketAddress("localhost", 16000),
    			  new InetSocketAddress("localhost", 16008)
    	 };
    	 Method m1 = TestIPC.Serverif1.class.getMethod("mymethod", new Class[] { String.class });
    	 String[][] params = new String[][]{{"param1"},{"param2"}};
    	 return (Object[]) RPC.call(m1, params, sas, conf);
     }
 
     public static void main(String args[]) throws Exception {
         TestIPCClient c = new TestIPCClient();
         System.out.println(c.method_proxy("YaoLei"));
         Object[] result = c.method_call("Hadoop");
         for(Object rs:result){
        	 System.out.println(rs.toString());
         }
     }
 }
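One thing the client above never does is release the proxy. A minimal variant of method_proxy that closes the connection when done might look like the sketch below; the method name is hypothetical, and it assumes RPC.stopProxy is available in this Hadoop line.

     public String method_proxy_closing(String args) throws Exception {
         InetSocketAddress sa = new InetSocketAddress("localhost", 16000);
         TestIPC.Serverif1 si = (TestIPC.Serverif1) RPC.getProxy(TestIPC.Serverif1.class, 123456L, sa, conf);
         try {
             return si.mymethod(args);
         } finally {
             // Release the underlying connection once the proxy is no longer needed.
             RPC.stopProxy(si);
         }
     }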


B. Starting and stopping Hadoop

Installation: http://book.douban.com/people/48778472/annotation/4817792/
Reference: http://hadoop.apache.org/common/docs/stable/single_node_setup.html

Startup and shutdown cover five daemons: namenode, datanode, secondarynamenode, jobtracker, tasktracker.

Administrator@NB-LEI-YAO /cygdrive/e/d-tools/hadoop-0.21.0
$ bin/start-all.sh
This script is Deprecated. Instead use start-dfs.sh and start-mapred.sh
namenode running as process 3452. Stop it first.
localhost: datanode running as process 3056. Stop it first.
localhost: secondarynamenode running as process 4664. Stop it first.
jobtracker running as process 4880. Stop it first.
localhost: tasktracker running as process 2420. Stop it first.

Administrator@NB-LEI-YAO /cygdrive/e/d-tools/hadoop-0.21.0
$ bin/stop-all.sh
This script is Deprecated. Instead use stop-dfs.sh and stop-mapred.sh
stopping namenode
localhost: stopping datanode
localhost: stopping secondarynamenode
stopping jobtracker
localhost: stopping tasktracker

Administrator@NB-LEI-YAO /cygdrive/e/d-tools/hadoop-0.21.0


C. Useful commands

1.bin/start-all.sh  --start all daemons
2.mkdir ty_input    --create a local directory ty_input
  cp   conf/* ty_input
3.bin/hadoop fs -put ./ty_input ty_input  --copy the local files into HDFS (see the FileSystem API sketch after this list)
4.bin/hadoop fs -ls
5.bin/hadoop fs -rmr output*              --delete directories whose names start with output
6.bin/hadoop jar hadoop*examples*.jar wordcount ty_input ty_output
  --count the words in the files under ty_input and write the result to ty_output
7.bin/hadoop fs -ls      --now shows both the ty_input and ty_output directories
8.bin/hadoop fs -ls ty_output/*    --lists the files under ty_output
9.bin/hadoop fs -cat ty_output/*   --prints the contents of the files under ty_output
10.bin/hadoop fs -put ./ty_input/*.sql ty_input
11.bin/hadoop jar hadoop*examples*.jar wordcount ty_input/misc.sql ty_output2
12.bin/hadoop jar hadoop*examples*.jar grep ty_input/misc.sql ty_output3 'iqcar*'
13.bin/hadoop fs -cat ty_output3/*   
14.jps --JDK tool, analogous to ps
   3964 Jps
   3172 Program
   jstat --JDK tool
   jstat -gcutil -t 3172 200 10 --print timestamped GC statistics for process 3172 every 200 ms, 10 times
15.bin/hadoop dfsadmin -report   --report HDFS capacity and DataNode status
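
The shell commands in items 3, 4 and 9 have programmatic equivalents in the FileSystem API. A minimal sketch under a few assumptions: the class name HdfsOps is made up, the local ./ty_input directory exists, conf resolves to the HDFS configured in core-site.xml, and the output file name part-r-00000 is also an assumption.

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsOps {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();   // reads core-site.xml / hdfs-site.xml from the classpath
        FileSystem fs = FileSystem.get(conf);       // the configured default filesystem

        // Equivalent of: bin/hadoop fs -put ./ty_input ty_input
        fs.copyFromLocalFile(new Path("./ty_input"), new Path("ty_input"));

        // Equivalent of: bin/hadoop fs -ls
        for (FileStatus st : fs.listStatus(fs.getHomeDirectory())) {
            System.out.println(st.getPath());
        }

        // Equivalent of: bin/hadoop fs -cat ty_output/part-r-00000
        Path out = new Path("ty_output/part-r-00000");
        if (fs.exists(out)) {
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(out)));
            String line;
            while ((line = in.readLine()) != null) {
                System.out.println(line);
            }
            in.close();
        }
    }
}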
 


D. Reading notes

1.RPC
  a.JobClient submits a job by calling the JobTracker's submitJob method over RPC.
    PS: the JobClient side holds a proxy object for the JobTracker.
  b.TaskTracker reports liveness by calling the JobTracker's heartbeat method over RPC.
  c.When the JobTracker starts, it launches an RPC Server at the address given by mapreduce.jobtracker.address;
    the TaskTracker also starts its own IPC server, taskReportServer.
  d.The NameNode also starts an RPC Server that serves the DataNodes.
  e.Each DataNode starts an RPC Server used for communication between DataNodes.
  f.DataNodes send heartbeats to the NameNode.
  g.Both DataNode and TaskTracker implement the Runnable interface.
2.The whoami command shows the current user.
3.Data is spread across multiple DataNodes in the form of blocks; any single DataNode holds at most one replica of a given block, while each block is replicated on several DataNodes (see the block-location sketch below).
4.Block replication between DataNodes is handled by DataXceiverServer, which streams the data over sockets.
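
Item 3 can be checked directly: the FileSystem API exposes where each block's replicas live. A rough sketch, where the class name BlockInfo is made up and a file ty_input/misc.sql is assumed to already exist in HDFS:

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockInfo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        FileStatus st = fs.getFileStatus(new Path("ty_input/misc.sql"));

        // One BlockLocation per block; getHosts() lists the DataNodes holding its replicas.
        BlockLocation[] blocks = fs.getFileBlockLocations(st, 0, st.getLen());
        for (BlockLocation b : blocks) {
            System.out.println("offset=" + b.getOffset()
                    + " length=" + b.getLength()
                    + " hosts=" + Arrays.toString(b.getHosts()));
        }
    }
}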
