Some notes on setting up Hadoop on a Mac:
0. Install Java and Homebrew
For Java, search the web for installation instructions.
To install Homebrew, run the following command:
ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
1. Configure SSH
To keep remote administration of Hadoop and sharing between Hadoop nodes secure, Hadoop uses the SSH protocol, so passwordless SSH to localhost needs to work.
On the Mac, run:
ssh localhost
If this fails, adjust a system setting first:
System Preferences -> Sharing -> enable Remote Login -> on the right, allow access for all users.
Generate a key pair:
ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
Running this creates id_dsa and id_dsa.pub under the .ssh folder in your home directory. Then append the public key to authorized_keys in the same directory:
cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
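After this, ssh localhost should log in without prompting for a password. If it still prompts, the usual culprit is permissions on the .ssh directory; tightening them as below generally fixes it:
ssh localhost                        # should no longer ask for a password
chmod 700 ~/.ssh                     # only needed if the login still prompts
chmod 600 ~/.ssh/authorized_keys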
2. Install Hadoop
Install it by running:
brew install hadoop
Once the command finishes, Hadoop is installed under /usr/local/Cellar/hadoop.
2.1 Configure Hadoop
All of the following files are in /usr/local/Cellar/hadoop/2.7.2/libexec/etc/hadoop.
1) Edit hadoop-env.sh
Change
# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
to the following (the empty krb5 settings avoid the "Unable to load realm info from SCDynamicStore" warning on OS X):
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true -Djava.security.krb5.realm= -Djava.security.krb5.kdc="
2) Edit core-site.xml so it reads:
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/usr/local/Cellar/hadoop/hdfs/tmp</value>
  </property>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://localhost:9000</value>
  </property>
</configuration>
3) Edit mapred-site.xml so it reads (if the file does not exist, copy mapred-site.xml.template to mapred-site.xml):
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>localhost:9010</value>
  </property>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
4) Edit hdfs-site.xml so it reads:
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
Before starting the daemons for the first time, format HDFS:
hadoop namenode -format
2.2 Start Hadoop
From the /usr/local/Cellar/hadoop/2.7.2/sbin directory, run:
# start HDFS
./start-dfs.sh
# stop HDFS
./stop-dfs.sh
Once started, the NameNode web UI at http://localhost:50070/ can be used to inspect the cluster.
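Because mapred-site.xml above sets mapreduce.framework.name to yarn, the YARN daemons also need to be running before a MapReduce job is submitted (section 3.3). The YARN scripts live in the same sbin directory, and jps shows which daemons are up:
./start-yarn.sh   # starts the ResourceManager and NodeManager
jps               # should list NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager
./stop-yarn.sh    # stops YARN
The ResourceManager web UI is at http://localhost:8088/.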
3. Develop a Hadoop example with Maven
3.1 Create a Maven project
Create the project from the maven-archetype-quickstart archetype, with the following dependencies:
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-common</artifactId>
  <version>2.7.2</version>
</dependency>
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-hdfs</artifactId>
  <version>2.7.2</version>
</dependency>
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-client</artifactId>
  <version>2.7.2</version>
</dependency>
3.2 Test HDFS
import java.io.InputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class Test {
    public static void main(String[] args) throws Exception {
        String uri = "hdfs://localhost:9000";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI(uri), conf);

        // Write a small test file to HDFS.
        FSDataOutputStream os = fs.create(new Path("/user/lxlong/test.log"));
        os.write("Hello lxlong".getBytes());
        os.flush();
        os.close();

        // Read it back and copy it to stdout. Pass close=false so that
        // IOUtils does not close System.out, which the listing below still needs.
        InputStream is = fs.open(new Path("/user/lxlong/test.log"));
        IOUtils.copyBytes(is, System.out, 1024, false);
        is.close();

        // List the directory contents.
        FileStatus[] statuses = fs.listStatus(new Path("/user/lxlong"));
        for (FileStatus status : statuses) {
            System.out.println(status);
        }
        fs.close();
    }
}
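With HDFS running, one convenient way to run this class straight from the Maven project is the exec-maven-plugin (invoked here by prefix, without declaring it in the pom; the class above has no package declaration, so adjust -Dexec.mainClass accordingly if you put it in one):
mvn -q compile exec:java -Dexec.mainClass=Test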
3.3 Test MapReduce
// Package name matches the class name used when submitting the job in step 4) below.
package com.test.HadoopTest;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

    // Emits (first token of the line, 1) for every input line that contains a space.
    public static class MyMapper extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text event = new Text();

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            int idx = value.toString().indexOf(" ");
            if (idx > 0) {
                String e = value.toString().substring(0, idx);
                event.set(e);
                context.write(event, one);
            }
        }
    }

    // Sums the counts for each key; also reused as the combiner.
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: WordCount <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "Word Count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(MyMapper.class);
        job.setCombinerClass(MyReducer.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
1) In the project directory, run mvn clean package to build the jar HadoopTest-0.0.1-SNAPSHOT.jar.
2) Copy the jar into the Hadoop bin directory (purely for convenience):
cp HadoopTest-0.0.1-SNAPSHOT.jar /usr/local/Cellar/hadoop/2.7.2/bin
3) Copy some input files into HDFS (if /user/lxlong does not exist yet, create it first with ./hdfs dfs -mkdir -p /user/lxlong):
./hdfs dfs -put /tmp/input /user/lxlong/input
4) Run the Hadoop job:
./hadoop jar HadoopTest-0.0.1-SNAPSHOT.jar com.test.HadoopTest.WordCount /user/lxlong/input /user/lxlong/output
5) View the output on HDFS:
hdfs dfs -cat /user/lxlong/output/part-r-00000
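Listing the output directory shows everything the job wrote, i.e. the part-r-* reducer output plus the _SUCCESS marker. Note that the output directory must not exist before the job is submitted, otherwise the job fails at startup:
./hdfs dfs -ls /user/lxlong/output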