Table of Contents
🌿 Java API Operations
1. Create a Maven project
Edit the pom.xml file
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.4</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.4</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>RELEASE</version>
</dependency>
</dependencies>
Wait for Maven to finish downloading the dependencies.
2. HDFS operations
- Create the class
package com.itcast.hdfsdemo;
import java.io.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.*;
public class HDFS_CRUD {
    FileSystem fs = null;

    // Initialize the client object
    @Before
    public void init() throws Exception {
        // Build a configuration object and set the URL of the HDFS to access
        Configuration conf = new Configuration();
        // Specify that HDFS is used
        conf.set("fs.defaultFS", "hdfs://192.168.39.101:9000");
        // Set the client identity
        System.setProperty("HADOOP_USER_NAME", "root");
        // Obtain the file system client object through the FileSystem static method
        fs = FileSystem.get(conf);
    }

    /*
    // Upload a file to HDFS
    @Test
    public void testAddFileToHdfs() throws IOException {
        // Local path of the file to upload
        Path src = new Path("F:/student.txt");
        // Destination path on HDFS
        Path dst = new Path("/testFile");
        // Upload the file
        fs.copyFromLocalFile(src, dst);
        // Release resources
        fs.close();
    }
    */

    /*
    // Download a file from HDFS to the local file system
    @Test
    public void testDownloadFileToLocal() throws IllegalArgumentException, IOException {
        // Download the file
        fs.copyToLocalFile(false, new Path("/testFile"), new Path("F:/"), true);
        fs.close();
    }
    */

    /*
    // Directory operations
    @Test
    public void testMkdirAndDeleteAndRename() throws Exception {
        // Create directories
        // fs.mkdirs(new Path("/a/b/c"));
        // fs.mkdirs(new Path("/a2/b2/c2"));
        // Rename a file or directory
        // fs.rename(new Path("/a"), new Path("/a3"));
        // Delete a directory; for a non-empty directory the second argument must be true
        fs.delete(new Path("/a3"), true);
    }
    */

    // List the files in a directory
    @Test
    public void testListFiles() throws FileNotFoundException, IllegalArgumentException, IOException {
        // Get an iterator over the files
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
        while (listFiles.hasNext()) {
            LocatedFileStatus fileStatus = listFiles.next();
            // Print the file name
            System.out.println("File name: " + fileStatus.getPath().getName());
            // Print the block size
            System.out.println("Block size: " + fileStatus.getBlockSize());
            // Print the file permissions
            System.out.println("Permissions: " + fileStatus.getPermission());
            // Print the file length
            System.out.println("Length: " + fileStatus.getLen());
            // Get the block locations (length, offset and DataNode information)
            BlockLocation[] blockLocations = fileStatus.getBlockLocations();
            for (BlockLocation bl : blockLocations) {
                System.out.println("Block length: " + bl.getLength() + " -- offset: " + bl.getOffset());
                String[] hosts = bl.getHosts();
                for (String host : hosts) {
                    System.out.println(host);
                }
            }
            System.out.println("----------separator----------");
        }
    }
}
- Result
🌿 MapReduce
Reference link
Distributed word count (WordCount)
-
WordCountReducer
package MapReduce;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
                          Reducer<Text, IntWritable, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        // Define a counter
        int count = 0;
        // Iterate over the group of values; summing the 1s gives the total count of the word
        for (IntWritable iw : values) {
            count += iw.get();
        }
        context.write(key, new IntWritable(count));
    }
}
-
WordCountMapper
package MapReduce;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // Mapper component
    @Override
    protected void map(LongWritable key, Text value,
                       Mapper<LongWritable, Text, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        // Receive one line of text and convert it to a String
        String line = value.toString();
        // Split the line on the delimiter
        String[] words = line.split(" ");
        // For every word, emit a pair such as <word, 1>
        for (String word : words) {
            // Use the context to send the map output to the reduce stage as its input
            context.write(new Text(word), new IntWritable(1));
        }
    }
}
-
WordCountCombiner
package MapReduce;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
                          Reducer<Text, IntWritable, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        // Local (per map task) aggregation
        int count = 0;
        for (IntWritable v : values) {
            count += v.get();
        }
        context.write(key, new IntWritable(count));
    }
}
-
OutputFormat
package MapReduce;

import java.io.IOException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public abstract class OutputFormat<K, V> {

    public abstract RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException;

    public abstract void checkOutputSpecs(JobContext context)
            throws IOException, InterruptedException;

    public abstract OutputCommitter getOutputCommitter(TaskAttemptContext context)
            throws IOException, InterruptedException;
}
-
WordCountDriver
package MapReduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.BasicConfigurator;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        BasicConfigurator.configure();
        // Use a Job object to wrap the information of this MR run
        Configuration conf = new Configuration();
        // MR run mode; "local" means local mode and can be omitted
        // conf.set("mapreduce.framework.name", "local");
        Job wcjob = Job.getInstance(conf);
        // Specify the jar of this MR job
        wcjob.setJarByClass(WordCountDriver.class);
        // Specify the Mapper and Reducer classes of this job
        wcjob.setMapperClass(WordCountMapper.class);
        wcjob.setReducerClass(WordCountReducer.class);
        // Output key/value types of the Mapper
        wcjob.setMapOutputKeyClass(Text.class);
        wcjob.setMapOutputValueClass(IntWritable.class);
        // Output key/value types of the Reducer
        wcjob.setOutputKeyClass(Text.class);
        wcjob.setOutputValueClass(IntWritable.class);
        // Location of the input data (local mode)
        FileInputFormat.setInputPaths(wcjob, "D:/Temporal/Test");
        // Location where the results are saved (local mode)
        FileOutputFormat.setOutputPath(wcjob, new Path("D:/Temporal/output"));
        // Submit the job and print its progress
        boolean res = wcjob.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
Inverted index
-
InvertedIndexMapper
package cn.itcast.mr.invertedIndex;

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.StringUtils;

public class InvertedIndexMapper extends Mapper<LongWritable, Text, Text, Text> {

    private static Text keyInfo = new Text();             // stores "word:fileName"
    private static final Text valueInfo = new Text("1");  // stores the term frequency, initialized to 1

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] fields = StringUtils.split(line, ' ');
        // Get the file split this line belongs to
        FileSplit fileSplit = (FileSplit) context.getInputSplit();
        // Get the file name from the split
        String fileName = fileSplit.getPath().getName();
        for (String field : fields) {
            // The key combines the word and the file name, e.g. "MapReduce:file1.txt"
            keyInfo.set(field + ":" + fileName);
            context.write(keyInfo, valueInfo);
        }
    }
}
-
InvertedIndexCombiner
package cn.itcast.mr.invertedIndex;

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class InvertedIndexCombiner extends Reducer<Text, Text, Text, Text> {

    private static Text info = new Text();

    // Input:  <MapReduce:file3.txt, {1,1,...}>
    // Output: <MapReduce, file3.txt:2>
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        // Sum up the term frequency
        for (Text value : values) {
            sum += Integer.parseInt(value.toString());
        }
        int splitIndex = key.toString().indexOf(":");
        // The new value combines the file name and the frequency
        info.set(key.toString().substring(splitIndex + 1) + ":" + sum);
        // The new key is just the word
        key.set(key.toString().substring(0, splitIndex));
        context.write(key, info);
    }
}
-
InvertedIndexReducer
package cn.itcast.mr.invertedIndex;

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class InvertedIndexReducer extends Reducer<Text, Text, Text, Text> {

    private static Text result = new Text();

    // Input:  <MapReduce, file3.txt:2>
    // Output: <MapReduce, file1.txt:1;file2.txt:1;file3.txt:2;>
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Build the document list; entries are separated by ";"
        String fileList = "";
        for (Text value : values) {
            fileList += value.toString() + ";";
        }
        result.set(fileList);
        context.write(key, result);
    }
}
-
InvertedIndexDriver
package cn.itcast.mr.invertedIndex;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class InvertedIndexDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(InvertedIndexDriver.class);
        job.setMapperClass(InvertedIndexMapper.class);
        job.setCombinerClass(InvertedIndexCombiner.class);
        job.setReducerClass(InvertedIndexReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path("D:/Temporal/InvertedIndex/input/*"));
        // Location where the results are saved
        FileOutputFormat.setOutputPath(job, new Path("D:/Temporal/InvertedIndex/output"));
        // Submit the job and wait for completion
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
Data deduplication
-
DedupMapper
package cn.itcast.mr.dedup;

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    private static Text field = new Text();

    // Input e.g.: <0, "2018-3-1 a"> <11, "2018-3-2 b">
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        field = value;
        // NullWritable.get() returns the empty placeholder value
        context.write(field, NullWritable.get());
    }
}
-
DedupReducer
package cn.itcast.mr.dedup;

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {

    // Input e.g.: <"2018-3-1 a", null> <"2018-3-2 b", null> <"2018-3-3 c", null>
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        // Identical lines share one key, so writing only the key removes duplicates
        context.write(key, NullWritable.get());
    }
}
-
DedupDriver
package cn.itcast.mr.dedup;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class DedupDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(DedupDriver.class);
        job.setMapperClass(DedupMapper.class);
        job.setReducerClass(DedupReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        // Input and output locations (fill in the paths before running)
        FileInputFormat.setInputPaths(job, new Path(""));
        FileOutputFormat.setOutputPath(job, new Path(""));
        job.waitForCompletion(true);
    }
}
TopN
-
TopNMapper
package cn.itcast.mr.topN;

import java.util.TreeMap;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TopNMapper extends Mapper<LongWritable, Text, NullWritable, IntWritable> {

    private TreeMap<Integer, String> repToRecordMap = new TreeMap<Integer, String>();

    @Override
    public void map(LongWritable key, Text value, Context context) {
        String line = value.toString();
        String[] nums = line.split(" ");
        for (String num : nums) {
            // Put each number into the TreeMap; once it holds more than 5 entries, drop the smallest
            repToRecordMap.put(Integer.parseInt(num), " ");
            if (repToRecordMap.size() > 5) {
                repToRecordMap.remove(repToRecordMap.firstKey());
            }
        }
    }

    // Override cleanup() so the map output is emitted only after all lines have been read
    @Override
    protected void cleanup(Context context) {
        for (Integer i : repToRecordMap.keySet()) {
            try {
                context.write(NullWritable.get(), new IntWritable(i));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
-
TopNReducer
package cn.itcast.mr.topN;

import java.io.IOException;
import java.util.Comparator;
import java.util.TreeMap;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

public class TopNReducer extends Reducer<NullWritable, IntWritable, NullWritable, IntWritable> {

    // TreeMap with a custom comparator so that keys are sorted in descending order
    private TreeMap<Integer, String> repToRecordMap = new TreeMap<Integer, String>(new Comparator<Integer>() {
        // compare(o1, o2) returns an int: a negative value means o1 sorts before o2,
        // 0 means they are equal, and a positive value means o1 sorts after o2
        @Override
        public int compare(Integer a, Integer b) {
            return b - a;
        }
    });

    @Override
    public void reduce(NullWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        for (IntWritable value : values) {
            repToRecordMap.put(value.get(), " ");
            if (repToRecordMap.size() > 5) {
                // With descending order the last key is the smallest, so drop it
                repToRecordMap.remove(repToRecordMap.lastKey());
            }
        }
        for (Integer i : repToRecordMap.keySet()) {
            context.write(NullWritable.get(), new IntWritable(i));
        }
    }
}
-
TopNDriver
package cn.itcast.mr.topN;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class TopNDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(TopNDriver.class);
        job.setMapperClass(TopNMapper.class);
        job.setReducerClass(TopNReducer.class);
        job.setNumReduceTasks(1);
        // Key type of the map output
        job.setMapOutputKeyClass(NullWritable.class);
        // Value type of the map output
        job.setMapOutputValueClass(IntWritable.class);
        // Key type of the reduce output
        job.setOutputKeyClass(NullWritable.class);
        // Value type of the reduce output
        job.setOutputValueClass(IntWritable.class);
        // Input path (fill in before running)
        FileInputFormat.setInputPaths(job, new Path(""));
        // Output path (fill in before running)
        FileOutputFormat.setOutputPath(job, new Path(""));
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
-
Test file
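As a purely illustrative sketch (the file path and the numbers below are arbitrary assumptions, not the original test data), the TopN mapper above expects plain-text lines of space-separated integers, so a test file could be created like this:
mkdir -p /data/topn/input
cat > /data/topn/input/num.txt <<'EOF'
10 3 8 7 6 5 1 2 9 4
11 12 17 14 15 20
19 18 13 16
EOF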
🌿 Zookeeper distributed coordination service
Deployment
1. Extract the installation package
mkdir -p /export/servers
tar -zxvf zookeeper-3.4.10.tar.gz -C /export/servers
2. Edit the configuration file
cd /export/servers/zookeeper-3.4.10/conf/
cp zoo_sample.cfg zoo.cfg
vi zoo.cfg
# Add the following to the file
# Data directory (persistence path)
dataDir=/export/data/zookeeper/zkdata
# Map server IDs to hostnames, and set the heartbeat port and the leader-election port
server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
3. Save and exit, then create the data directory and the myid file
mkdir -p /export/data/zookeeper/zkdata
cd /export/data/zookeeper/zkdata/
echo 1 > myid
4. Configure the environment variables
vi /etc/profile
export ZK_HOME=/export/servers/zookeeper-3.4.10
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$ZK_HOME/bin
5. Distribute to the other nodes (run this on slave1 and slave2 first so the target directory exists)
mkdir -p /export/servers/
6.
On nodes 2 and 3, change the contents of myid to 2 and 3 respectively
cd /export/data/zookeeper/zkdata
vi myid
On all three machines, reload the environment variables
source /etc/profile
On the master node, distribute the files:
scp -r /export/servers/zookeeper-3.4.10/ slave1:/export/servers/
scp -r /export/servers/zookeeper-3.4.10/ slave2:/export/servers/
scp -r /export/data/zookeeper/ slave1:/export/data/
scp -r /export/data/zookeeper/ slave2:/export/data/
scp /etc/profile slave1:/etc/profile
scp /etc/profile slave2:/etc/profile
7. Start ZooKeeper on all nodes
zkServer.sh start
# After all nodes have started, check the status
zkServer.sh status
-
Shell operations
(Screenshot: ZooKeeper shell operations.)
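A few representative ZooKeeper shell operations (run zkCli.sh from any node where ZooKeeper is installed; the znode /test and its data are just example names):
# Connect to the ensemble with the command-line client
zkCli.sh -server master:2181
# Inside the zk shell:
ls /                      # list znodes under the root
create /test "hello"      # create a znode with data
get /test                 # read the znode's data
set /test "world"         # update the data
delete /test              # remove the znode
quit                      # leave the shell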
Java API operations
Dependency
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.10</version>
</dependency>
The complete pom.xml:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.itcast</groupId>
<artifactId>HadoopDemo</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.2</version> <!-- 2.7.2 matches the Hadoop version on the virtual machine -->
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.10</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.2</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>RELEASE</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.8.2</version>
</dependency>
</dependencies>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
</project>
-
ZookeeperTest
package cn.itcast.zookeeper;

import org.apache.zookeeper.*;

public class ZookeeperTest {
    public static void main(String[] args) throws Exception {
        // Step 1: create the ZooKeeper client
        // Arg 1: zk address list; Arg 2: session timeout (here the common default); Arg 3: watcher
        ZooKeeper zk = new ZooKeeper("192.168.39.153:2181,192.168.39.154:2181,192.168.39.155:2181",
                30000, new Watcher() {
            @Override
            public void process(WatchedEvent event) {
                // Watch all triggered events (handle them here)
                System.out.println("Event type: " + event.getType());
                System.out.println("Event path: " + event.getPath());
                System.out.println("Notification state: " + event.getState());
            }
        });
        // Step 2: create a node
        // Arg 1: path of the node to create; Arg 2: node data; Arg 3: ACL; Arg 4: node type
        zk.create("/testRootPath", "testRootData".getBytes(),
                ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        // Step 3: create a child node
        zk.create("/testRootPath/testChildPathOne", "testChildDataOne".getBytes(),
                ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        // Step 4: read the node data
        // Arg 1: node path; Arg 2: whether to watch this node (true / false); Arg 3: stat object (usually null)
        System.out.println("Data of /testRootPath: " + new String(zk.getData("/testRootPath", false, null)));
        // Step 5: list the child nodes
        System.out.println(zk.getChildren("/testRootPath", true));
        // Step 6: modify the child node data so that the watcher fires
        // Arg 1: child node path; Arg 2: new data; Arg 3: expected version (-1 matches any version)
        zk.setData("/testRootPath/testChildPathOne", "modifyChildDataOne".getBytes(), -1);
        // Step 7: check whether the node exists
        System.out.println("Node status: [" + zk.exists("/testRootPath", true) + "]");
        // Step 8: delete the child node
        zk.delete("/testRootPath/testChildPathOne", -1);
        // Step 9: delete the root node
        zk.delete("/testRootPath", -1);
        zk.close();
    }
}
-
Result
(Screenshot: console output of ZookeeperTest.)
🌿 Hive
Install MySQL
1. Configure the virtual machine's yum repository
cd /etc/yum.repos.d
2. Rename (back up) the default repo files
mv CentOS-Base.repo CentOS-Base.repo.bak
mv CentOS-Debuginfo.repo CentOS-Debuginfo.repo.bak
mv CentOS-fasttrack.repo CentOS-fasttrack.repo.bak
mv CentOS-Vault.repo CentOS-Vault.repo.bak
vi CentOS-Media.repo
# Set the contents of the file to:
[c6-media]
name=CentOS-$releasever - Media
baseurl=file:///media/
gpgcheck=0
enabled=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6
3. Mount the installation media
cd /media
mount /dev/dvd /media
# Clean the yum cache
yum clean all
# Install packages with yum
yum install -y vim zip openssh-server openssh-clients
-
Example of the result
4. Install MySQL
rpm -qa | grep mysql
# If this lists any existing MySQL packages, remove them first, for example:
# rpm -e mysql-libs-5.1.73-8.el6_8.x86_64 --nodeps
yum install mysql mysql-server mysql-devel -y
Start the MySQL service
/etc/init.d/mysqld start
# Log in to MySQL
mysql
- Example of the result
Successfully entered the MySQL shell
5. Change the password
USE mysql;
UPDATE user SET Password=PASSWORD('123456') WHERE user='root';
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY '123456' WITH GRANT OPTION;
FLUSH PRIVILEGES;
Install Hive
1. Extract
tar -zxvf apache-hive-1.2.1-bin.tar.gz -C /export/servers/
2. Configure
1) Edit hive-env.sh
cd /export/servers/apache-hive-1.2.1-bin/conf
cp hive-env.sh.template hive-env.sh
# Edit the environment settings:
vim hive-env.sh
# Set HADOOP_HOME as follows:
export HADOOP_HOME=/usr/local/hadoop-2.7.3
2) Edit hive-site.xml
vi hive-site.xml
<!-- Add the following content -->
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true</value>
<description>MySQL connection URL</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>JDBC driver class</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>Username</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
<description>Password</description>
</property>
</configuration>
3. Upload the downloaded mysql-connector-java-5.1.32.jar into the lib folder under the Hive installation directory.
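For example, assuming the jar has already been copied to /opt/file on the node (the source path is an assumption), it can be placed into Hive's lib directory like this:
# Copy the MySQL JDBC driver into Hive's lib directory (source path is an assumption)
cp /opt/file/mysql-connector-java-5.1.32.jar /export/servers/apache-hive-1.2.1-bin/lib/
Then add Hive to the environment variables: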
vim /etc/profile
export HIVE_HOME=/export/servers/apache-hive-1.2.1-bin
export PATH=$PATH:$HIVE_HOME/bin
source /etc/profile
# Run hive from any directory to test
hive
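As a quick sanity check, a few statements can also be run non-interactively; this is only a minimal sketch, and the table name is arbitrary (the same HiveQL can equally be typed at the hive> prompt):
hive -e "SHOW DATABASES; CREATE TABLE IF NOT EXISTS hive_test (id INT, name STRING); SHOW TABLES;"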
4. Check the MySQL databases
mysql -uroot -p
show databases;
(Screenshot: output of show databases.)
Using Hive
Run on node 2
bin/beeline
!connect jdbc:hive2://master:10000
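Note that HiveServer2 must already be running on master (for example, started with hiveserver2 on node 1) before beeline can connect. As an alternative sketch, assuming the root user with an empty password, the connection and a test query can also be issued in one line:
beeline -u jdbc:hive2://master:10000 -n root -e "SHOW DATABASES;"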
Install Flume
mkdir -p /export/servers/flume
tar -zxvf /opt/file/apache-flume-1.8.0-bin.tar.gz
mv apache-flume-1.8.0-bin/* /export/servers/flume
1. Configure
cd /export/servers/flume/conf
cp flume-env.sh.template flume-env.sh
vim flume-env.sh
# Change line 22 to
export JAVA_HOME=/usr/java/jdk1.8.0_261-amd64
- Screenshot
2. Environment variables
vim /etc/profile
export FLUME_HOME=/export/servers/flume
export PATH=$PATH:$FLUME_HOME/bin
source /etc/profile
Copy netcat-logger.conf into the /export/servers/flume/conf directory on the virtual machine (a sketch of its contents follows).
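If the file is not already at hand, a minimal netcat-source-to-logger-sink configuration modelled on the standard Flume example can be written directly on the VM; treat this as a sketch rather than the exact file used here. The agent name a1 and port 44444 match the start command and the telnet test below:
cat > /export/servers/flume/conf/netcat-logger.conf <<'EOF'
# Name the components of agent a1
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# netcat source listening on localhost:44444
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
# logger sink prints received events to the console
a1.sinks.k1.type = logger
# in-memory channel buffering events between source and sink
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
EOF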
3. Start Flume
cd /export/servers/flume/bin
./flume-ng agent --conf /export/servers/flume/conf --conf-file /export/servers/flume/conf/netcat-logger.conf --name a1 -Dflume.root.logger=INFO,console
- Screenshot
4. Test Flume data collection
Install telnet (or netcat)
yum install telnet
# or
yum install nc.x86_64
- Screenshot
Test commands
telnet localhost 44444
# or
nc localhost 44444