Big Data Learning Notes - Table of Contents
2021.01.28
1. Passwordless SSH
1. Generate the SSH key pair: ssh-keygen -t rsa
2. Send every node's public key to node1: ssh-copy-id node1
3. From node1, copy the collected key file to every other machine: scp /root/.ssh/<public key file> node02:/root/.ssh
2. Clock synchronization
Install the NTP service:
1. yum install ntp -y
2. Open the cron editor: crontab -e
3. Add the entry: */1 * * * * /usr/sbin/ntpdate ntp4.aliyun.com
3. Install the MySQL service
Reference: [https://www.jianshu.com/p/276d59cbc529](https://www.jianshu.com/p/276d59cbc529)
4. ZooKeeper cluster setup
1. Create a zookeeper directory under /root: mkdir /root/zookeeper
2. Enter the directory: cd zookeeper
3. Download ZooKeeper: wget http://archive.apache.org/dist/zookeeper/zookeeper-3.4.9/zookeeper-3.4.9.tar.gz
Download page: http://archive.apache.org/dist/zookeeper/zookeeper-3.4.9/
4. Extract the archive: tar -zxvf zookeeper-3.4.9.tar.gz
5. Switch to the conf directory: cd zookeeper-3.4.9/conf
6. cp zoo_sample.cfg zoo.cfg
7. Edit zoo.cfg:
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/root/zookeeper/zookeeper-3.4.9/zkdatas
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
autopurge.purgeInterval=1
server.1=node1:2888:3888
server.2=node2:2888:3888
server.3=node3:2888:3888
8. Create the zkdatas data directory (mkdir /root/zookeeper/zookeeper-3.4.9/zkdatas), switch into it and create the file myid (vim myid) containing 1.
9. Configure environment variables:
vim /etc/profile
#set zookeeper environment
export ZK_HOME=/root/zookeeper/zookeeper-3.4.9
export PATH=$PATH:$ZK_HOME/bin
Then make the environment variables take effect with:
source /etc/profile
10. Start command:
zkServer.sh start
Stop command:
zkServer.sh stop
Restart command:
zkServer.sh restart
Check cluster node status:
zkServer.sh status
11. Copy the installation to the other servers (use -r because it is a directory):
scp -r /root/zookeeper/zookeeper-3.4.9/ node2:/root/zookeeper/
scp -r /root/zookeeper/zookeeper-3.4.9/ node3:/root/zookeeper/
12. Change the myid file on each server (2 on node2, 3 on node3),
then start the service on every node.
2021.02.01
5. Using ZooKeeper commands
Enter the command-line client: bin/zkCli.sh -server node1:2181
Create a znode: create <path> <data>
-e ephemeral node
-s sequential (persistent) node
Delete: rmr <path>
Get node data: get <path>
Update a node: set <path> <data>
Watch mechanism:
get <path> watch
Using the ZooKeeper API
Maven configuration
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>bigdata</groupId>
<artifactId>bigdata</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-framework</artifactId>
<version>2.12.0</version>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>2.12.0</version>
</dependency>
<dependency>
<groupId>com.google.collections</groupId>
<artifactId>google-collections</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>RELEASE</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.25</version>
</dependency>
</dependencies>
</project>
API code
package zookeeper;
import org.apache.curator.RetryPolicy;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.framework.recipes.cache.ChildData;
import org.apache.curator.framework.recipes.cache.TreeCache;
import org.apache.curator.framework.recipes.cache.TreeCacheEvent;
import org.apache.curator.framework.recipes.cache.TreeCacheListener;
import org.apache.curator.retry.ExponentialBackoffRetry;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.data.Stat;
import org.junit.Test;
public class znodecreate {
@Test
public void createznode() throws Exception {
System.out.print("111");
// 重新连接时间,次数
RetryPolicy retryPolicy=new ExponentialBackoffRetry(1000,1);
String ip="192.168.112.128:2181,192.168.112.129:2181,192.168.112.130:2181";
String ips="8.135.44.130:2181";
CuratorFramework curatorFramework = CuratorFrameworkFactory.newClient(ip, 8000, 8000, retryPolicy);
curatorFramework.start();
// curatorFramework.create().creatingParentsIfNeeded().withMode(CreateMode.EPHEMERAL).forPath("/hello/a","word".getBytes());
curatorFramework.setData().forPath("/hello","hello".getBytes());
byte[] stat = curatorFramework.getData().forPath("/hello");
System.out.print(new String(stat));
curatorFramework.close();
}
@Test
public void watchznode() throws Exception {
RetryPolicy retryPolicy=new ExponentialBackoffRetry(1000,1);
String ip="192.168.112.128:2181,192.168.112.129:2181,192.168.112.130:2181";
String ips="8.135.44.130:2181";
CuratorFramework curatorFramework = CuratorFrameworkFactory.newClient(ip, 8000, 8000, retryPolicy);
curatorFramework.start();
final TreeCache treeCache=new TreeCache(curatorFramework,"/hello");
// 自定义监听器
treeCache.getListenable().addListener(new TreeCacheListener() {
public void childEvent(CuratorFramework curatorFramework, TreeCacheEvent treeCacheEvent) throws Exception {
ChildData data=treeCacheEvent.getData();
switch (treeCacheEvent.getType()){
case NODE_ADDED:
System.out.println("监听到有新节点!");
break;
case NODE_REMOVED:
System.out.println("监听到有节点移除!");
break;
case NODE_UPDATED:
System.out.println("监听到有节点更新!");
break;
default:
break;
}
}
});
treeCache.start();
Thread.sleep(1000000);
}
}
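The -e and -s flags from the CLI section above map to CreateMode values in the Curator API. Below is a minimal sketch, assuming it is added as another @Test method inside the znodecreate class above (the /demo paths are made-up examples):

```java
@Test
public void createModesSketch() throws Exception {
    RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000, 1);
    CuratorFramework client = CuratorFrameworkFactory.newClient(
            "192.168.112.128:2181,192.168.112.129:2181,192.168.112.130:2181", 8000, 8000, retryPolicy);
    client.start();
    // -e  -> EPHEMERAL: the node disappears when this session closes
    client.create().creatingParentsIfNeeded()
            .withMode(CreateMode.EPHEMERAL).forPath("/demo/ephemeral", "e".getBytes());
    // -s  -> PERSISTENT_SEQUENTIAL: ZooKeeper appends an increasing suffix, e.g. /demo/seq-0000000001
    client.create().creatingParentsIfNeeded()
            .withMode(CreateMode.PERSISTENT_SEQUENTIAL).forPath("/demo/seq-", "s".getBytes());
    client.close();
}
```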
2021.02.02
HDFS API connection setup
1. Configure the Hadoop runtime environment on Windows
Step 1: Copy the hadoop-2.7.5 folder to a path that contains no Chinese characters and no spaces.
Step 2: On Windows, configure the HADOOP_HOME environment variable and add %HADOOP_HOME%\bin to PATH.
Step 3: Copy the hadoop.dll file from the bin directory of the hadoop-2.7.5 folder into C:\Windows\System32.
Step 4: Restart Windows.
Import the dependencies in IDEA
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>cn.itcast</groupId>
<artifactId>day04_hdfs_api_demo</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.5</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.5</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.5</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.7.5</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>RELEASE</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
<!-- <verbal>true</verbal>-->
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.3</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<minimizeJar>true</minimizeJar>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
API code
package hdfs.url.wjxt;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.Test;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
public class File {
FileSystem fileSystem=FileSystem.get(new URI("hdfs://node1:8020"),new Configuration());
public File() throws IOException, URISyntaxException {
}
@Test
public void getFileSystem1() throws IOException {
Configuration configuration = new Configuration();
configuration.set("fs.defaultFS","hdfs://8.135.44.130:9000");
// configuration.set("fs.defaultFS","hdfs://192.168.112.128:8020");
FileSystem fileSystem = FileSystem.get(configuration);
System.out.print(fileSystem);
}
@Test
public void listFile() throws IOException, URISyntaxException {
RemoteIterator<LocatedFileStatus> iterator=fileSystem.listFiles(new Path("/"),true);
while (iterator.hasNext()){
LocatedFileStatus fileStatus=iterator.next();
System.out.println(fileStatus.getPath()+"----"+fileStatus.getPath().getName());
BlockLocation[] blockLocations=fileStatus.getBlockLocations();
System.out.println(blockLocations.length);
System.out.println(blockLocations.hashCode());
}
fileSystem.close();
}
@Test
public void mkdieFile() throws IOException {
boolean mkdirs = fileSystem.mkdirs(new Path("/mp4"));
// if (mkdirs){
// fileSystem.create(new Path("/aaa/aa.txt"));
// }
System.out.println(mkdirs);
fileSystem.close();
}
@Test
public void dowloadFile() throws IOException {
FSDataInputStream inputStream = fileSystem.open(new Path("/aaa/aa.txt"));
FileOutputStream fileOutputStream = new FileOutputStream("F://a.txt");
IOUtils.copy(inputStream,fileOutputStream);
IOUtils.closeQuietly(fileOutputStream);
IOUtils.closeQuietly(inputStream);
fileOutputStream.close();
}
@Test
public void dowloadFile1() throws IOException {
fileSystem.copyToLocalFile(new Path("/aaa/aa.txt"),new Path("F://b.txt"));
fileSystem.close();
}
@Test
public void uploadFile() throws IOException {
fileSystem.copyFromLocalFile(new Path("C:\\Users\\asus\\Music\\囚鸟.mp3"),new Path("/mp4/"));
fileSystem.close();
}
//Merge small files and then upload (see the sketch after this class)
@Test
public void biguploadFile(){
}
}
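The empty biguploadFile method above is meant to merge many small local files into one HDFS file before upload. Below is a minimal sketch of one way to do it, assuming a made-up local directory F:/smallfiles and target path /big/merged.txt; it reuses the fileSystem field and the commons-io IOUtils already imported in the class:

```java
@Test
public void biguploadFileSketch() throws IOException {
    // Target HDFS file that will hold the concatenated contents of all small files
    FSDataOutputStream out = fileSystem.create(new Path("/big/merged.txt"));
    // Local file system handle used to list and read the small files
    LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
    FileStatus[] smallFiles = localFs.listStatus(new Path("file:///F:/smallfiles"));
    for (FileStatus status : smallFiles) {
        FSDataInputStream in = localFs.open(status.getPath());
        IOUtils.copy(in, out);          // append this small file to the merged output
        IOUtils.closeQuietly(in);
    }
    IOUtils.closeQuietly(out);
    fileSystem.close();
}
```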
1 - HDFS high availability mechanism
2021.02.04
WordCount example code
jobmain class
package wordcount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.net.URI;
public class jobmain extends Configured implements Tool {
@Override
public int run(String[] args) throws Exception{
Job job= Job.getInstance(super.getConf(),"wordcount");
job.setJarByClass(jobmain.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(job,new Path("hdfs://node1:8020/wordcount"));
// TextInputFormat.addInputPath(job, new Path("file:///E:\\mapreduce\\input"));
//第二步:指定Map阶段的处理方式和数据类型
job.setMapperClass(map.class);
//设置Map阶段K2的类型
job.setMapOutputKeyClass(Text.class);
//设置Map阶段V2的类型
job.setMapOutputValueClass(LongWritable.class);
//第三,四,五,六 采用默认的方式
//第七步:指定Reduce阶段的处理方式和数据类型
job.setReducerClass(reduce.class);
//设置K3的类型
job.setOutputKeyClass(Text.class);
//设置V3的类型
job.setOutputValueClass(LongWritable.class);
//第八步: 设置输出类型
job.setOutputFormatClass(TextOutputFormat.class);
//设置输出的路径
Path path = new Path("hdfs://node1:8020/wordcount_out");
// TextOutputFormat.setOutputPath(job, new Path("file:///E:\\mapreduce\\output"));
TextOutputFormat.setOutputPath(job,path);
FileSystem fileSystem = FileSystem.get(new URI("hdfs://node1:8020"), new Configuration());
// 判断目录是否存在
boolean bl2 = fileSystem.exists(path);
if(bl2){
//删除目标目录
fileSystem.delete(path, true);
}
boolean bl = job.waitForCompletion(true);
return bl ? 0:1;
}
public static void main(String[] args)throws Exception{
Configuration configuration = new Configuration();
//启动job任务
int run = ToolRunner.run(configuration, new jobmain(), args);
System.exit(run);
}
}
map class
package wordcount;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class map extends Mapper<LongWritable,Text,Text,LongWritable>{
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
Text text = new Text();
LongWritable longWritable = new LongWritable();
//1:将一行的文本数据进行拆分
String[] split = value.toString().split(",");
//2:遍历数组,组装 K2 和 V2
for (String word : split) {
//3:将K2和V2写入上下文
text.set(word);
longWritable.set(1);
if (text.toString().equals("word")){
context.write(text, longWritable);
}
}
}
}
reduce class
package wordcount;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class reduce extends Reducer<Text,LongWritable,Text,LongWritable>{
long a=0;
@Override
protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
long count = 0;
//1:遍历集合,将集合中的数字相加,得到 V3
if(key.toString().equals("word")){
for (LongWritable value : values) {
count += value.get();
a+=1;
}
}
//2:将K3和V3写入上下文中
System.out.println(key.toString().equals("word"));
context.write(key, new LongWritable(count));
}
}
2021.02.05
Shuffle partitioner code
map class
package pratitioner;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class map extends Mapper<LongWritable,Text,Text,NullWritable>{
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
context.write(value, NullWritable.get());
}
}
reduce class
package pratitioner;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class reduce extends Reducer<Text,NullWritable,Text,NullWritable> {
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
context.write(key,NullWritable.get());
}
}
partitioner class
package pratitioner;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
public class mypratitoner extends Partitioner<Text,NullWritable> {
@Override
public int getPartition(Text text, NullWritable nullWritable, int i) {
String[] split = text.toString().split(",");
String s=split[2];
System.out.println(s);
if(Integer.valueOf(s)>10){
return 1;
}else {
return 0;
}
}
}
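The value returned by getPartition must fall in the range [0, numReduceTasks); the job below sets two reduce tasks to match the two possible return values. A tiny made-up check of how one record is routed (the field index 2 and the threshold 10 come from the code above):

```java
// Made-up sample line "a,b,15,x": field[2] = 15 > 10, so the record goes to partition 1 (file part-r-00001)
int partition = new mypratitoner().getPartition(new Text("a,b,15,x"), NullWritable.get(), 2);
System.out.println(partition);   // prints 1
```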
job class
package pratitioner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.net.URI;
public class job extends Configured implements Tool{
@Override
public int run(String[] strings) throws Exception {
Job job= Job.getInstance(super.getConf(),"partitioner");
job.setJarByClass(job.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(job,new Path("hdfs://node1:8020/input"));
job.setMapperClass(map.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
job.setPartitionerClass(mypratitoner.class);
job.setReducerClass(reduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
job.setNumReduceTasks(2);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job,new Path("hdfs://node1:8020/out/partition"));
FileSystem fileSystem= FileSystem.get(new URI("hdfs://node1:8020"),new Configuration());
boolean file = fileSystem.exists(new Path("hdfs://node1:8020/out/partition"));
if (file){
fileSystem.delete(new Path("hdfs://node1:8020/out/partition"),true);
}
boolean b = job.waitForCompletion(true);
return b ? 0:1;
}
public static void main(String[] args) throws Exception {
Configuration configuration=new Configuration();
int run = ToolRunner.run(configuration, new job(), args);
System.exit(run);
}
}
Configure log4j logging
# Configure logging for testing: optionally with log file
#log4j.rootLogger=debug,appender
log4j.rootLogger=info,appender
#log4j.rootLogger=error,appender
# Output to the console
log4j.appender.appender=org.apache.log4j.ConsoleAppender
# Layout: TTCCLayout
log4j.appender.appender.layout=org.apache.log4j.TTCCLayout
Example
20210210
Traffic flow statistics and sorting
map code
package fllow;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class folwmapp extends Mapper<LongWritable,Text,Text,FlowBean> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
FlowBean flowBean = new FlowBean();
String[] split = value.toString().split("\t");
String phonenumber=split[1];
flowBean.setUpFlow(Integer.valueOf(split[6]));
flowBean.setDownFlow(Integer.valueOf(split[7]));
flowBean.setUpcountFlow(Integer.valueOf(split[8]));
flowBean.setDownCountFlow(Integer.valueOf(split[9]));
context.write(new Text(phonenumber),flowBean);
}
}
reduce code
package fllow;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class flowreduce extends Reducer<Text,FlowBean,Text,FlowBean> {
@Override
protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
Integer upFlow=0;
Integer downFlow=0;
Integer upcountFlow=0;
Integer downCountFlow=0;
for(FlowBean i:values){
upFlow+=i.getUpFlow();
downFlow+=i.getDownFlow();
upcountFlow+=i.getUpcountFlow();
downCountFlow+=i.getDownCountFlow();
}
FlowBean flowBean=new FlowBean();
flowBean.setUpFlow(upFlow);
flowBean.setDownFlow(downFlow);
flowBean.setUpcountFlow(upcountFlow);
flowBean.setDownCountFlow(downCountFlow);
context.write(key,flowBean);
}
}
Partitioner code
package fllow;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
public class flowpartition extends Partitioner<Text,FlowBean>{
@Override
public int getPartition(Text text, FlowBean flowBean, int i) {
String phonenumber = text.toString();
if (phonenumber.startsWith("135")){
return 0;
}else if (phonenumber.startsWith("136")){
return 1;
}else {
return 2;
}
}
}
Sorting (FlowBean) code
package fllow;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.Serializable;
public class FlowBean implements WritableComparable<FlowBean>{
private Integer upFlow;
private Integer downFlow;
private Integer upcountFlow;
private Integer downCountFlow;
public Integer getUpFlow() {
return upFlow;
}
public void setUpFlow(Integer upFlow) {
this.upFlow = upFlow;
}
public Integer getDownFlow() {
return downFlow;
}
public void setDownFlow(Integer downFlow) {
this.downFlow = downFlow;
}
public Integer getUpcountFlow() {
return upcountFlow;
}
public void setUpcountFlow(Integer upcountFlow) {
this.upcountFlow = upcountFlow;
}
public Integer getDownCountFlow() {
return downCountFlow;
}
public void setDownCountFlow(Integer downCountFlow) {
this.downCountFlow = downCountFlow;
}
@Override
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeInt(upFlow);
dataOutput.writeInt(downFlow);
dataOutput.writeInt(upcountFlow);
dataOutput.writeInt(downCountFlow);
}
@Override
public void readFields(DataInput dataInput) throws IOException {
this.upFlow=dataInput.readInt();
this.downFlow=dataInput.readInt();
this.upcountFlow=dataInput.readInt();
this.downCountFlow=dataInput.readInt();
}
@Override
public String toString() {
return " " +
" " + upFlow +
" " + downFlow +
" " + upcountFlow +
" " + downCountFlow ;
}
@Override
public int compareTo(FlowBean o) {
System.out.println("------------------------------"+o);
return this.upFlow - o.upFlow;
}
}
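Note that compareTo only influences sorting when FlowBean is used as the map output key; in the job below the bean is the value, so the job sums and partitions the flows but does not sort by them. A minimal sketch of the change needed for a descending sort, assuming a second pass where the bean is emitted as K2:

```java
// In a sorting pass the mapper would emit the bean as the key:
//   context.write(flowBean, new Text(phonenumber));
// and compareTo would be flipped so larger upFlow values sort first:
@Override
public int compareTo(FlowBean o) {
    return o.upFlow - this.upFlow;   // descending by upFlow
}
```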
job code
package fllow;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.net.URI;
public class flowjob extends Configured implements Tool{
@Override
public int run(String[] strings) throws Exception {
Job job=Job.getInstance(super.getConf(),"flowjob");
job.setJarByClass(flowjob.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(job,new Path("hdfs://node1:8020/flowinput"));
job.setMapperClass(folwmapp.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(FlowBean.class);
job.setPartitionerClass(flowpartition.class);
job.setReducerClass(flowreduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FlowBean.class);
job.setNumReduceTasks(3);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job,new Path("hdfs://node1:8020/flowoutput/par"));
FileSystem fileSystem=FileSystem.get(new URI("hdfs://node1:8020"),new Configuration());
if(fileSystem.exists(new Path("hdfs://node1:8020/flowoutput"))){
fileSystem.delete(new Path("hdfs://node1:8020/flowoutput"),true);
}
boolean b=job.waitForCompletion(true);
return b ? 0:1;
}
public static void main(String[] args) throws Exception {
Configuration configuration=new Configuration();
int run = ToolRunner.run(configuration, new flowjob(), args);
System.exit(run);
}
}
20210215
Join of two files (reduce-side join)
map code (the mapper class itself was not recorded here; a hedged sketch follows below)
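The joinmap class referenced by the job below was not captured in these notes (the block above repeated the reduce class). Below is a minimal sketch of what it likely looked like, assuming comma-separated input where product records start with "p" (product id in field 0) and order records carry the product id in field 2; the field indexes are assumptions:

```java
package join;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;

public class joinmap extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] split = line.split(",");
        // Use the product id as the join key so matching product and order rows meet in one reduce call
        String joinKey = line.startsWith("p") ? split[0] : split[2];
        context.write(new Text(joinKey), new Text(line));
    }
}
```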
reduce code
package join;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class joinduce extends Reducer<Text,Text,Text,Text> {
@Override
protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
String coun1="";
String coun2="";
for (Text i:values){
if(i.toString().startsWith("p")){
coun1=i.toString();
}else {
coun2=i.toString();
}
}
System.out.println(coun1);
System.out.println(coun2);
context.write(key,new Text(coun1+coun2));
}
}
job code
package join;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.net.URI;
public class joinjob extends Configured implements Tool {
@Override
public int run(String[] strings) throws Exception {
Job job= Job.getInstance(super.getConf(),"partitioner");
job.setJarByClass(joinjob.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(job,new Path("hdfs://node1:8020/joininput"));
job.setMapperClass(joinmap.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setReducerClass(joinduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job,new Path("hdfs://node1:8020/joinouput"));
FileSystem fileSystem= FileSystem.get(new URI("hdfs://node1:8020"),new Configuration());
boolean file = fileSystem.exists(new Path("hdfs://node1:8020/joinouput"));
if (file){
fileSystem.delete(new Path("hdfs://node1:8020/joinouput"),true);
}
boolean b = job.waitForCompletion(true);
return b ? 0:1;
}
public static void main(String[] args) throws Exception {
Configuration configuration=new Configuration();
int run = ToolRunner.run(configuration, new joinjob(), args);
System.exit(run);
}
}
20210216
Counting common friends
Two MapReduce jobs are used in total.
Step 1: produce the intermediate file. For example, an input line A:B,C,D (user A's friends are B, C and D) is inverted by the map into (B, A), (C, A), (D, A); the reduce then concatenates, for each friend, all users who have that friend, producing lines like A-E-F-	B.
map code
package commonfriend;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class map extends Mapper<LongWritable,Text,Text,Text>{
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] split = value.toString().split(":");
String[] split1 = split[1].toString().split(",");
for (String i:split1){
context.write(new Text(i),new Text(split[0]));
}
}
}
reduce code
package commonfriend;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class reduce extends Reducer<Text,Text,Text,Text> {
@Override
protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
String coun="";
for (Text i:values){
coun+=i.toString()+"-";
}
context.write(new Text(coun),key);
}
}
job code
package commonfriend;
import join.joinduce;
import join.joinjob;
import join.joinmap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.net.URI;
public class job extends Configured implements Tool {
@Override
public int run(String[] strings) throws Exception {
Job job= Job.getInstance(super.getConf(),"partitioner");
job.setJarByClass(job.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(job,new Path("hdfs://node1:8020/common/input1"));
job.setMapperClass(map.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setReducerClass(reduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job,new Path("hdfs://node1:8020/common/ouput1"));
FileSystem fileSystem= FileSystem.get(new URI("hdfs://node1:8020"),new Configuration());
boolean file = fileSystem.exists(new Path("hdfs://node1:8020/common/ouput1"));
if (file){
fileSystem.delete(new Path("hdfs://node1:8020/common/ouput1"),true);
}
boolean b = job.waitForCompletion(true);
return b ? 0:1;
}
public static void main(String[] args) throws Exception {
Configuration configuration=new Configuration();
int run = ToolRunner.run(configuration, new job(), args);
System.exit(run);
}
}
Step 2
Process the intermediate file: the map splits each user list into user pairs and emits (pair, shared friend); the reduce then concatenates all shared friends for each pair.
map code
package commonfriend;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.lang.reflect.Array;
import java.util.Arrays;
public class map2 extends Mapper<LongWritable,Text,Text,Text> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] split = value.toString().split("\t");
String[] split1 = split[0].toString().split("-");
Arrays.sort(split1);
System.out.println(split1[0]);
for(int i=0;i<split1.length-1;i++){
for (int j=i+1;j<split1.length;j++){ // start at i+1 so each pair is generated exactly once
if (!split1[i].equals(split1[j])){
context.write(new Text(split1[i]+"-"+split1[j]),new Text(split[1]));
}
}
}
}
}
reduce code
package commonfriend;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class reduce2 extends Reducer<Text,Text,Text,Text> {
@Override
protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
String coun="";
for (Text i:values){
coun+=i.toString()+"-";
}
String s=coun.substring(0,coun.length()-1);
context.write(key,new Text(s));
}
}
job code
package commonfriend;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.net.URI;
public class job2 extends Configured implements Tool {
@Override
public int run(String[] strings) throws Exception {
Job job= Job.getInstance(super.getConf(),"partitioner");
job.setJarByClass(job2.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(job,new Path("hdfs://node1:8020/common/ouput1"));
job.setMapperClass(map2.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setReducerClass(reduce2.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job,new Path("hdfs://node1:8020/common/ouput2"));
FileSystem fileSystem= FileSystem.get(new URI("hdfs://node1:8020"),new Configuration());
boolean file = fileSystem.exists(new Path("hdfs://node1:8020/common/ouput2"));
if (file){
fileSystem.delete(new Path("hdfs://node1:8020/common/ouput2"),true);
}
boolean b = job.waitForCompletion(true);
return b ? 0:1;
}
public static void main(String[] args) throws Exception {
Configuration configuration=new Configuration();
int run = ToolRunner.run(configuration, new job2(), args);
System.exit(run);
}
}
Small-file merging
Custom input (InputFormat) class
package format;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import java.io.IOException;
public class MyINputFormat extends FileInputFormat<NullWritable,BytesWritable> {
@Override
public RecordReader<NullWritable, BytesWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
MyRecordReader myRecordReader=new MyRecordReader();
myRecordReader.initialize(inputSplit,taskAttemptContext);
return myRecordReader;
}
@Override
protected boolean isSplitable(JobContext context, Path filename) {
return false;
}
}
package format;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import java.io.IOException;
public class MyRecordReader extends RecordReader<NullWritable,BytesWritable> {
Configuration configuration=null;
FileSplit fileSplit=null;
boolean processec=false;
BytesWritable byteWritable=new BytesWritable();
FileSystem fileSystem=null;
FSDataInputStream fsDataInputStream=null;
// 用于初始化
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
configuration=taskAttemptContext.getConfiguration();
fileSplit= (FileSplit) inputSplit;
}
//用于获取k1,v1
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
if (!processec){
fileSystem=FileSystem.get(configuration);
System.out.println(fileSplit.getPath());
fsDataInputStream=fileSystem.open(fileSplit.getPath());
byte[] bytes=new byte[(int) fileSplit.getLength()];
IOUtils.readFully(fsDataInputStream,bytes,0, (int) fileSplit.getLength());
byteWritable.set(bytes,0, (int) fileSplit.getLength());
processec=true;
return true;
}
return false;
}
//返回k1
@Override
public NullWritable getCurrentKey() throws IOException, InterruptedException {
return NullWritable.get();
}
//返回v1
@Override
public BytesWritable getCurrentValue() throws IOException, InterruptedException {
return byteWritable;
}
// 获取文件读取进度
@Override
public float getProgress() throws IOException, InterruptedException {
return 0;
}
@Override
public void close() throws IOException {
fileSystem.close();
fsDataInputStream.close();
}
}
map code
package format;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import java.io.IOException;
public class map extends Mapper<NullWritable,BytesWritable,Text,BytesWritable>{
@Override
protected void map(NullWritable key, BytesWritable value, Context context) throws IOException, InterruptedException {
FileSplit inputSplit = (FileSplit) context.getInputSplit();
String name = inputSplit.getPath().getName();
context.write(new Text(name),value);
}
}
job code
package format;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.net.URI;
public class job extends Configured implements Tool {
@Override
public int run(String[] strings) throws Exception {
Job job=Job.getInstance(super.getConf(),"format");
job.setInputFormatClass(MyINputFormat.class);
MyINputFormat.addInputPath(job,new Path("file:///C:\\Users\\asus\\Desktop\\jar"));
job.setMapperClass(map.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(BytesWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(BytesWritable.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
Path path=new Path("hdfs://node1:8020/format/output");
SequenceFileOutputFormat.setOutputPath(job,path);
FileSystem fileSystem=FileSystem.get(new URI("hdfs://node1:8020"),new Configuration());
if(fileSystem.exists(path)){
fileSystem.delete(path,true);
}
boolean b = job.waitForCompletion(true);
return b ? 0:1;
}
public static void main(String[] args) throws Exception {
Configuration configuration=new Configuration();
int run = ToolRunner.run(configuration, new job(), args);
System.out.println(run);
}
}
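To check that the merge worked, the SequenceFile produced by the job above can be read back. A minimal sketch; the part file name part-r-00000 is an assumption about the default single-reducer output:

```java
package format;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class ReadMergedSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path seqPath = new Path("hdfs://node1:8020/format/output/part-r-00000");
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(seqPath));
        Text key = new Text();                     // original file name written by the map class
        BytesWritable value = new BytesWritable(); // full file contents
        while (reader.next(key, value)) {
            System.out.println(key + " -> " + value.getLength() + " bytes");
        }
        reader.close();
    }
}
```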
20210217
Custom file output: OutputFormat
map class
package outputFormat;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class map extends Mapper<LongWritable,Text,Text,NullWritable> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
context.write(value,NullWritable.get());
}
}
MyOutputFormat class
package outputFormat;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class MyOutputFormat extends FileOutputFormat<Text,NullWritable> {
@Override
public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
FileSystem fileSystem = FileSystem.get(taskAttemptContext.getConfiguration());
FSDataOutputStream fsDataOutputStream = fileSystem.create(new Path("hdfs://node1:8020/output1/output"));
FSDataOutputStream fsDataOutputStream1 = fileSystem.create(new Path("hdfs://node1:8020/output1/output1"));
MyRecordWriter myRecordWriter = new MyRecordWriter(fsDataOutputStream, fsDataOutputStream1);
return myRecordWriter;
}
}
MyRecordWriter class
package outputFormat;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import java.io.IOException;
public class MyRecordWriter extends RecordWriter<Text,NullWritable> {
FSDataOutputStream fsDataOutputStream ;
FSDataOutputStream fsDataOutputStream1;
public MyRecordWriter(FSDataOutputStream fsDataOutputStream, FSDataOutputStream fsDataOutputStream1) {
this.fsDataOutputStream = fsDataOutputStream;
this.fsDataOutputStream1 = fsDataOutputStream1;
}
@Override
public void write(Text text, NullWritable nullWritable) throws IOException, InterruptedException {
String[] split = text.toString().split("\t");
int jiu=Integer.valueOf(split[9]);
if (jiu<=1){
fsDataOutputStream.write(text.toString().getBytes());
fsDataOutputStream.write("\r\n".getBytes());
}else {
fsDataOutputStream1.write(text.toString().getBytes());
fsDataOutputStream1.write("\r\n".getBytes());
}
}
@Override
public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
// fsDataOutputStream.close();
// fsDataOutputStream1.close();
IOUtils.closeStream(fsDataOutputStream);
IOUtils.closeStream(fsDataOutputStream1);
}
}
job class
package outputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.net.URI;
public class job extends Configured implements Tool {
@Override
public int run(String[] strings) throws Exception {
Job job= Job.getInstance(super.getConf(),"partitioner");
job.setJarByClass(job.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(job,new Path("hdfs://node1:8020/output/input"));
job.setMapperClass(outputFormat.map.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputFormatClass(MyOutputFormat.class);
TextOutputFormat.setOutputPath(job,new Path("hdfs://node1:8020/output1"));
FileSystem fileSystem= FileSystem.get(new URI("hdfs://node1:8020"),new Configuration());
boolean file = fileSystem.exists(new Path("hdfs://node1:8020/output1"));
if (file){
fileSystem.delete(new Path("hdfs://node1:8020/output1"),true);
}
boolean b = job.waitForCompletion(true);
return b ? 0:1;
}
public static void main(String[] args)throws Exception{
Configuration configuration=new Configuration();
// configuration.addResource(new Path("/root/hadoop/hadoop-2.7.5/etc/hadoop/core-site.xml"));
// configuration.set("fs.defaultFS", "hdfs://node1:8020");
int run = ToolRunner.run(configuration, new job(), args);
System.exit(run);
}
}
20210218
Hive installation
cd /export/softwares/
tar -zxvf apache-hive-2.1.1-bin.tar.gz -C ../servers/
Edit hive-env.sh:
cd /export/servers/apache-hive-2.1.1-bin/conf
cp hive-env.sh.template hive-env.sh
HADOOP_HOME=/export/servers/hadoop-2.7.5
export HIVE_CONF_DIR=/export/servers/apache-hive-2.1.1-bin/conf
Edit hive-site.xml:
cd /export/servers/apache-hive-2.1.1-bin/conf
vim hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://node03:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>true</value>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>node03</value>
</property>
</configuration>
Hive stores its metadata in MySQL, so it needs to connect to the MySQL database; add a MySQL JDBC driver to the Hive installation and Hive is then ready to start: upload the prepared mysql-connector-java-5.1.38.jar into /export/servers/apache-hive-2.1.1-bin/lib.
At this point the Hive installation and deployment is complete; next we look at Hive's three interaction modes.
Step 5: configure Hive's environment variables. Run the following on the node03 server:
sudo vim /etc/profile
export HIVE_HOME=/export/servers/apache-hive-2.1.1-bin
export PATH=:$HIVE_HOME/bin:$PATH
2.6. Hive interaction modes
First interaction mode: bin/hive
20210220
Hive operations practice
HQL statements
20210227
Installation
Upload and extract the HBase installation package:
tar -xvzf hbase-2.1.0.tar.gz -C ../server/
Edit the HBase configuration files
hbase-env.sh
cd /export/server/hbase-2.1.0/conf
vim hbase-env.sh
# around line 28
export JAVA_HOME=/export/server/jdk1.8.0_241/
export HBASE_MANAGES_ZK=false
2.1.2.2 hbase-site.xml
vim hbase-site.xml
------------------------------
<configuration>
<!-- HBase数据在HDFS中的存放的路径 -->
<property>
<name>hbase.rootdir</name>
<value>hdfs://node1.itcast.cn:8020/hbase</value>
</property>
<!-- Hbase的运行模式。false是单机模式,true是分布式模式。若为false,Hbase和Zookeeper会运行在同一个JVM里面 -->
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<!-- ZooKeeper的地址 -->
<property>
<name>hbase.zookeeper.quorum</name>
<value>node1.itcast.cn,node2.itcast.cn,node3.itcast.cn</value>
</property>
<!-- ZooKeeper快照的存储位置 -->
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/export/server/apache-zookeeper-3.6.0-bin/data</value>
</property>
<!-- V2.1版本,在分布式情况下, 设置为false -->
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
</property>
</configuration>
Configure environment variables
# Configure HBase environment variables
vim /etc/profile
export HBASE_HOME=/export/server/hbase-2.1.0
export PATH=$PATH:${HBASE_HOME}/bin:${HBASE_HOME}/sbin
# Load the environment variables
source /etc/profile
2.1.4 Copy the jar into lib
cp $HBASE_HOME/lib/client-facing-thirdparty/htrace-core-3.1.0-incubating.jar $HBASE_HOME/lib/
Edit the regionservers file
vim regionservers
node1.itcast.cn
node2.itcast.cn
node3.itcast.cn
Distribute the installation package and configuration files
cd /export/server
scp -r hbase-2.1.0/ node2.itcast.cn:$PWD
scp -r hbase-2.1.0/ node3.itcast.cn:$PWD
scp -r /etc/profile node2.itcast.cn:/etc
scp -r /etc/profile node3.itcast.cn:/etc
Load the environment variables on node2.itcast.cn and node3.itcast.cn:
source /etc/profile
Start HBase
cd /export/onekey
# Start ZooKeeper
./start-zk.sh
# Start Hadoop
start-dfs.sh
# Start HBase
start-hbase.sh
2.1.8 Verify that HBase started successfully
# Start the hbase shell client
hbase shell
# Type status
[root@node1 onekey]# hbase shell
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/export/server/hadoop-2.7.5/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/export/server/hbase-2.1.0/lib/client-facing-thirdparty/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
HBase Shell
Use "help" to get list of supported commands.
Use "exit" to quit this interactive shell.
Version 2.1.0, re1673bb0bbfea21d6e5dba73e013b09b8b49b89b, Tue Jul 10 17:26:48 CST 2018
Took 0.0034 seconds
Ignoring executable-hooks-1.6.0 because its extensions are not built. Try: gem pristine executable-hooks --version 1.6.0
Ignoring gem-wrappers-1.4.0 because its extensions are not built. Try: gem pristine gem-wrappers --version 1.4.0
2.4.1 :001 > status
1 active master, 0 backup masters, 3 servers, 0 dead, 0.6667 average load
Took 0.4562 seconds
2.4.1 :002 >
WebUI
http://node1.itcast.cn:16010/master-status
20210301
Java API operations
1. Import dependencies
<repositories><!-- repositories -->
<repository>
<id>aliyun</id>
<url>http://maven.aliyun.com/nexus/content/groups/public/</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
<updatePolicy>never</updatePolicy>
</snapshots>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.14.3</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<target>1.8</target>
<source>1.8</source>
</configuration>
</plugin>
</plugins>
</build>
2. Copy the HBase and Hadoop configuration files
Copy the following three configuration files into the resources directory:
hbase-site.xml
Download from Linux with: sz /export/server/hbase-2.1.0/conf/hbase-site.xml
core-site.xml
Download from Linux with: sz /export/server/hadoop-2.7.5/etc/hadoop/core-site.xml
log4j.properties
Creating a table
package api_test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.testng.annotations.AfterTest;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import java.io.IOException;
public class TableminTest {
private Connection connection;
private Admin admin;
@BeforeTest
public void beforeTest() throws IOException {
Configuration configuration= HBaseConfiguration.create();
connection= ConnectionFactory.createConnection(configuration);
admin=connection.getAdmin();
}
@Test
public void createtable() throws IOException {
TableName tableName = TableName.valueOf("WATER_BILL");
// 1. 判断表是否存在
if(admin.tableExists(tableName)) {
// a) 存在,则退出
return;
}
// 构建表
// 2. 使用TableDescriptorBuilder.newBuilder构建表描述构建器
// TableDescriptor: 表描述器,描述这个表有几个列蔟、其他的属性都是在这里可以配置
TableDescriptorBuilder tableDescriptorBuilder = TableDescriptorBuilder.newBuilder(tableName);
// 3. 使用ColumnFamilyDescriptorBuilder.newBuilder构建列蔟描述构建器
// 创建列蔟也需要有列蔟的描述器,需要用一个构建起来构建ColumnFamilyDescriptor
// 经常会使用到一个工具类:Bytes(hbase包下的Bytes工具类)
// 这个工具类可以将字符串、long、double类型转换成byte[]数组
// 也可以将byte[]数组转换为指定类型
ColumnFamilyDescriptorBuilder columnFamilyDescriptorBuilder = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("C1"));
// 4. 构建列蔟描述,构建表描述
ColumnFamilyDescriptor cfDes = columnFamilyDescriptorBuilder.build();
// 建立表和列蔟的关联
tableDescriptorBuilder.setColumnFamily(cfDes);
TableDescriptor tableDescriptor = tableDescriptorBuilder.build();
// 5. 创建表
admin.createTable(tableDescriptor);
}
@Test
public void droptableTest() throws IOException {
TableName tableName=TableName.valueOf("WATER_BILL");
if (admin.tableExists(tableName)){
admin.disableTable(tableName);
admin.deleteTable(tableName);
}
}
@AfterTest
public void afterTest() throws IOException {
admin.close();
connection.close();
}
}
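To double-check that createtable worked, the Admin API can list the tables in the cluster. A small sketch that would slot into the same test class, reusing the admin field initialized in beforeTest:

```java
@Test
public void listTablesSketch() throws IOException {
    // WATER_BILL should show up here after createtable has run
    for (TableName name : admin.listTableNames()) {
        System.out.println(name.getNameAsString());
    }
}
```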
Operating on table data
package data_text;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueExcludeFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.testng.annotations.AfterTest;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
public class Datatest {
private Connection connection;
@BeforeTest
public void beforeTest() throws IOException {
Configuration configuration= HBaseConfiguration.create();
connection= ConnectionFactory.createConnection(configuration);
}
@Test
public void putTest() throws IOException {
Table tablename = connection.getTable(TableName.valueOf("mai"));
ArrayList<String> strings = toArrayByFileReader1();
for (String i : strings){
try {
String[] split = i.split(" ");
String rowKey=split[3];
String colunName="C1";
// String name="NAME";
Put put = new Put(Bytes.toBytes(rowKey));
System.out.println(split.length);
put.addColumn(Bytes.toBytes(colunName),Bytes.toBytes("one"),Bytes.toBytes(split[0]));
put.addColumn(Bytes.toBytes(colunName),Bytes.toBytes("two"),Bytes.toBytes(split[1]));
put.addColumn(Bytes.toBytes(colunName),Bytes.toBytes("three"),Bytes.toBytes(split[2]));
put.addColumn(Bytes.toBytes(colunName),Bytes.toBytes("four"),Bytes.toBytes(split[3]));
put.addColumn(Bytes.toBytes(colunName),Bytes.toBytes("five"),Bytes.toBytes(split[4]));
put.addColumn(Bytes.toBytes(colunName),Bytes.toBytes("six"),Bytes.toBytes(split[5]));
put.addColumn(Bytes.toBytes(colunName),Bytes.toBytes("seven"),Bytes.toBytes(split[6]));
put.addColumn(Bytes.toBytes(colunName),Bytes.toBytes("eight"),Bytes.toBytes(split[7]));
put.addColumn(Bytes.toBytes(colunName),Bytes.toBytes("nine"),Bytes.toBytes(split[8]));
put.addColumn(Bytes.toBytes(colunName),Bytes.toBytes("ten"),Bytes.toBytes(split[9]));
tablename.put(put);
}catch (Exception e){
}
}
// close the table once, after all rows have been written (closing it inside the loop breaks later puts)
tablename.close();
// put.addColumn(Bytes.toBytes(colunName),Bytes.toBytes(name),Bytes.toBytes("朱国滔"));
}
// 读取数据
@Test
public void getdata() throws IOException {
Table tablename = connection.getTable(TableName.valueOf("WATER_BILL"));
Get get = new Get(Bytes.toBytes("00001"));
Result resul=tablename.get(get);
List<Cell> cellList=resul.listCells();
byte[] rowkey=resul.getRow();
System.out.println(Bytes.toString(rowkey));
System.out.println(rowkey);
for (Cell cell:cellList){
String s = Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
String s1 = Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
String s2 = Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
System.out.println(s+":"+s1+"->"+s2);
}
tablename.close();
}
@Test
public void dropdata() throws IOException {
Table tablename = connection.getTable(TableName.valueOf("WATER_BILL"));
Delete delete=new Delete(Bytes.toBytes("00001"));
tablename.delete(delete);
tablename.close();
}
@Test
public void scandata() throws IOException {
Table tablename = connection.getTable(TableName.valueOf("WATER_BILL"));
Scan scan = new Scan();
SingleColumnValueExcludeFilter startsingleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter(Bytes.toBytes("C1")
, Bytes.toBytes("RECORD_DATE")
, CompareOperator.GREATER_OR_EQUAL,
new BinaryComparator(Bytes.toBytes("2020-06-01")));
SingleColumnValueExcludeFilter stopsingleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter(Bytes.toBytes("C1")
, Bytes.toBytes("RECORD_DATE")
, CompareOperator.LESS_OR_EQUAL,
new BinaryComparator(Bytes.toBytes("2020-06-30")));
FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL, startsingleColumnValueExcludeFilter, stopsingleColumnValueExcludeFilter);
scan.setFilter(filterList);
ResultScanner results=tablename.getScanner(scan);
Iterator<Result> iterator = results.iterator();
while (iterator.hasNext()){
Result resul = iterator.next();
List<Cell> cellList=resul.listCells();
byte[] rowkey=resul.getRow();
System.out.println(Bytes.toString(rowkey));
System.out.println(rowkey);
String s2="";
for (Cell cell:cellList){
String s = Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
String s1 = Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
if (s1.equals("NUM_CURRENT")||s1.equals("NUM_PREVIOUS")||s1.equals("NUM_USAGE")||s1.equals("TOTAL_MONEY")){
s2 = Bytes.toDouble(cell.getValueArray())+"";
}else {
s2 = Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
}
System.out.println(s+":"+s1+"->"+s2);
}
}
results.close();
tablename.close();
}
@Test
public ArrayList<String> toArrayByFileReader1() {
// 使用ArrayList来存储每行读取到的字符串
ArrayList<String> arrayList = new ArrayList<>();
try {
FileReader fr = new FileReader("C:\\Users\\asus\\Desktop\\data\\shuju.txt");
BufferedReader bf = new BufferedReader(fr);
String str;
// 按行读取字符串
while ((str = bf.readLine()) != null) {
arrayList.add(str);
}
bf.close();
fr.close();
} catch (IOException e) {
e.printStackTrace();
}
// 对ArrayList中存储的字符串进行处理
for (String i : arrayList){
String[] split = i.split(" ");
System.out.println(split.length);
System.out.println(split[0]);
}
// 返回数组
return arrayList;
}
@AfterTest
public void afterTest() throws IOException {
connection.close();
}
}
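If only specific columns are needed, the Get used in getdata above can be narrowed with addColumn. A minimal sketch; the C1:NAME column is an assumption taken from the commented-out line in putTest:

```java
Get get = new Get(Bytes.toBytes("00001"));
get.addColumn(Bytes.toBytes("C1"), Bytes.toBytes("NAME"));   // fetch only C1:NAME
Table table = connection.getTable(TableName.valueOf("WATER_BILL"));
Result result = table.get(get);
System.out.println(Bytes.toString(result.getValue(Bytes.toBytes("C1"), Bytes.toBytes("NAME"))));
table.close();
```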
20210303
MoMo chat data import / query service
package momo_chat.service.impl;
import momo_chat.entity.MSG;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
public class HBaseNativeceMessage implements ChatMessageService {
private Connection connection;
private SimpleDateFormat simpleDateFormat;
public HBaseNativeceMessage() throws IOException {
Configuration configuration = HBaseConfiguration.create();
connection= ConnectionFactory.createConnection(configuration);
}
@Override
public List<MSG> getMessage(String date, String sender, String receiver) throws Exception {
Scan scan = new Scan();
String startDateStr=date+" 00:00:00";
String endDateStr=date+" 23:59:59";
SingleColumnValueFilter startsingleColumnValueFilter = new SingleColumnValueFilter(Bytes.toBytes("C1")
, Bytes.toBytes("msg_time")
, CompareOperator.GREATER_OR_EQUAL
, new BinaryComparator(Bytes.toBytes(startDateStr)));
SingleColumnValueFilter endsingleColumnValueFilter = new SingleColumnValueFilter(Bytes.toBytes("C1")
, Bytes.toBytes("msg_time")
, CompareOperator.LESS_OR_EQUAL
, new BinaryComparator(Bytes.toBytes(endDateStr)));
SingleColumnValueFilter sendsingleColumnValueFilter = new SingleColumnValueFilter(Bytes.toBytes("C1")
, Bytes.toBytes("sender_account")
, CompareOperator.EQUAL
, new BinaryComparator(Bytes.toBytes(sender)));
SingleColumnValueFilter receiversingleColumnValueFilter = new SingleColumnValueFilter(Bytes.toBytes("C1")
, Bytes.toBytes("receiver_account")
, CompareOperator.EQUAL
, new BinaryComparator(Bytes.toBytes(receiver)));
FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL
, startsingleColumnValueFilter
, endsingleColumnValueFilter
, sendsingleColumnValueFilter
, receiversingleColumnValueFilter);
scan.setFilter(filterList);
Table table = connection.getTable(TableName.valueOf("MOMO_CHAT:MSG"));
ResultScanner scanner = table.getScanner(scan);
Iterator<Result> iterable=scanner.iterator();
ArrayList<MSG> objects = new ArrayList<>();
while (iterable.hasNext()){
Result result=iterable.next();
MSG msg=new MSG();
String s = Bytes.toString(result.getRow());
List<Cell> cellList=result.listCells();
for (Cell cell:cellList){
String colimname=Bytes.toString(cell.getQualifierArray(),cell.getQualifierOffset(),cell.getQualifierLength());
if (colimname.equals("msg_time")){
msg.setMsg_time(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("sender_nickyname")){
msg.setSender_nickyname(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("sender_account")){
msg.setSender_account(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("sender_sex")){
msg.setSender_sex(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("sender_ip")){
msg.setSender_ip(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("sender_os")){
msg.setSender_os(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("sender_phone_type")){
msg.setSender_phone_type(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("sender_network")){
msg.setSender_network(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("sender_gps")){
msg.setSender_gps(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("receiver_nickyname")){
msg.setReceiver_nickyname(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("receiver_ip")){
msg.setReceiver_ip(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("receiver_account")){
msg.setReceiver_account(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("receiver_os")){
msg.setReceiver_os(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("receiver_phone_type")){
msg.setReceiver_phone_type(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("receiver_network")){
msg.setReceiver_network(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("receiver_gps")){
msg.setReceiver_gps(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("receiver_sex")){
msg.setReceiver_sex(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("msg_type")){
msg.setMsg_type(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("distance")){
msg.setDistance(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
if(colimname.equals("message")){
msg.setMessage(Bytes.toString(cell.getValueArray(),cell.getValueOffset(),cell.getValueLength()));
}
}
// add the fully populated MSG once per row, after all of its cells have been read
objects.add(msg);
}
// close the scanner and table only after the whole scan has been consumed
scanner.close();
table.close();
return objects;
}
@Override
public void close() throws IOException {
connection.close();
}
}
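A minimal usage sketch for the service above, assuming the MOMO_CHAT:MSG table is already populated and hbase-site.xml is on the classpath; the demo class name is illustrative, and the date and accounts reuse the values from the index query example later in these notes.
package momo_chat.service.impl;
import momo_chat.entity.MSG;
import java.util.List;
// Hypothetical demo class; relies on ChatMessageService and HBaseNativeceMessage from above
public class HBaseNativeMessageDemo {
    public static void main(String[] args) throws Exception {
        ChatMessageService service = new HBaseNativeceMessage();
        // query one day of chat between two accounts (placeholder values)
        List<MSG> messages = service.getMessage("2020-08-29", "13504113666", "18182767005");
        for (MSG msg : messages) {
            System.out.println(msg);
        }
        service.close();
    }
}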
20210304
6.2 Installing Phoenix
6.2.1 Download
Download the Phoenix release that matches your HBase version from the official site. For HBase 2.1, use version "5.0.0-HBase-2.0".
http://phoenix.apache.org/download.html
You can also use the package provided in the course materials.
6.2.2 Installation
1. Upload the package to the Linux system and extract it
cd /export/software
tar -xvzf apache-phoenix-5.0.0-HBase-2.0-bin.tar.gz -C ../server/
2. Copy all of the Phoenix jar files into the HBase lib directory on every RegionServer and Master node
# Copy the jar files into the hbase lib directory
cp /export/server/apache-phoenix-5.0.0-HBase-2.0-bin/phoenix-*.jar /export/server/hbase-2.1.0/lib/
# Change into the hbase lib directory
cd /export/server/hbase-2.1.0/lib/
# Distribute the jar files to every HBase node
scp phoenix-*.jar node2.itcast.cn:$PWD
scp phoenix-*.jar node3.itcast.cn:$PWD
3. Edit the configuration file
cd /export/server/hbase-2.1.0/conf/
vim hbase-site.xml
------
# 1. Append the following properties to hbase-site.xml
<!-- Enable HBase namespace mapping -->
<property>
<name>phoenix.schema.isNamespaceMappingEnabled</name>
<value>true</value>
</property>
<!-- Enable indexed WAL encoding (needed for Phoenix secondary indexes) -->
<property>
<name>hbase.regionserver.wal.codec</name>
<value>org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec</value>
</property>
# 2. Distribute hbase-site.xml to every node
scp hbase-site.xml node2.itcast.cn:$PWD
scp hbase-site.xml node3.itcast.cn:$PWD
4. Copy the updated hbase-site.xml into the Phoenix bin directory
cp /export/server/hbase-2.1.0/conf/hbase-site.xml /export/server/apache-phoenix-5.0.0-HBase-2.0-bin/bin/
5. Restart HBase
stop-hbase.sh
start-hbase.sh
6. Start the Phoenix client and connect to the Phoenix server
Note: the first time Phoenix connects to HBase it will be a little slow.
cd /export/server/apache-phoenix-5.0.0-HBase-2.0-bin/
bin/sqlline.py node1.itcast.cn:2181
# Enter !table to list the tables in Phoenix
20210306
Create table statement:
create table if not exists ORDER_DTL(
ID varchar primary key,
C1.STATUS varchar,
C1.MONEY float,
C1.PAY_WAY integer,
C1.USER_ID varchar,
C1.OPERATION_TIME varchar,
C1.CATEGORY varchar
);
6.4 View table information
!desc ORDER_DTL
6.4.1 Drop table syntax
drop table if exists ORDER_DTL;
6.4.2 Case sensitivity
If column family and column names are not wrapped in double quotes, Phoenix automatically converts them to upper case.
For example:
create table if not exists ORDER_DTL(
id varchar primary key,
C1.status varchar,
C1.money double,
C1.pay_way integer,
C1.user_id varchar,
C1.operation_time varchar,
C1.category varchar
);
Insert data
upsert into table_name(column_family.column_name, xxxx, ) VALUES(XXX, XXX, XXX)
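For a concrete example, here is a hedged sketch that upserts one row into the ORDER_DTL table defined above through the Phoenix JDBC driver (the same jdbc:phoenix:node1:2181 URL used later in these notes); the class name and all row values are illustrative placeholders.
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
public class OrderDtlUpsertDemo {
    public static void main(String[] args) throws Exception {
        // load the Phoenix JDBC driver and connect through ZooKeeper
        Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");
        Connection conn = DriverManager.getConnection("jdbc:phoenix:node1:2181");
        // UPSERT inserts a new row, or updates the existing row with the same primary key
        String sql = "UPSERT INTO ORDER_DTL(ID, C1.STATUS, C1.MONEY, C1.PAY_WAY, C1.USER_ID, C1.OPERATION_TIME, C1.CATEGORY) "
                + "VALUES(?, ?, ?, ?, ?, ?, ?)";
        try (PreparedStatement ps = conn.prepareStatement(sql)) {
            ps.setString(1, "000001");              // placeholder order id
            ps.setString(2, "PAID");                // placeholder status
            ps.setFloat(3, 99.9f);                  // placeholder amount
            ps.setInt(4, 1);                        // placeholder pay way
            ps.setString(5, "8237476");             // user id reused from a later query example
            ps.setString(6, "2021-03-06 10:00:00"); // placeholder operation time
            ps.setString(7, "PHONE");               // placeholder category
            ps.executeUpdate();
        }
        // Phoenix connections do not auto-commit by default, so commit explicitly
        conn.commit();
        conn.close();
    }
}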
List all tables
!table
6.4.4.3 Query data by ID
SELECT * FROM ORDER_DTL WHERE "id" = '000001';
6.4.5 Delete data by ID
DELETE FROM ORDER_DTL WHERE "id" = '000001';
6.4.7 Paged queries
limit and offset make pagination straightforward.
limit is the number of records per page, and offset is the record to start from.
-- Page 1
select * from ORDER_DTL limit 10 offset 0;
-- Page 2
-- offset starts at 10
select * from ORDER_DTL limit 10 offset 10;
-- Page 3
select * from ORDER_DTL limit 10 offset 20;
6.5.1 ROWKEY pre-splitting
Pre-split the table by user ID into 4 regions (the three split points below produce four regions), and set the data compression format to GZ.
drop table if exists ORDER_DTL;
create table if not exists ORDER_DTL(
"id" varchar primary key,
C1."status" varchar,
C1."money" float,
C1."pay_way" integer,
C1."user_id" varchar,
C1."operation_time" varchar,
C1."category" varchar
)
COMPRESSION='GZ'
SPLIT ON ('3','5','7');
6.5.2 Salted table with a specified number of buckets
drop table if exists ORDER_DTL;
create table if not exists ORDER_DTL(
"id" varchar primary key,
C1."status" varchar,
C1."money" float,
C1."pay_way" integer,
C1."user_id" varchar,
C1."operation_time" varchar,
C1."category" varchar
)
COMPRESSION='GZ', SALT_BUCKETS=10;
Mapping a table in HBase
CREATE VIEW "my_hbase_table"
( k VARCHAR primary key, "v" UNSIGNED_LONG) default_column_family='a';
-- Map a table in Phoenix
CREATE VIEW my_view ( new_col SMALLINT )
AS SELECT * FROM my_table WHERE k = 100;
-- Map onto a SQL query
CREATE VIEW my_view_on_view
AS SELECT * FROM my_view WHERE new_col > 70;
Reference create statement:
-- Create the MOMO_CHAT:MSG view
create view if not exists "MOMO_CHAT"."MSG" (
"pk" varchar primary key, -- map the ROWKEY to the primary key
"C1"."msg_time" varchar,
"C1"."sender_nickyname" varchar,
"C1"."sender_account" varchar,
"C1"."sender_sex" varchar,
"C1"."sender_ip" varchar,
"C1"."sender_os" varchar,
"C1"."sender_phone_type" varchar,
"C1"."sender_network" varchar,
"C1"."sender_gps" varchar,
"C1"."receiver_nickyname" varchar,
"C1"."receiver_ip" varchar,
"C1"."receiver_account" varchar,
"C1"."receiver_os" varchar,
"C1"."receiver_phone_type" varchar,
"C1"."receiver_network" varchar,
"C1"."receiver_gps" varchar,
"C1"."receiver_sex" varchar,
"C1"."msg_type" varchar,
"C1"."distance" varchar,
"C1"."message" varchar
);
20210308
Phoenix JDBC connection
package momo_chat.service.impl;
import momo_chat.entity.MSG;
import java.io.IOException;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;
public class PhoenixChatMessageService implements ChatMessageService{
private Connection connection;
public PhoenixChatMessageService() throws ClassNotFoundException, SQLException {
// load the Phoenix JDBC driver
Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");
connection= DriverManager.getConnection("jdbc:phoenix:node1:2181");
}
@Override
public List<MSG> getMessage(String date, String sender, String receiver) throws Exception {
String sql="select * from MOMO_CHAT.MSG limit ? offset ?";
PreparedStatement preparedStatement = connection.prepareStatement(sql);
preparedStatement.setInt(1,3);
preparedStatement.setInt(2,5);
ResultSet resultSet=preparedStatement.executeQuery();
List<MSG> objects = new ArrayList<>();
while (resultSet.next()){
MSG msg=new MSG();
msg.setMsg_time(resultSet.getString("msg_time"));
msg.setSender_nickyname(resultSet.getString("sender_nickyname"));
msg.setSender_account(resultSet.getString("sender_account"));
msg.setSender_sex(resultSet.getString("sender_sex"));
msg.setSender_ip(resultSet.getString("sender_ip"));
msg.setSender_os(resultSet.getString("sender_os"));
msg.setSender_phone_type(resultSet.getString("sender_phone_type"));
msg.setSender_network(resultSet.getString("sender_network"));
msg.setSender_gps(resultSet.getString("sender_gps"));
msg.setReceiver_nickyname(resultSet.getString("receiver_nickyname"));
msg.setReceiver_ip(resultSet.getString("receiver_ip"));
msg.setReceiver_account(resultSet.getString("receiver_account"));
msg.setReceiver_os(resultSet.getString("receiver_os"));
msg.setReceiver_phone_type(resultSet.getString("receiver_phone_type"));
msg.setReceiver_network(resultSet.getString("receiver_network"));
msg.setReceiver_gps(resultSet.getString("receiver_gps"));
msg.setReceiver_sex(resultSet.getString("receiver_sex"));
msg.setMsg_type(resultSet.getString("msg_type"));
msg.setDistance(resultSet.getString("distance"));
msg.setMessage(resultSet.getString("message"));
objects.add(msg);
}
resultSet.close();
preparedStatement.close();
return objects;
}
@Override
public void close() throws IOException, SQLException {
connection.close();
}
}
20210309
Global index
Creation syntax:
CREATE INDEX index_name ON table_name (col1, col2, col3...)
Local index
CREATE LOCAL INDEX index_name ON table_name (col1, col2, col3...)
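For a concrete example, here is a hedged sketch that creates the ORDER_DTL indexes referenced by the drop and hint statements later in this section, executing the DDL through the Phoenix JDBC driver; the indexed columns are assumptions chosen for illustration.
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
public class CreateOrderDtlIndexes {
    public static void main(String[] args) throws Exception {
        // load the Phoenix JDBC driver and connect through ZooKeeper
        Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");
        Connection conn = DriverManager.getConnection("jdbc:phoenix:node1:2181");
        try (Statement stmt = conn.createStatement()) {
            // global index on the user id column (assumed indexed column)
            stmt.execute("CREATE INDEX GBL_IDX_ORDER_DTL ON ORDER_DTL(\"user_id\")");
            // global index on the operation time column (assumed indexed column)
            stmt.execute("CREATE INDEX IDX_ORDER_DTL_DATE ON ORDER_DTL(\"operation_time\")");
            // local index on the user id column
            stmt.execute("CREATE LOCAL INDEX LOCAL_IDX_ORDER_DTL ON ORDER_DTL(\"user_id\")");
        }
        conn.close();
    }
}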
Covered index
CREATE INDEX my_index ON my_table (v1,v2) INCLUDE(v3)
Functional index
CREATE INDEX UPPER_NAME_IDX ON EMP (UPPER(FIRST_NAME||' '||LAST_NAME))
-- The following query will use the index
SELECT EMP_ID FROM EMP WHERE UPPER(FIRST_NAME||' '||LAST_NAME)='JOHN DOE'
Drop a global index
drop index IDX_ORDER_DTL_DATE on ORDER_DTL;
Drop a local index
drop index LOCAL_IDX_ORDER_DTL on ORDER_DTL;
Trace a query (view its execution plan)
explain select * from ORDER_DTL where "user_id" = '8237476';
Force the use of an index
explain select /*+ INDEX(ORDER_DTL GBL_IDX_ORDER_DTL) */ * from ORDER_DTL where "user_id" = '8237476';
7.3.4 Efficient queries using Phoenix secondary indexes
7.3.4.1 Create a local functional index
CREATE LOCAL INDEX LOCAL_IDX_MOMO_MSG ON MOMO_CHAT.MSG(substr("msg_time", 0, 10), "sender_account", "receiver_account");
7.3.4.2 Run the query
SELECT * FROM "MOMO_CHAT"."MSG" T
WHERE substr("msg_time", 0, 10) = '2020-08-29'
AND T."sender_account" = '13504113666'
AND T."receiver_account" = '18182767005' LIMIT 100;
3.24
Install Scala
Simply extract the archive
tar -zxvf scala-2.12.5.tgz
Set the environment variables
export SCALA_HOME=/root/scala/scala-2.12.5
export PATH=/root/scala/scala-2.12.5/bin:$PATH
Install Spark
Extract the archive
tar -zxvf spark-2.4.7-bin-hadoop2.7.tar.gz
Go into the conf directory
cd conf
Edit the slaves configuration file and list the worker nodes
vim slaves
Edit spark-env.sh
vim spark-env.sh
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64/jre
export SCALA_HOME=/root/scala/scala-2.12.5
export HADOOP_HOME=/root/hadoop/hadoop-2.7.5
export HADOOP_CONF_DIR=/root/hadoop/hadoop-2.7.5/etc/hadoop
export SPARK_MASTER_IP=node1
Edit log4j.properties
log4j.rootCategory=WARN, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
Edit spark-defaults.conf
spark.eventLog.enabled true
spark.eventLog.dir hdfs://node1:8020/spark/eventlogs/
spark.history.fs.logDirectory hdfs://node1:8020/spark/eventlogs
spark.eventLog.compress true
Create the event log directory on HDFS first, e.g. hdfs dfs -mkdir -p /spark/eventlogs
scp the Spark directory to the worker nodes
Start the cluster on the master node with start-all.sh
3.27
Connecting to Spark from code (Scala API)
Put core-site.xml, hdfs-site.xml, and log4j.properties under the resources directory
Code
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object spark {
def main(args: Array[String]): Unit = {
val sparkConf:SparkConf=new SparkConf().setAppName("spark").setMaster("local[2]")
val sc:SparkContext=new SparkContext(sparkConf)
val inputRDD: RDD[String] =sc.textFile(path = "/2019electives/input/2019electives.csv")
val resultRDD=inputRDD.flatMap(line =>line.split(",")).map(word =>(word,1)).reduceByKey(_ + _)
// resultRDD.foreach(tuple => println(tuple))
// resultRDD.saveAsTextFile(path = "/datas/ideaspark")
resultRDD.map(tuple =>tuple.swap).sortByKey(ascending = false).take(num = 3).foreach(tuple =>println(tuple))
resultRDD.sortBy(tuple => tuple._2, ascending = false)
.take(3)
.foreach(tuple =>println(tuple))
sc.stop()
}
}
2021.04.11
Practice case: Sogou search log analysis
Bean code
package bean
case class SogouRecord (
queryTime:String,
userId:String,
queryWords:String,
resultRank:Int,
clickRank:Int,
clickUrl:String
)
Main class code
package service
import bean.SogouRecord
import com.hankcs.hanlp.HanLP
import org.apache.spark.storage.StorageLevel
import org.apache.spark.{SparkConf, SparkContext}
object SougouMap {
def main(args: Array[String]): Unit = {
val sc:SparkContext={
val sparkConf:SparkConf=new SparkConf()
.setAppName(this.getClass.getSimpleName.stripSuffix("$"))
.setMaster("local[2]")
SparkContext.getOrCreate(sparkConf)
}
val sogouRDD= sc.textFile(path = "data/sogou/SogouQ.txt")
// println(sogouRDD.count())
// sogouRDD.foreach(word=>println(word))
val eltRDD=sogouRDD
.filter{line=>null !=line && line.trim.split("\\s+").length==6
}
.mapPartitions{iter=>
iter.map{line=>
val array=line.trim.split("\\s+")
SogouRecord(
array(0),
array(1),
array(2).replace("[","").replace("]",""),
array(3).toInt,
array(4).toInt,
array(5)
)
}
}
eltRDD.persist(StorageLevel.MEMORY_AND_DISK)
// eltRDD.foreach(line=>println(line))
// println(eltRDD.first())
// Search keyword statistics
val recordRdd=eltRDD
.filter(record=> null != record.queryWords && record.queryWords.trim.length>0)
.flatMap{record=>
val queryWords=record.queryWords
val segment = HanLP.segment(queryWords)
import scala.collection.JavaConverters._
segment.asScala.map{term=>(term.word,1)}
}
.reduceByKey(_+_)
// recordRdd.foreach{word=>println(word)}
recordRdd.sortBy(tuple=>tuple._2,ascending = false)
.take(1)
.foreach(println)
// println(recordRdd.count())
// Per-user search count statistics
val preuserRDD=eltRDD.mapPartitions{iter=>
iter.map{record=>
val userId=record.userId
val querword=record.queryWords
((userId,querword),1)
}
}
.reduceByKey(_+_)
val restRDD = preuserRDD.map(tuple=>tuple._2)
restRDD.take(10).foreach(println)
println(restRDD.max())
// Search counts by time of day, aggregated by hour
eltRDD.map{record=>
val hourstr=record.queryTime.substring(0,2)
(hourstr,1)
}
.reduceByKey(_+_)
// .sortBy(tuple=>tuple._2,ascending = false)
.top(24)(Ordering.by(tuple=>tuple._2))
.foreach(println)
eltRDD.unpersist()
sc.stop()
}
}
POM dependencies
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>spark</artifactId>
<groupId>spark</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>demo4</artifactId>
<repositories>
<repository>
<id>aliyun</id>
<url>http://maven.aliyun.com/nexus/content/groups/public/</url>
</repository>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
<repository>
<id>jboss</id>
<url>http://repository.jboss.com/nexus/content/groups/public</url>
</repository>
</repositories>
<properties>
<scala.version>2.11.12</scala.version>
<scala.binary.version>2.11</scala.binary.version>
<spark.version>2.4.5</spark.version>
<hadoop.version>2.6.0-cdh5.16.2</hadoop.version>
<hbase.version>1.2.0-cdh5.16.2</hbase.version>
<mysql.version>8.0.19</mysql.version>
</properties>
<dependencies>
<!-- Scala language dependency -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<!-- Spark Core dependency -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- Spark SQL dependency -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- Hadoop client dependency -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- HBase client dependencies -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-hadoop2-compat</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
</dependency>
<!-- MySQL client dependency -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>${mysql.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.hankcs/hanlp -->
<dependency>
<groupId>com.hankcs</groupId>
<artifactId>hanlp</artifactId>
<version>portable-1.7.7</version>
</dependency>
</dependencies>
<build>
<outputDirectory>target/classes</outputDirectory>
<testOutputDirectory>target/test-classes</testOutputDirectory>
<resources>
<resource>
<directory>${project.basedir}/src/main/resources</directory>
</resource>
</resources>
<!-- Maven compiler plugins -->
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.2.0</version>
<executions>
<execution>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>