pom文件
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.cc.pxj.wfy</groupId>
<artifactId>phoneWcRuoZe</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<hadoop.version>2.6.0-cdh5.16.2</hadoop.version>
<hive.version>1.1.0-cdh5.16.2</hive.version>
</properties>
<repositories>
<repository>
<id>cloudera</id>
<url>http://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
<dependencies>
<!-- 添加Hadoop依赖 -->
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/junit/junit -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
<!--<scope>provided</scope>-->
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.17</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.3.1</version>
</dependency>
</dependencies>
<build>
<pluginManagement>
<plugins>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
</plugin>
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
</plugins>
</pluginManagement>
</build>
<!-- <build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin </artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<mainClass>com.ccj.pxj.hive.PxjUDF</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>-->
</project>
mapper
package com.ccj.pxj.homework.Json;
import com.google.gson.Gson;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class JsonMapper extends Mapper<LongWritable, Text,Pojo, NullWritable> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String lines = value.toString();
// 1. 判断是否含有[ 或 ]
if(lines.indexOf("[")==0 || lines.indexOf("]")==0 || lines.trim().isEmpty()){
return;
}
// 2.//删除逗号
if(lines.endsWith(",")){
lines=lines.substring(0,lines.length()-1);
}
context.write(new Gson().fromJson(lines,Pojo.class),NullWritable.get());
}
}
Reducer
package com.ccj.pxj.homework.Json;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class JsonReduce extends Reducer<Pojo, NullWritable, Pojo, NullWritable> {
@Override
protected void reduce(Pojo key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
for (NullWritable value : values) {
context.write(key,NullWritable.get());
}
}
}
Json
package com.ccj.pxj.homework.Json;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class Pojo implements WritableComparable {
private Integer id;
private String name;
private String address;
public Integer getId() {
return id;
}
public void setId(Integer id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getAddress() {
return address;
}
public void setAddress(String address) {
this.address = address;
}
public Pojo() {
}
public Pojo(Integer id, String name, String address) {
this.id = id;
this.name = name;
this.address = address;
}
@Override
public String toString() {
return id +
"\t" + name + '\t'
+ address + '\t'
;
}
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(this.id);
out.writeUTF(this.name);
out.writeUTF(this.address);
}
@Override
public void readFields(DataInput in) throws IOException {
id=in.readInt();
name=in.readUTF();
address=in.readUTF();
}
@Override
public int compareTo(Object o) {
return 0;
}
}
测试数据
[
{"id":10,"name":"pk","address":"beijing"},
{"id":11,"name":"pxj","address":"gz"},
{"id":12,"name":"wfy","address":"gx"},
{"id":13,"name":"zcl","address":"st"}
]
输出结果
13 zcl st
12 wfy gx
11 pxj gz
10 pk beijing
作者:pxj(潘陈)
日期:2020-02-08
我不爱学习,不要刺激我