avro field命名规则
全名的名字部分和record的field名字必须:
- 以[A-Za-z_]开头
- 接下来的名字中只能包含[A-Za-z0-9_]
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-core</artifactId>
    <version>1.1.0</version>
</dependency>
.avro文件是序列化后的数据
.avsc文件是schema
将avsc文件编译为java文件
java -jar /path/to/avro-tools-1.8.0.jar compile schema <schema file> <destination>
在pom中添加avro的插件显示Plugin execution not covered by lifecycle configuration: org.apache.avro:avro-maven-plugin
<plugin>
    <groupId>org.apache.avro</groupId>
    <artifactId>avro-maven-plugin</artifactId>
    <version>1.7.6</version>
    <executions>
        <execution>
            <phase>generate-sources</phase>
            <goals>
                <goal>schema</goal>
            </goals>
            <configuration>
                <sourceDirectory>${project.basedir}/../</sourceDirectory>
                <outputDirectory>${project.basedir}/src/main/java/</outputDirectory>
            </configuration>
        </execution>
    </executions>
</plugin>
原因是eclipse中的maven插件是m2eclipse的,而网上给的avro插件是对应Apache maven插件的。替换成下面的插件就行了
<plugins>
    <plugin>
        <groupId>org.eclipse.m2e</groupId>
        <artifactId>lifecycle-mapping</artifactId>
        <version>1.0.0</version>
        <configuration>
            <lifecycleMappingMetadata>
                <pluginExecutions>
                    <pluginExecution>
                        <pluginExecutionFilter>
                            <groupId>org.apache.avro</groupId>
                            <artifactId>avro-maven-plugin</artifactId>
                            <versionRange>[0.0,)</versionRange>
                            <goals>
                                <goal>schema</goal>
                                <goal>idl-protocol</goal>
                            </goals>
                        </pluginExecutionFilter>
                        <action>
                            <execute />
                        </action>
                    </pluginExecution>
                </pluginExecutions>
            </lifecycleMappingMetadata>
        </configuration>
    </plugin>
</plugins>
avro官方mapreduce教程,关于如何添加pom依赖等
<dependency>
    <groupId>org.apache.avro</groupId>
    <artifactId>avro</artifactId>
    <version>1.7.6</version>
</dependency>
<dependency>
    <groupId>org.apache.avro</groupId>
    <artifactId>avro-mapred</artifactId>
    <version>1.7.6</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-core</artifactId>
    <version>1.1.0</version>
</dependency>
package com.corp.tsfile.avro;
import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
/**
 * Example: serialize three {@code Tsdata} records to an Avro data file
 * ("tsdata.avro") with {@link SpecificDatumWriter}, then read them back
 * with {@link SpecificDatumReader} and print each record.
 *
 * <p>Fixes over the original: both the writer and the reader are opened in
 * try-with-resources so they are always closed (the original leaked the
 * writer when create/append threw, and never closed the reader at all),
 * and the deprecated {@code new Long(...)} boxing is replaced by a long
 * literal.
 */
public class test {
    public static void main(String[] args) {
        // Tsdata is the Avro-generated specific record class compiled from
        // the .avsc schema (see the avro-tools / avro-maven-plugin notes above).
        Tsdata tsdata1 = new Tsdata("device_1", "1459424620505", 786751, 719451, 1000001, 115271, 39333881L);
        Tsdata tsdata2 = new Tsdata("device_1", "1459424620506", 786752, 719452, 1000002, 115272, 39333882L);
        Tsdata tsdata3 = new Tsdata("device_1", "1459424620507", 786753, 719453, 1000003, 115273, 39333883L);

        // Serialize the records to disk; DataFileWriter is Closeable, so
        // try-with-resources guarantees the file is closed even on failure.
        DatumWriter<Tsdata> tsdataDatumWriter = new SpecificDatumWriter<Tsdata>(Tsdata.class);
        try (DataFileWriter<Tsdata> dataFileWriter = new DataFileWriter<Tsdata>(tsdataDatumWriter)) {
            dataFileWriter.create(tsdata1.getSchema(), new File("tsdata.avro"));
            dataFileWriter.append(tsdata1);
            dataFileWriter.append(tsdata2);
            dataFileWriter.append(tsdata3);
        } catch (IOException e) {
            e.printStackTrace();
        }

        // Deserialize the records from disk and print them. Reusing the
        // 'tsdata' instance across next() calls avoids per-record allocation.
        DatumReader<Tsdata> tsdataDatumReader = new SpecificDatumReader<Tsdata>(Tsdata.class);
        try (DataFileReader<Tsdata> dataFileReader =
                new DataFileReader<Tsdata>(new File("tsdata.avro"), tsdataDatumReader)) {
            Tsdata tsdata = null;
            while (dataFileReader.hasNext()) {
                tsdata = dataFileReader.next(tsdata);
                System.out.println(tsdata);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
package com.corp.tsfile.avro;
import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
/**
 * Example from the official Avro getting-started tutorial: build three
 * {@code User} records (setters, alternate constructor, and builder),
 * serialize them to "users.avro", then read them back and print each one.
 *
 * <p>Fixes over the original: both the writer and the reader are opened in
 * try-with-resources so they are always closed (the original leaked the
 * writer when create/append threw, and never closed the reader at all).
 */
public class test {
    public static void main(String[] args) {
        // Construct via setters; favorite color is left null (the schema
        // must declare it as a nullable union for this to be valid).
        User user1 = new User();
        user1.setName("Alyssa");
        user1.setFavoriteNumber(256);

        // Alternate constructor generated from the schema.
        User user2 = new User("Ben", 7, "red");

        // Construct via the generated builder.
        User user3 = User.newBuilder()
                .setName("Charlie")
                .setFavoriteColor("blue")
                .setFavoriteNumber(null)
                .build();

        // Serialize user1, user2 and user3 to disk; try-with-resources
        // guarantees the file is closed even if create/append fails.
        DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
        try (DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter)) {
            dataFileWriter.create(user1.getSchema(), new File("users.avro"));
            dataFileWriter.append(user1);
            dataFileWriter.append(user2);
            dataFileWriter.append(user3);
        } catch (IOException e) {
            e.printStackTrace();
        }

        // Deserialize users from disk and print them. Reusing the 'user'
        // instance across next() calls avoids per-record allocation.
        DatumReader<User> userDatumReader = new SpecificDatumReader<User>(User.class);
        try (DataFileReader<User> dataFileReader =
                new DataFileReader<User>(new File("users.avro"), userDatumReader)) {
            User user = null;
            while (dataFileReader.hasNext()) {
                user = dataFileReader.next(user);
                System.out.println(user);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}