Writing Avro data to Kafka

The producer below reads lines from access.log, parses each line of the Apache combined log format with a regular expression, copies the fields into an Avro GenericRecord, serializes the record to a byte[], and sends the batch to Kafka through the legacy 0.8-era producer API (kafka.javaapi.producer.Producer).

import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

import com.cnpc.soc.avro.Log;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

public class kafkaProducer2 extends Thread {

	private String topic;

	public kafkaProducer2(String topic) {
		super();
		this.topic = topic;
	}

	@Override
	public void run() {
		Producer<String, byte[]> producer = createProducer();
		// Apache combined log format: ip, identity, userid, [time],
		// "request line", status code, response size, "referer", "user-agent"
		String regex = "^([0-9.]+)\\s([\\w.-]+)\\s([\\w.-]+)\\s(\\[[^\\[\\]]+\\])\\s\"((?:[^\"]|\\\")+)\"\\s(\\d{3})\\s(\\d+|-)\\s\"((?:[^\"]|\\\")+)\"\\s\"((?:[^\"]|\\\")+)\"$";
		Pattern pattern = Pattern.compile(regex); // compile once, not on every pass
		// Demo loop: re-reads access.log and re-sends its contents every second.
		while (true) {
			List<String> lines = TextFile.readToList("access.log");
			List<KeyedMessage<String, byte[]>> messages = new ArrayList<KeyedMessage<String, byte[]>>();
			for (String line : lines) {
				Matcher matcher = pattern.matcher(line);
				if (matcher.find()) {
					String ip = matcher.group(1);
					String identity = matcher.group(2);
					String userid = matcher.group(3);
					String time = matcher.group(4);
					String requestInfo = matcher.group(5);
					String state = matcher.group(6);
					String responce = matcher.group(7);
					String referer = matcher.group(8);
					String useragent = matcher.group(9);
					// Populate the Avro record defined by the generated Log class.
					GenericRecord record = new GenericData.Record(Log.getClassSchema());
					record.put("ip", ip);
					record.put("identity", identity);
					record.put("userid", userid);
					record.put("time", time);
					record.put("requestInfo", requestInfo);
					record.put("state", state);
					record.put("responce", responce);
					record.put("referer", referer);
					record.put("useragent", useragent);
					try {
						byte[] serializedValue = serializeEvent(record);
						messages.add(new KeyedMessage<String, byte[]>(topic, serializedValue));
					} catch (Exception e) {
						e.printStackTrace();
					}

				}
			}
			producer.send(messages); // send the whole batch
			try {
				TimeUnit.SECONDS.sleep(1);
			} catch (InterruptedException e) {
				e.printStackTrace();
			}
		}
	}

	protected byte[] serializeEvent(GenericRecord record) throws Exception {
		// Raw Avro binary encoding: no schema or header is attached, so the
		// consumer must already know the schema to decode these bytes.
		ByteArrayOutputStream bos = new ByteArrayOutputStream();
		BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(bos, null);
		GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
		writer.write(record, encoder);
		encoder.flush();
		return bos.toByteArray();
	}
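
	// Illustrative counterpart, not in the original post: decoding the raw
	// bytes back into a GenericRecord with the same schema used for writing.
	protected GenericRecord deserializeEvent(byte[] bytes) throws Exception {
		org.apache.avro.io.BinaryDecoder decoder =
				org.apache.avro.io.DecoderFactory.get().binaryDecoder(bytes, null);
		org.apache.avro.generic.GenericDatumReader<GenericRecord> reader =
				new org.apache.avro.generic.GenericDatumReader<GenericRecord>(Log.getClassSchema());
		return reader.read(null, decoder);
	}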

	private Producer<String, byte[]> createProducer() {
		Properties properties = new Properties();
		properties.put("zookeeper.connect", "xxx.xxx.xxx:2181,xxx.xxx.xxx:2181,xxx.xxx.xxx:2181");// 声明zk
		properties.put("metadata.broker.list", "xxx.xxx.xxx:6667,xxx.xxx.xxx:6667,xxx.xxx.xxx:6667");// 声明kafka broker
		return new Producer<String, byte[]>(new ProducerConfig(properties));
	}

	public static void main(String[] args) {
		new kafkaProducer2("test_log_2").start();
	}
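
	// The post does not include the TextFile helper it calls above; this is a
	// minimal stand-in (an assumption, not the author's code) so the listing
	// compiles on its own.
	static class TextFile {
		static List<String> readToList(String path) {
			try {
				return java.nio.file.Files.readAllLines(
						java.nio.file.Paths.get(path),
						java.nio.charset.StandardCharsets.UTF_8);
			} catch (java.io.IOException e) {
				e.printStackTrace();
				return new ArrayList<String>();
			}
		}
	}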

}
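
The code assumes an Avro class Log generated from a schema file. The post does not show that schema; a plausible Log.avsc matching the fields written above (keeping the original "responce" spelling, which the generated class would share) might look like this:

{
  "namespace": "com.cnpc.soc.avro",
  "type": "record",
  "name": "Log",
  "fields": [
    {"name": "ip",          "type": "string"},
    {"name": "identity",    "type": "string"},
    {"name": "userid",      "type": "string"},
    {"name": "time",        "type": "string"},
    {"name": "requestInfo", "type": "string"},
    {"name": "state",       "type": "string"},
    {"name": "responce",    "type": "string"},
    {"name": "referer",     "type": "string"},
    {"name": "useragent",   "type": "string"}
  ]
}

A class like Log can be generated from such a schema with the avro-tools jar, e.g. java -jar avro-tools-1.7.7.jar compile schema Log.avsc src/main/java/ (version is illustrative). Because serializeEvent writes raw binary with no schema attached, consumers must obtain the same schema out of band to decode the messages.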