使用 Spring Batch 框架将海量数据文件中的数据批量导入 Cassandra 数据库
- SpringBatch通过springbatch.xml中 batch:job标签具体定义job工作内容
- step : job 内的步骤项,这里只需要一个即可
- tasklet 任务集,可以指定线程池来执行
- chunk : read-process-write模式 读csv文件,程序处理,然后写入Cassandra
- commit-interval : 每处理多少条记录(item)提交一次事务
<!-- Batch job "integrationTradingDate": per the accompanying notes, it reads a CSV
     file, processes each record, and writes the result to Cassandra. -->
<batch:job id="integrationTradingDate">
  <!-- The job consists of this single step. -->
  <batch:step id="readTradingDateWriteToDatebase">
    <!-- The step runs on the "taskExecutor" bean (defined outside this snippet);
         throttle-limit caps the number of concurrent executions at 20. -->
    <batch:tasklet task-executor="taskExecutor" throttle-limit="20">
      <!-- Chunk-oriented read / process / write; commit-interval="12" means one
           transaction commit per 12 items. The reader, processor and writer beans
           are defined outside this snippet. -->
      <batch:chunk reader="inputDateReader"
                   processor="itemProcessor"
                   writer="ProductItemWriter"
                   commit-interval="12"/>
    </batch:tasklet>
  </batch:step>
</batch:job>
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<parent>
<artifactId>dataIntegration</artifactId>
<groupId>com.beifa.cn.data.integration</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>tradingDate</artifactId>
<packaging>jar</packaging>
<name>A Camel Route</name>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties>
<dependencyManagement>
<dependencies>
<!-- Camel BOM -->
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-parent</artifactId>
<version>2.22.0</version>
<scope>import</scope>
<type>pom</type>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-core</artifactId>
</dependency>
<!-- logging -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<scope>runtime</scope>
</dependency>
<!-- testing