SpringBatch批处理数据到Cassandra

使用SpringBatch框架将海量数据文件中数据批量导入Cassandra数据库中


  • SpringBatch通过springbatch.xml中 batch:job标签具体定义job工作内容
  • step job内步骤项 这里只需要一个就可以
  • tasklet 任务集,可以指定线程池来执行
  • chunk : read-process-write模式 读csv文件,程序处理,然后写入Cassandra
  • commit-interval : 每处理多少条记录提交一次事务(即一个chunk的大小)
    <batch:job id="integrationTradingDate">
		<batch:step id="readTradingDateWriteToDatebase">
			<batch:tasklet task-executor="taskExecutor"
                           throttle-limit="20">
				<batch:chunk reader="inputDateReader" writer="ProductItemWriter"
							 processor="itemProcessor" commit-interval="12">
				</batch:chunk>
			</batch:tasklet>
		</batch:step>
	</batch:job>

项目源码下载

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <parent>
        <artifactId>dataIntegration</artifactId>
        <groupId>com.beifa.cn.data.integration</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>

    <modelVersion>4.0.0</modelVersion>

    <artifactId>tradingDate</artifactId>
    <packaging>jar</packaging>

    <name>A Camel Route</name>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
    </properties>

    <dependencyManagement>
        <dependencies>
            <!-- Camel BOM: pins versions for the org.apache.camel artifacts below -->
            <dependency>
                <groupId>org.apache.camel</groupId>
                <artifactId>camel-parent</artifactId>
                <version>2.22.0</version>
                <scope>import</scope>
                <type>pom</type>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <dependencies>

        <dependency>
            <groupId>org.apache.camel</groupId>
            <artifactId>camel-core</artifactId>
        </dependency>

        <!-- logging -->
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-api</artifactId>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
            <scope>runtime</scope>
        </dependency>

        <!-- testing -->
        <dependency>
            <groupId>org.apache.camel</groupId>
            <artifactId>camel-test</artifactId>
            <scope>test</scope>
        </dependency>

        <!-- Spring Batch: job/step/chunk infrastructure used by springbatch.xml -->
        <dependency>
            <groupId>org.springframework.batch</groupId>
            <artifactId>spring-batch-core</artifactId>
            <version>4.0.1.RELEASE</version>
        </dependency>

        <!-- Cassandra: Spring Data repositories + mapping annotations -->
        <dependency>
            <groupId>org.springframework.data</groupId>
            <artifactId>spring-data-cassandra</artifactId>
            <version>2.0.8.RELEASE</version>
        </dependency>

        <dependency>
            <groupId>org.apache.cassandra</groupId>
            <artifactId>cassandra-all</artifactId>
            <version>3.11.2</version>
        </dependency>
    </dependencies>

    <build>
        <defaultGoal>install</defaultGoal>

        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.7.0</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-resources-plugin</artifactId>
                <version>3.0.2</version>
                <configuration>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>

            <!-- Allows the example to be run via 'mvn compile exec:java' -->
            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>exec-maven-plugin</artifactId>
                <version>1.6.0</version>
                <configuration>
                    <mainClass>com.beifa.cn.data.integration.MainApp</mainClass>
                    <includePluginDependencies>false</includePluginDependencies>
                </configuration>
            </plugin>

        </plugins>
    </build>

</project>

applicationContext.xml

	<!-- Root Spring context: pulls in property values, component scanning,
	     and the batch + Cassandra sub-configurations. -->
    <!-- Load Cassandra connection settings (host, port, keyspace, credentials) -->
    <context:property-placeholder location="classpath:cassandra.properties"/>
	<!-- Component scan so @Autowired wiring (e.g. the writer's repository) works -->
	<context:component-scan base-package="com.*"/>

	<import resource="springbatch.xml"/>
	<import resource="cassandra.xml"/>

springbatch.xml

<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
	   xmlns:batch="http://www.springframework.org/schema/batch"
	   xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	   xsi:schemaLocation="http://www.springframework.org/schema/beans
	http://www.springframework.org/schema/beans/spring-beans.xsd
	http://www.springframework.org/schema/batch
	http://www.springframework.org/schema/batch/spring-batch.xsd
 ">


	<!-- Spring Batch infrastructure -->
	<!-- In-memory job repository: batch metadata is NOT persisted, so jobs are
	     not restartable across JVM restarts. Acceptable for this import job. -->
    <bean id="jobRepository"
          class="org.springframework.batch.core.repository.support.MapJobRepositoryFactoryBean">
        <property name="transactionManager" ref="transactionManager"/>
    </bean>

	<!-- No-op transaction manager; the Cassandra writes are not transactional -->
    <bean id="transactionManager"
          class="org.springframework.batch.support.transaction.ResourcelessTransactionManager"/>
    <bean id="jobLauncher"
          class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
        <property name="jobRepository" ref="jobRepository"/>
    </bean>


	<!-- The job: read the CSV, map each line to a TradeDate, write to Cassandra.
	     chunk = read-process-write; commit-interval="12" means 12 items per chunk;
	     the tasklet runs on taskExecutor with at most 20 concurrent workers. -->
    <batch:job id="integrationTradingDate">
		<batch:step id="readTradingDateWriteToDatebase">
			<batch:tasklet task-executor="taskExecutor"
                           throttle-limit="20">
				<batch:chunk reader="inputDateReader" writer="ProductItemWriter"
							 processor="itemProcessor" commit-interval="12">
				</batch:chunk>
			</batch:tasklet>
		</batch:step>
	</batch:job>

	<!-- Thread pool used by the tasklet above -->
    <bean id="taskExecutor" class="org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor">
        <property name="corePoolSize" value="60" />
        <property name="maxPoolSize" value="100" />
        <property name="queueCapacity" value="25" />
    </bean>

	<!-- Pass-through processor (hook for future per-record transformation) -->
	<bean id="itemProcessor" class="com.beifa.cn.data.integration.spring.batch.reader.processor.TradingDateProcessor"/>

	<!-- Reader: flat-file CSV reader; linesToSkip=1 skips the header row -->
	<bean id="inputDateReader" class="org.springframework.batch.item.file.FlatFileItemReader">
		<property name="resource" value="classpath:input/input.csv"/>
		<property name="lineMapper" ref="lineMapper"/>
		<property name="linesToSkip" value="1"/>
	</bean>

	<!-- Tokenizes each CSV line into named fields, then maps them to TradeDate -->
	<bean id="lineMapper"
		  class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
		<property name="lineTokenizer">
			<bean
					class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
				<property name="names" value="stockCode,itemValue1,itemValue2,itemValue3"/>
			</bean>
		</property>

		<property name="fieldSetMapper" ref="fieldSetMapper"/>
	</bean>


	<bean id="fieldSetMapper"
		  class="com.beifa.cn.data.integration.spring.batch.reader.mapper.TradeDateFieldSetMapper">
	</bean>

	<!-- Alternative CSV writer; NOT wired into the job above (debug output only) -->
	<bean id="csvItemWriter" class="org.springframework.batch.item.file.FlatFileItemWriter">
		<property name="resource" value="file:target/output/output.csv" />
		<property name="lineAggregator">
			<bean class="org.springframework.batch.item.file.transform.DelimitedLineAggregator">
				<property name="delimiter" value=","/>
				<property name="fieldExtractor">
					<bean class="org.springframework.batch.item.file.transform.BeanWrapperFieldExtractor">
						<property name="names" value="item_id,tradingDataDate,stockCode,itemValue1,itemValue2,itemValue3"/>
					</bean>
				</property>

			</bean>
		</property>
	</bean>

	<!-- Writer actually used by the job: persists each chunk to Cassandra -->
    <bean id="ProductItemWriter" class="com.beifa.cn.data.integration.spring.batch.writer.cassandra.TradeDateCassandraWriter"/>

</beans>

cassandra.properties

# Cassandra contact point(s)
cassandra_contactpoints = 192.168.13.163
# port (comment was GBK-mojibake in the original)
cassandra_port = 9042
# keyspace currently in use
cassandra_keyspace = tradingdata
# login username
cassandra_username= cassandra
# login password
cassandra_password= cassandra

cassandra.xml

 <!-- spring-cassandra: cluster connection built from cassandra.properties -->
    <cassandra:cluster  contact-points="${cassandra_contactpoints}" port="${cassandra_port}"
                       username="${cassandra_username}" password="${cassandra_password}"/>

    <!-- Keyspace (schema) the session operates on -->
    <cassandra:session   keyspace-name="${cassandra_keyspace}"/>

    <!-- Object-to-Cassandra mapping (@Table/@PrimaryKey/@Column annotations) -->
    <cassandra:mapping/>

    <!-- Type conversion between Java and CQL types -->
    <cassandra:converter/>

    <!-- CassandraTemplate for CQL operations -->
    <cassandra:template id="cassandraTemplate"/>

    <!-- Scan for Spring Data repository interfaces (e.g. TradeDateRepository) -->
    <cassandra:repositories base-package="com.beifa.cn.data.integration.spring.repository"/>

实体类

/**
 * Entity mapped to the Cassandra table {@code time_series_data}: one row of
 * trading data for a stock, carrying three generic numeric item values.
 *
 * NOTE(review): {@code LocalDate} here appears to be the DataStax driver type
 * (the field-set mapper builds it via {@code fromMillisSinceEpoch}), not
 * {@code java.time.LocalDate} — confirm against the imports.
 *
 * @Author: duhongjiang
 * @Date: Created in 2018/7/13
 */

@Table(value = "time_series_data")
public class TradeDate implements Serializable {

   private static final long serialVersionUID =1L;
    // Primary key column "item_id"; generated via UUID.randomUUID() when a CSV
    // line is mapped, not read from the file.
    @PrimaryKey(value="item_id")
    private UUID itemId;
    // Column "trading_date"; stamped with "now" by the field-set mapper.
    @Column(value="trading_date")
    private LocalDate tradingDataDate;
    @Column(value="stock_code")
    private String stockCode;
    // Three metric columns carried straight through from the CSV.
    @Column(value="item_value1")
    private Double itemValue1;
    @Column(value="item_value2")
    private Double itemValue2;
    @Column(value="item_value3")
    private Double itemValue3;

    /** No-arg constructor required by Spring Data / bean mapping. */
    public TradeDate(){

    }

    /** CSV-sourced fields only; id and trading date are set separately. */
    public TradeDate(String stockCode, Double itemValue1, Double itemValue2, Double itemValue3) {
        this.stockCode = stockCode;
        this.itemValue1 = itemValue1;
        this.itemValue2 = itemValue2;
        this.itemValue3 = itemValue3;
    }

    /** Full constructor covering every mapped column. */
    public TradeDate(UUID itemId, LocalDate tradingDataDate, String stockCode, Double itemValue1, Double itemValue2, Double itemValue3) {
        this.itemId = itemId;
        this.tradingDataDate = tradingDataDate;
        this.stockCode = stockCode;
        this.itemValue1 = itemValue1;
        this.itemValue2 = itemValue2;
        this.itemValue3 = itemValue3;
    }

    public UUID getItemId() {
        return itemId;
    }

    public void setItemId(UUID itemId) {
        this.itemId = itemId;
    }

    public LocalDate getTradingDataDate() {
        return tradingDataDate;
    }

    public void setTradingDataDate(LocalDate tradingDataDate) {
        this.tradingDataDate = tradingDataDate;
    }

    public String getStockCode() {
        return stockCode;
    }

    public void setStockCode(String stockCode) {
        this.stockCode = stockCode;
    }

    public Double getItemValue1() {
        return itemValue1;
    }

    public void setItemValue1(Double itemValue1) {
        this.itemValue1 = itemValue1;
    }

    public Double getItemValue2() {
        return itemValue2;
    }

    public void setItemValue2(Double itemValue2) {
        this.itemValue2 = itemValue2;
    }

    public Double getItemValue3() {
        return itemValue3;
    }

    public void setItemValue3(Double itemValue3) {
        this.itemValue3 = itemValue3;
    }
}

映射类:读取csv数据文件映射到实体类 fieldSetMapper

/**
 * Maps one tokenized CSV line onto a {@code TradeDate} entity.
 *
 * Only the stock code and the three item values come from the file; the
 * primary key is generated fresh and the trading date is stamped with the
 * current time.
 */
public class TradeDateFieldSetMapper implements FieldSetMapper<TradeDate> {
    @Override
    public TradeDate mapFieldSet(FieldSet fieldSet) throws BindException {
        // CSV-sourced fields via the dedicated constructor.
        final TradeDate row = new TradeDate(
                fieldSet.readString("stockCode"),
                fieldSet.readDouble("itemValue1"),
                fieldSet.readDouble("itemValue2"),
                fieldSet.readDouble("itemValue3"));
        // Key and date are generated here, not read from the CSV.
        row.setItemId(UUID.randomUUID());
        row.setTradingDataDate(LocalDate.fromMillisSinceEpoch(System.currentTimeMillis()));
        return row;
    }
}

/**
 * Spring Batch {@code ItemWriter} that persists each chunk of
 * {@code TradeDate} items to Cassandra through the Spring Data repository.
 */
public class TradeDateCassandraWriter implements ItemWriter<TradeDate> {

    @Autowired
    TradeDateRepository tradeDateRepository;

    /**
     * Writes one chunk of items (chunk size bounded by commit-interval).
     *
     * @param list the items assembled by the reader/processor for this chunk
     * @throws Exception propagated to Spring Batch's chunk error handling
     */
    @Override
    public void write(List<? extends TradeDate> list) throws Exception {
        // saveAll lets Spring Data handle the whole chunk instead of one
        // repository call per entity, as the original per-item loop did.
        tradeDateRepository.saveAll(list);
    }
}

/**
 * Pass-through processor: returns each item unchanged.
 *
 * Kept as an explicit chunk step so per-record transformation or filtering
 * can be added later without touching the job XML (returning {@code null}
 * from process() would drop the item from the chunk).
 */
public class TradingDateProcessor implements ItemProcessor<TradeDate,TradeDate> {

    @Override
    public TradeDate process(TradeDate tradeDate) throws Exception {
        return tradeDate;
    }
}
/**
 * Spring Data repository for {@code TradeDate}.
 *
 * The ID type parameter must match the entity's {@code @PrimaryKey} field,
 * which is a {@code UUID} (itemId). The original declared {@code String},
 * which mistypes id-based operations such as findById/deleteById.
 */
public interface TradeDateRepository extends CrudRepository<TradeDate,UUID> {

}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值