spring batch批处理分区读取数据库的例子

最新推荐文章于 2024-05-28 16:29:27 发布

刘利新西安

最新推荐文章于 2024-05-28 16:29:27 发布

阅读量1.5w

点赞数 5

分类专栏： Spring

Spring 专栏收录该内容

44 篇文章 0 订阅

订阅专栏

在spring batch批处理框架中，分区partitioning是指用多线程读取一定范围内的数据。比如，你的数据库表中有100条记录，有一个主键id，取值从1到100，你想处理这100条记录.通常，使用1个线程，从1开始，一直到100.处理过程大概需要10分钟。

如果使用分区的话，我们可以启动10个线程，每个线程处理10条记录（按照id的范围）。这样，只用1分钟就能处理完。

为了实现分区技术，你必须懂得所输入数据的结构，这样你才能恰当地划定数据范围。

本文演示如何创建一个分区的工作，该工作有10个线程，每个线程都按照提供的id范围读取数据库.

-- Demo table read by the partitioned job; rows are split into partitions by
-- ranges of the auto-increment primary key `id`.
-- The login column is named user_login to match the column the batch reader
-- selects and the data loader inserts into.
create table users(
id int(6) not null auto_increment,
user_login varchar(50),
user_pass varchar(50),
age int(6),
primary key(id)
)default charset=utf8;

首先创建一个实现了Partitioner接口的类，将分区范围（partitioning range）放到ExecutionContext中去。之后，在batch job的XML配置文件
中引用同样的fromId和toId.

package com.mkyong.partition;

import java.util.HashMap;
import java.util.Map;

import org.springframework.batch.core.partition.support.Partitioner;
import org.springframework.batch.item.ExecutionContext;

/**
 * Splits work into consecutive, inclusive id ranges, one per partition.
 *
 * <p>Partition {@code i} (1-based) receives an {@link ExecutionContext} holding
 * {@code fromId}, {@code toId} and a {@code name} of the form {@code "Thread" + i}.
 * With the default range of 10 the first partition covers ids 1..10, the second
 * 11..20, and so on. The ranges are computed purely from {@code gridSize} and
 * {@code range}; this class does not look at the ids actually present in the
 * table.
 */
public class RangePartitioner implements Partitioner {

	/** Number of consecutive ids assigned to each partition. */
	private int range = 10;

	/**
	 * Overrides the number of ids per partition (default 10).
	 *
	 * @param range ids per partition, must be positive
	 * @throws IllegalArgumentException if {@code range} is not positive
	 */
	public void setRange(int range) {
		if (range <= 0) {
			throw new IllegalArgumentException("range must be positive, got " + range);
		}
		this.range = range;
	}

	/**
	 * Builds one execution context per partition.
	 *
	 * @param gridSize number of partitions to create
	 * @return map from {@code "partition" + i} to the context of partition {@code i};
	 *         empty when {@code gridSize} is less than 1
	 */
	@Override
	public Map<String, ExecutionContext> partition(int gridSize) {

		Map<String, ExecutionContext> result = new HashMap<String, ExecutionContext>();

		int fromId = 1;
		int toId = range;

		for (int i = 1; i <= gridSize; i++) {
			ExecutionContext value = new ExecutionContext();

			System.out.println("\nStarting : Thread" + i);
			System.out.println("fromId : " + fromId);
			System.out.println("toId : " + toId);

			// Inclusive bounds of the ids this partition is responsible for.
			value.putInt("fromId", fromId);
			value.putInt("toId", toId);

			// give each thread a name
			value.putString("name", "Thread" + i);

			result.put("partition" + i, value);

			// The next partition starts right after the current upper bound.
			fromId = toId + 1;
			toId += range;
		}

		return result;
	}

}

读一下batch job XML文件，大多数条目都是不言自明的。只需要指出以下几点：
1.对于分区者partitioner来讲，grid-size=线程数（number of threads）
2.对于pagingItemReader bean来讲（一个jdbc读取者的例子），#{stepExecutionContext[fromId]}和#{stepExecutionContext[toId]}的值来自rangePartitioner放入ExecutionContext中的数据，并由ExecutionContext注入。
3.对于itemProcessor bean来讲，#{stepExecutionContext[name]}的值同样来自rangePartitioner放入ExecutionContext中的数据，并由ExecutionContext注入。

4.对于writers，每个线程都将输出记录到不同的csv文件中，文件名的格式是users.processed[fromId]-[toId].csv

job-partitioner.xml

<!-- spring batch core settings -->
	<import resource="../config/context.xml" />
	
	<!-- database settings -->
	<import resource="../config/database.xml" />

	<!--
		Partitioned job. Its single step, masterStep, delegates to the step named
		"slave" once per partition produced by the rangePartitioner bean.
	-->
	<job id="partitionJob" xmlns="http://www.springframework.org/schema/batch">
	    
	    <!-- Master step: grid-size is 10, and partitions run on the taskExecutor bean. -->
		<step id="masterStep">
			<partition step="slave" partitioner="rangePartitioner">
				<handler grid-size="10" task-executor="taskExecutor" />
			</partition>
		</step>
		
	</job>

	<!--
		Worker step referenced by masterStep's partition element. It is a chunk
		step that reads with pagingItemReader, processes with itemProcessor and
		writes with flatFileItemWriter, using a commit-interval of 1.
	-->
	<step id="slave" xmlns="http://www.springframework.org/schema/batch">
		<tasklet>
			<chunk reader="pagingItemReader" writer="flatFileItemWriter"
				processor="itemProcessor" commit-interval="1" />
		</tasklet>
	</step>

	<!-- Partitioner used by masterStep; puts fromId, toId and name into each partition's context. -->
	<bean id="rangePartitioner" class="com.mkyong.partition.RangePartitioner" />

	<!-- Executor referenced by masterStep's handler (task-executor attribute) to run the partitions. -->
	<bean id="taskExecutor" class="org.springframework.core.task.SimpleAsyncTaskExecutor" />

	<!--
		Step-scoped processor. Its threadName property is bound to the "name"
		entry of the step execution context, which the range partitioner sets
		for each partition.
	-->
	<bean id="itemProcessor" class="com.mkyong.processor.UserProcessor"
		scope="step">
		<property name="threadName" value="#{stepExecutionContext[name]}" />
	</bean>

	<!--
		Step-scoped JDBC paging reader. Each partition reads the rows of the users
		table whose id lies in the inclusive range [fromId, toId] taken from its
		step execution context, sorted by id, 10 rows per page.
	-->
	<bean id="pagingItemReader"
		class="org.springframework.batch.item.database.JdbcPagingItemReader"
		scope="step">
		<property name="dataSource" ref="dataSource" />
		<property name="queryProvider">
			<bean
				class="org.springframework.batch.item.database.support.SqlPagingQueryProviderFactoryBean">
				<property name="dataSource" ref="dataSource" />
				<property name="selectClause" value="select id, user_login, user_pass, age" />
				<property name="fromClause" value="from users" />
				<!-- The comparison operators are entity-escaped: a raw less-than sign is not allowed in an XML attribute value. -->
				<property name="whereClause" value="where id &gt;= :fromId and id &lt;= :toId" />
				<property name="sortKey" value="id" />
			</bean>
		</property>
		<!-- Inject via the ExecutionContext in rangePartitioner -->
		<property name="parameterValues">
			<map>
				<entry key="fromId" value="#{stepExecutionContext[fromId]}" />
				<entry key="toId" value="#{stepExecutionContext[toId]}" />
			</map>
		</property>
		<property name="pageSize" value="10" />
		<property name="rowMapper">
			<bean class="com.mkyong.UserRowMapper" />
		</property>
	</bean>

	<!--
		Step-scoped CSV writer. The output file name embeds the partition's fromId
		and toId from the step execution context, so each partition writes to its
		own file. Lines are the id, username, password and age properties of the
		item joined with commas; appending to an existing file is disabled.
	-->
	<bean id="flatFileItemWriter" class="org.springframework.batch.item.file.FlatFileItemWriter"
		scope="step" >
		<property name="resource"
			value="file:csv/outputs/users.processed#{stepExecutionContext[fromId]}-#{stepExecutionContext[toId]}.csv" />
		<property name="appendAllowed" value="false" />
		<property name="lineAggregator">
			<bean
				class="org.springframework.batch.item.file.transform.DelimitedLineAggregator">
				<property name="delimiter" value="," />
				<property name="fieldExtractor">
					<bean
						class="org.springframework.batch.item.file.transform.BeanWrapperFieldExtractor">
						<property name="names" value="id, username, password, age" />
					</bean>
				</property>
			</bean>
		</property>
	</bean>

	<!--
		Step-scoped MongoDB reader, an alternative to pagingItemReader; the slave
		step in this file does not reference it. It selects documents whose id lies
		in the inclusive range [fromId, toId] from the step execution context. The
		lower bound uses $gte so that the document with id equal to fromId is
		included, matching the inclusive ranges produced by the range partitioner
		and used by the JDBC reader.
		NOTE(review): the sort entry has an empty value; confirm which sort
		direction is intended for "id".
	-->
	<bean id="mongoItemReader" class="org.springframework.batch.item.data.MongoItemReader"
		scope="step">
		<property name="template" ref="mongoTemplate" />
		<property name="targetType" value="com.mkyong.User" />
		<property name="query"
			value="{ 
		'id':{$gte:#{stepExecutionContext[fromId]}, $lte:#{stepExecutionContext[toId]} 
		} }" />
		<property name="sort">
			<util:map id="sort">
				<entry key="id" value="" />
			</util:map>
		</property>
	</bean>

</beans>

条目处理类用来输出正在处理的条目和当前正在运行的线程的名字。

import com.mkyong.User;

/**
 * Pass-through processor that prints which partition is handling which user.
 *
 * <p>The item itself is returned unchanged; the only effect is one line on
 * standard output per processed item.
 */
@Component("itemProcessor")
@Scope(value = "step")
public class UserProcessor implements ItemProcessor<User, User> {

	/** Label printed in front of every item, bound to the "name" entry of the step execution context. */
	@Value("#{stepExecutionContext[name]}")
	private String threadName;

	public void setThreadName(String threadName) {
		this.threadName = threadName;
	}

	public String getThreadName() {
		return threadName;
	}

	/**
	 * Prints the thread label together with the item's id and username.
	 *
	 * @param item the user being processed
	 * @return the same {@code item} instance, unmodified
	 */
	@Override
	public User process(User item) throws Exception {

		final String prefix = threadName + " processing : ";
		final String details = item.getId() + " : " + item.getUsername();

		System.out.println(prefix + details);

		return item;
	}

}

加载所有的东西，跑一下试试。10个线程将被启动，加工各自范围的数据。

/**
 * Command-line launcher for the partitioned job.
 *
 * <p>Loads {@code spring/batch/jobs/job-partitioner.xml} from the classpath,
 * runs the {@code partitionJob} bean with empty job parameters and prints the
 * resulting status and any failure exceptions.
 */
public class App {

	public static void main(String[] args) {

		App obj = new App();
		obj.run();

	}

	/**
	 * Boots the Spring context, launches the job once and closes the context
	 * afterwards so that resources owned by the context are released.
	 */
	private void run() {

		String[] springConfig = { "spring/batch/jobs/job-partitioner.xml" };

		ClassPathXmlApplicationContext context = new ClassPathXmlApplicationContext(springConfig);

		try {

			JobLauncher jobLauncher = (JobLauncher) context.getBean("jobLauncher");
			Job job = (Job) context.getBean("partitionJob");

			try {

				// Job parameters could be supplied here, e.g.
				// new JobParametersBuilder().addString("age", "20").toJobParameters()

				JobExecution execution = jobLauncher.run(job, new JobParameters());
				System.out.println("Exit Status : " + execution.getStatus());
				// Labelled separately from the status line so the two outputs are distinguishable.
				System.out.println("Failure Exceptions : " + execution.getAllFailureExceptions());

			} catch (Exception e) {
				e.printStackTrace();
			}

		} finally {
			// NOTE(review): assumes the configured jobLauncher runs the job
			// synchronously, so the job has finished by the time we get here - confirm.
			context.close();
		}

		System.out.println("Done");

	}

}

console的输出：

Starting : Thread1
fromId : 1
toId : 10

Starting : Thread2
fromId : 11
toId : 20

Starting : Thread3
fromId : 21
toId : 30

译者注：在跑这个程序之前，要先用下面程序给数据库灌数据：

package com.mkyong.data;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Random;

/**
 * One-off loader that fills the USERS table with sample rows for the
 * partitioning example.
 *
 * <p>Inserts rows with ids 1..100, logins {@code user_<id>}, passwords
 * {@code pass_<id>} and a random age in the range 0..99.
 */
public class InsertData {

	private static final String DB_DRIVER = "com.mysql.jdbc.Driver";
	private static final String DB_CONNECTION = "jdbc:mysql://localhost:3306/test";
	private static final String DB_USER = "root";
	private static final String DB_PASSWORD = "";

	/** Parameterized insert; values are bound per row instead of being spliced into the SQL text. */
	private static final String INSERT_USER_SQL =
			"INSERT INTO USERS (ID, USER_LOGIN, USER_PASS, AGE) VALUES (?, ?, ?, ?)";

	/** Number of sample rows to insert. */
	private static final int USER_COUNT = 100;

	public static void main(String[] argv) {

		try {

			insertDataIntoUserTable();

		} catch (Exception e) {

			System.out.println(e.getMessage());

		}

	}

	/**
	 * Inserts all sample rows in a single JDBC batch.
	 *
	 * @throws Exception if the connection cannot be opened or the batch fails;
	 *         the statement and connection are closed in either case
	 */
	private static void insertDataIntoUserTable() throws Exception {

		Connection dbConnection = getDBConnection();

		try {

			PreparedStatement statement = dbConnection.prepareStatement(INSERT_USER_SQL);

			try {

				Random generator = new Random();

				for (int i = 1; i <= USER_COUNT; i++) {

					statement.setInt(1, i);
					statement.setString(2, "user_" + i);
					statement.setString(3, "pass_" + i);
					statement.setInt(4, generator.nextInt(100));

					statement.addBatch();

				}

				statement.executeBatch();

				System.out.println("Record is inserted into USER table!");

			} finally {
				statement.close();
			}

		} finally {
			// Nested finally blocks: the connection is closed even if closing the statement throws.
			dbConnection.close();
		}

	}

	/**
	 * Loads the JDBC driver and opens a connection.
	 *
	 * @return an open connection, never {@code null}
	 * @throws SQLException if the driver class is missing or the connection cannot be established
	 */
	private static Connection getDBConnection() throws SQLException {

		try {

			Class.forName(DB_DRIVER);

		} catch (ClassNotFoundException e) {

			// Fail here with a clear cause rather than returning null to the caller.
			throw new SQLException("JDBC driver not found: " + DB_DRIVER, e);

		}

		return DriverManager.getConnection(DB_CONNECTION, DB_USER, DB_PASSWORD);

	}

}

原文： http://www.mkyong.com/spring-batch/spring-batch-partitioning-example/

源代码：http://pan.baidu.com/share/link?shareid=3720169482&uk=3878681452
输出：

刘利新西安

关注

5
点赞
踩
17

收藏

觉得还不错? 一键收藏
2
评论
spring batch批处理分区读取数据库的例子

在spring batch批处理框架中，分区partitioning是指用多线程读取一定范围内的数据。比如，你的数据库表中有100条记录，有一个主键id，取值从1到100，你想处理这100条记录.通常，使用1个线程，从1开始，一直到100.处理过程大概需要10分钟。如果使用分区的话，我们可以启动10个线程，每个线程处理10条记录（按照id的范围）。这样，只用1分钟就能处理完。为了实现分区技
复制链接

扫一扫

专栏目录