Spring Batch_使用多线程运行一组相同任务的JOB
主要思路:在spring batch中,一个job会完成一个任务,处理一个数据集,有时这个数据集会很大,导致运行时间很长(虽然做了各种优化,数据库访问的优化,代码的优化等等),但是我想如果把这个数据集分成几块,配置几个相同的job来完成同一个任务,每个job处理其中一个数据块。这样不是也能提高效率,节省时间吗?
那么我们就来实验一下,看看可操作性。。。
如何给一个大的数据集分块:可以利用limit。通过limit构造两个sql语句,再通过jobParameters动态传递给运行中的job,这样job的item reader就会读取特定sql语句查询上来的数据,然后进行处理。。。
下面是我的spring batch的配置文件:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:batch="http://www.springframework.org/schema/batch"
       xmlns:context="http://www.springframework.org/schema/context"
       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-4.0.xsd
http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch.xsd
http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context.xsd">

    <!-- Component scanning for the annotated classes in com.lyx.batch -->
    <context:component-scan base-package="com.lyx.batch" />

    <!-- Listener attached to every step that inherits from abstractStep -->
    <bean id="exceptionHandler" class="com.lyx.batch.ExceptionListener" />

    <!-- Parent step: contributes the shared listener to both concrete steps below -->
    <batch:step id="abstractStep" abstract="true">
        <batch:listeners>
            <batch:listener ref="exceptionHandler" />
        </batch:listeners>
    </batch:step>

    <!-- Parent reader: both step-scoped readers below inherit the data source from here -->
    <bean id="abstractCursorReader" abstract="true"
          class="org.springframework.batch.item.database.JdbcCursorItemReader">
        <property name="dataSource" ref="dataSource" />
    </bean>

    <!-- Job 1: reads with the SQL passed in as job parameter 'sql1' -->
    <batch:job id="addPeopleDescJob_1">
        <batch:step id="addDescStep_1" parent="abstractStep">
            <batch:tasklet>
                <batch:chunk reader="peopleAddDescReader_1"
                             processor="addDescProcessor"
                             writer="addDescPeopleWriter"
                             commit-interval="2" />
            </batch:tasklet>
        </batch:step>
    </batch:job>

    <!-- Step scope is required so that #{jobParameters[...]} is resolved when the step runs -->
    <bean id="peopleAddDescReader_1" parent="abstractCursorReader" scope="step">
        <property name="sql" value="#{jobParameters['sql1']}" />
        <property name="rowMapper" ref="peopleRowMapper" />
        <property name="preparedStatementSetter" ref="preparedStatementSetter" />
        <property name="fetchSize" value="20" />
    </bean>

    <!-- Job 2: same processor and writer as job 1, but reads with job parameter 'sql2' -->
    <batch:job id="addPeopleDescJob_2">
        <batch:step id="addDescStep_2" parent="abstractStep">
            <batch:tasklet>
                <batch:chunk reader="peopleAddDescReader_2"
                             processor="addDescProcessor"
                             writer="addDescPeopleWriter"
                             commit-interval="2" />
            </batch:tasklet>
        </batch:step>
    </batch:job>

    <bean id="peopleAddDescReader_2" parent="abstractCursorReader" scope="step">
        <property name="sql" value="#{jobParameters['sql2']}" />
        <property name="rowMapper" ref="peopleRowMapper" />
        <property name="preparedStatementSetter" ref="preparedStatementSetter" />
        <property name="fetchSize" value="20" />
    </bean>

    <!-- Collaborators shared by both jobs -->
    <bean id="peopleRowMapper" class="com.lyx.batch.PeopleRowMapper" />

    <bean id="preparedStatementSetter" class="com.lyx.batch.PeoplePreparedStatementSetter" />

    <bean id="addDescProcessor" class="com.lyx.batch.AddPeopleDescProcessor" />

    <bean id="addDescPeopleWriter" class="com.lyx.batch.AddDescPeopleWriter">
        <property name="dataSource" ref="dataSource" />
    </bean>

    <!-- Tomcat JDBC pool data source configuration -->
    <bean id="dataSource" class="org.apache.tomcat.jdbc.pool.DataSource" destroy-method="close">
        <property name="poolProperties">
            <bean class="org.apache.tomcat.jdbc.pool.PoolProperties">
                <property name="driverClassName" value="com.mysql.jdbc.Driver" />
                <property name="url" value="jdbc:mysql://localhost:3306/test" />
                <property name="username" value="root" />
                <property name="password" value="034039" />
            </bean>
        </property>
    </bean>

    <!-- Spring Batch job repository configuration -->
    <batch:job-repository id="jobRepository"
                          data-source="dataSource"
                          transaction-manager="transactionManager"
                          isolation-level-for-create="REPEATABLE_READ"
                          table-prefix="BATCH_"
                          max-varchar-length="1000" />

    <!-- Spring transaction manager -->
    <bean id="transactionManager"
          class="org.springframework.jdbc.datasource.DataSourceTransactionManager">
        <property name="dataSource" ref="dataSource" />
    </bean>

    <!-- Batch job launcher -->
    <bean id="jobLauncher"
          class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
        <property name="jobRepository" ref="jobRepository" />
    </bean>

</beans>
|
可以看到有两个job -addPeopleDescJob_1 和 addPeopleDescJob_2,每个job的reader 是不一样的,不一样的地方在 sql参数的不一样,是通过job parameter 动态传递进来的。。
下面是AppMain4.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
|
package
com.lyx.batch;
import
javax.sql.DataSource;
import
org.springframework.batch.core.ExitStatus;
import
org.springframework.batch.core.Job;
import
org.springframework.batch.core.JobExecution;
import
org.springframework.batch.core.JobParametersBuilder;
import
org.springframework.batch.core.JobParametersInvalidException;
import
org.springframework.batch.core.launch.JobLauncher;
import
org.springframework.batch.core.repository.JobExecutionAlreadyRunningException;
import
org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException;
import
org.springframework.batch.core.repository.JobRestartException;
import
org.springframework.beans.factory.annotation.Autowired;
import
org.springframework.context.ApplicationContext;
import
org.springframework.context.support.ClassPathXmlApplicationContext;
import
org.springframework.jdbc.core.JdbcTemplate;
import
org.springframework.stereotype.Component;
/**
 * Runs the two "add people description" jobs concurrently, one per thread, with each job
 * reading one half of the matching rows of the {@code people} table.
 *
 * <p>The matching rows are split by {@code person_id}: job 1 takes the first half in
 * ascending order and job 2 takes the remainder in descending order, so together they cover
 * every row exactly once (assumes {@code person_id} is unique - TODO confirm against the
 * table definition).
 */
@Component
public class AppMain4 {

    /** Set through {@link #setDataSource(DataSource)} when Spring creates this component. */
    private static JdbcTemplate jdbcTemplate;

    /**
     * Receives the data source from the Spring context and builds the shared template.
     *
     * @param dataSource data source injected by Spring
     */
    @Autowired
    public void setDataSource(DataSource dataSource) {
        jdbcTemplate = new JdbcTemplate(dataSource);
    }

    /**
     * Loads {@code spring-batch4.xml}, counts the matching rows, launches both jobs on their
     * own threads, waits for them to finish and prints the total elapsed time.
     *
     * @param args unused
     */
    public static void main(String[] args)
            throws JobExecutionAlreadyRunningException, JobRestartException,
            JobInstanceAlreadyCompleteException, JobParametersInvalidException {
        long startTime = System.currentTimeMillis();

        final ClassPathXmlApplicationContext context = new ClassPathXmlApplicationContext(
                new String[] { "classpath:spring-batch4.xml" });
        try {
            final JobLauncher launcher = (JobLauncher) context.getBean("jobLauncher");

            int rowCount = jdbcTemplate.queryForObject(
                    "select count(*) from people where "
                            + "first_name like '%JOHN%' or last_name like '%DOE%'",
                    Integer.class);

            // firstHalf + secondHalf == rowCount for both even and odd counts, so no row is
            // skipped; for an odd count the extra row goes to the second job.
            int firstHalf = rowCount / 2;
            int secondHalf = rowCount - firstHalf;

            // Both statements need an explicit, opposite ordering on the same column;
            // otherwise the two LIMIT windows are not guaranteed to be disjoint.
            // NOTE(review): the SQL text is the only job parameter, so a re-run with an
            // unchanged row count presumably maps to the same JobInstance - confirm the
            // desired restart behaviour.
            String sql1 = "select first_name ,last_name from people where "
                    + "first_name like ? or last_name like ? order by person_id asc limit 0,"
                    + firstHalf;
            String sql2 = "select first_name ,last_name from people where "
                    + "first_name like ? or last_name like ? order by person_id desc limit 0,"
                    + secondHalf;

            Thread firstWorker = new Thread(
                    jobRunner(context, launcher, "addPeopleDescJob_1", "job1", "sql1", sql1));
            Thread secondWorker = new Thread(
                    jobRunner(context, launcher, "addPeopleDescJob_2", "job2", "sql2", sql2));
            firstWorker.start();
            secondWorker.start();

            // Wait for both jobs; without this the elapsed time below would only measure
            // the start-up cost and the context could not be closed safely.
            awaitCompletion(firstWorker);
            awaitCompletion(secondWorker);

            long endTime = System.currentTimeMillis();
            System.out.println("程序运行时间: " + (endTime - startTime) + "ms");
        } finally {
            context.close();
        }
    }

    /** Builds the task that launches one job with one SQL parameter and reports its outcome. */
    private static Runnable jobRunner(final ApplicationContext context,
            final JobLauncher launcher, final String jobBeanName, final String label,
            final String sqlParameterName, final String sql) {
        return new Runnable() {
            @Override
            public void run() {
                long t1 = System.currentTimeMillis();
                JobParametersBuilder parameters = new JobParametersBuilder();
                parameters.addString(sqlParameterName, sql);
                Job job = (Job) context.getBean(jobBeanName);
                try {
                    JobExecution execution = launcher.run(job, parameters.toJobParameters());
                    ExitStatus exitStatus = execution.getExitStatus();
                    if (exitStatus.getExitCode().equals(ExitStatus.COMPLETED.getExitCode())) {
                        System.out.println(label + "任务正常完成");
                    } else {
                        System.out.println(
                                label + "任务失败,exitCode=" + exitStatus.getExitCode());
                    }
                } catch (JobExecutionAlreadyRunningException e) {
                    e.printStackTrace();
                } catch (JobRestartException e) {
                    e.printStackTrace();
                } catch (JobInstanceAlreadyCompleteException e) {
                    e.printStackTrace();
                } catch (JobParametersInvalidException e) {
                    e.printStackTrace();
                }
                long t2 = System.currentTimeMillis();
                System.out.println(
                        Thread.currentThread().getName() + "运行时间: " + (t2 - t1) + "ms");
            }
        };
    }

    /** Blocks until the worker ends; restores the interrupt flag if the wait is interrupted. */
    private static void awaitCompletion(Thread worker) {
        try {
            worker.join();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}
|
PeoplePreparedStatementSetter.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
package
com.lyx.batch;
import
java.sql.PreparedStatement;
import
java.sql.SQLException;
import
org.springframework.jdbc.core.PreparedStatementSetter;
public
class
PeoplePreparedStatementSetter
implements
PreparedStatementSetter {
public
void
setValues(PreparedStatement ps)
throws
SQLException {
// TODO Auto-generated method stub
ps.setString(
1
,
"%JOHN%"
);
ps.setString(
2
,
"%DOE%"
);
// ps.setInt(3, 1);
// ps.setInt(4, 100);
}
}
|
运行结果:
job1任务正常完成
Thread-3运行时间: 4573ms
job2任务正常完成
Thread-4运行时间: 4627ms
看到每个线程的运行时间都在4秒多。
再看一下在一个线程中运行一组Job的情况:
AppMain3.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
package
com.lyx.batch;
import
javax.sql.DataSource;
import
org.springframework.batch.core.ExitStatus;
import
org.springframework.batch.core.Job;
import
org.springframework.batch.core.JobExecution;
import
org.springframework.batch.core.JobParametersBuilder;
import
org.springframework.batch.core.JobParametersInvalidException;
import
org.springframework.batch.core.launch.JobLauncher;
import
org.springframework.batch.core.repository.JobExecutionAlreadyRunningException;
import
org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException;
import
org.springframework.batch.core.repository.JobRestartException;
import
org.springframework.beans.factory.annotation.Autowired;
import
org.springframework.context.ApplicationContext;
import
org.springframework.context.support.ClassPathXmlApplicationContext;
import
org.springframework.jdbc.core.JdbcTemplate;
import
org.springframework.stereotype.Component;
/**
 * Runs the two "add people description" jobs one after the other on the main thread, with
 * each job reading one half of the matching rows of the {@code people} table.
 *
 * <p>The matching rows are split by {@code person_id}: job 1 takes the first half in
 * ascending order and job 2 takes the remainder in descending order, so together they cover
 * every row exactly once (assumes {@code person_id} is unique - TODO confirm against the
 * table definition).
 */
@Component
public class AppMain3 {

    /** Set through {@link #setDataSource(DataSource)} when Spring creates this component. */
    private static JdbcTemplate jdbcTemplate;

    /**
     * Receives the data source from the Spring context and builds the shared template.
     *
     * @param dataSource data source injected by Spring
     */
    @Autowired
    public void setDataSource(DataSource dataSource) {
        jdbcTemplate = new JdbcTemplate(dataSource);
    }

    /**
     * Loads {@code spring-batch4.xml}, counts the matching rows, runs both jobs sequentially
     * and prints the total elapsed time.
     *
     * @param args unused
     * @throws JobExecutionAlreadyRunningException if a launched job is already running
     * @throws JobRestartException if a launched job cannot be restarted
     * @throws JobInstanceAlreadyCompleteException if a launched job instance already completed
     * @throws JobParametersInvalidException if the job parameters are rejected
     */
    public static void main(String[] args)
            throws JobExecutionAlreadyRunningException, JobRestartException,
            JobInstanceAlreadyCompleteException, JobParametersInvalidException {
        long startTime = System.currentTimeMillis();

        ClassPathXmlApplicationContext context = new ClassPathXmlApplicationContext(
                new String[] { "classpath:spring-batch4.xml" });
        try {
            JobLauncher launcher = (JobLauncher) context.getBean("jobLauncher");

            int rowCount = jdbcTemplate.queryForObject(
                    "select count(*) from people where "
                            + "first_name like '%JOHN%' or last_name like '%DOE%'",
                    Integer.class);

            // firstHalf + secondHalf == rowCount for both even and odd counts, so no row is
            // skipped; for an odd count the extra row goes to the second job.
            int firstHalf = rowCount / 2;
            int secondHalf = rowCount - firstHalf;

            // Both statements need an explicit, opposite ordering on the same column;
            // otherwise the two LIMIT windows are not guaranteed to be disjoint.
            String sql1 = "select first_name ,last_name from people where "
                    + "first_name like ? or last_name like ? order by person_id asc limit 0,"
                    + firstHalf;
            String sql2 = "select first_name ,last_name from people where "
                    + "first_name like ? or last_name like ? order by person_id desc limit 0,"
                    + secondHalf;

            runJob(context, launcher, "addPeopleDescJob_1", "job1", "sql1", sql1);
            runJob(context, launcher, "addPeopleDescJob_2", "job2", "sql2", sql2);

            long endTime = System.currentTimeMillis();
            System.out.println("程序运行时间: " + (endTime - startTime) + "ms");
        } finally {
            context.close();
        }
    }

    /** Launches one job with one SQL parameter and prints whether it completed. */
    private static void runJob(ApplicationContext context, JobLauncher launcher,
            String jobBeanName, String label, String sqlParameterName, String sql)
            throws JobExecutionAlreadyRunningException, JobRestartException,
            JobInstanceAlreadyCompleteException, JobParametersInvalidException {
        JobParametersBuilder parameters = new JobParametersBuilder();
        parameters.addString(sqlParameterName, sql);
        Job job = (Job) context.getBean(jobBeanName);
        JobExecution execution = launcher.run(job, parameters.toJobParameters());
        ExitStatus exitStatus = execution.getExitStatus();
        if (exitStatus.getExitCode().equals(ExitStatus.COMPLETED.getExitCode())) {
            System.out.println(label + "任务正常完成");
        } else {
            System.out.println(label + "任务失败,exitCode=" + exitStatus.getExitCode());
        }
    }
}
|
运行结果:
job1任务正常完成
job2任务正常完成
程序运行时间: 8706ms
结果你也看到了,多线程运行一组job是不是效率更高?但是用多线程、配置一组相同的job所带来的问题,我没有预计到:虽然提高了效率,但可能给job的重试、重启以及job的管理带来问题。。
==============END==============