CSV文件读取案例
分隔文件读取
以下示例通过一个实际场景说明如何读取平面文件(flat file)。该批处理作业从以下文件中读取足球运动员数据:
ID,lastName,firstName,position,birthYear,debutYear
AbduKa00,Abdul-Jabbar,Karim,rb,1974,1996,
AbduRa00,Abdullah,Rabih,rb,1975,1999,
AberWa00,Abercrombie,Walter,rb,1959,1982,
AbraDa00,Abramowicz,Danny,wr,1945,1967,
AdamBo00,Adams,Bob,te,1946,1969,
AdamCh00,Adams,Charlie,wr,1979,2003
该文件的内容映射到以下 Player 领域对象:
package com.awei.springbatchawei.vo;
import java.io.Serializable;
/**
 * Domain object holding one football player record.
 * <p>
 * The properties mirror the columns of the players CSV file:
 * ID, lastName, firstName, position, birthYear, debutYear.
 *
 * @author Awei
 * @date 2024/7/14 19:41
 */
public class Player implements Serializable {

    private String ID;
    private String lastName;
    private String firstName;
    private String position;
    private int birthYear;
    private int debutYear;

    /**
     * Renders all six properties on a single line, prefixed with {@code PLAYER:}.
     *
     * @return the textual form of this player
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("PLAYER:ID=");
        text.append(ID);
        text.append(",Last Name=").append(lastName);
        text.append(",First Name=").append(firstName);
        text.append(",Position=").append(position);
        text.append(",Birth Year=").append(birthYear);
        text.append(",DebutYear=").append(debutYear);
        return text.toString();
    }

    /** @return the player identifier */
    public String getID() {
        return this.ID;
    }

    /** @param ID the player identifier */
    public void setID(String ID) {
        this.ID = ID;
    }

    /** @return the last name */
    public String getLastName() {
        return this.lastName;
    }

    /** @param lastName the last name */
    public void setLastName(String lastName) {
        this.lastName = lastName;
    }

    /** @return the first name */
    public String getFirstName() {
        return this.firstName;
    }

    /** @param firstName the first name */
    public void setFirstName(String firstName) {
        this.firstName = firstName;
    }

    /** @return the position code */
    public String getPosition() {
        return this.position;
    }

    /** @param position the position code */
    public void setPosition(String position) {
        this.position = position;
    }

    /** @return the year of birth */
    public int getBirthYear() {
        return this.birthYear;
    }

    /** @param birthYear the year of birth */
    public void setBirthYear(int birthYear) {
        this.birthYear = birthYear;
    }

    /** @return the debut year */
    public int getDebutYear() {
        return this.debutYear;
    }

    /** @param debutYear the debut year */
    public void setDebutYear(int debutYear) {
        this.debutYear = debutYear;
    }
}
要将 FieldSet 映射到 Player 对象,需要定义一个返回 Player 的 FieldSetMapper,如以下示例所示:
package com.awei.springbatchawei.mapper;
import com.awei.springbatchawei.vo.Player;
import org.springframework.batch.item.file.mapping.FieldSetMapper;
import org.springframework.batch.item.file.transform.FieldSet;
/**
* @author Awei
* @date 2024/7/14 19:42
*/
public class PlayerFieldSetMapper implements FieldSetMapper<Player> {
public Player mapFieldSet(FieldSet fieldSet) {
if (fieldSet == null) {
return null;
}
Player player = new Player();
player.setID(fieldSet.readString(0));
player.setLastName(fieldSet.readString(1));
player.setFirstName(fieldSet.readString(2));
player.setPosition(fieldSet.readString(3));
player.setBirthYear(fieldSet.readInt(4));
player.setDebutYear(fieldSet.readInt(5));
return player;
}
}
然后可以通过正确构造FlatFileItemReader并调用read来读取文件,如以下示例所示:
String outputResource = "D:\\闲置\\lings\\分割内容.csv";

/**
 * Creates a reader for the delimited players file and opens it.
 * <p>
 * The reader is returned already opened, so the caller can call
 * {@code read()} straight away and is responsible for closing it.
 *
 * @return an opened reader that yields one {@link Player} per data line
 */
@Bean
public FlatFileItemReader<Player> flatFileItemReader() {
    // Line mapper: tokenize each line with a DelimitedLineTokenizer in its
    // default configuration, then map the resulting field set to a Player.
    DefaultLineMapper<Player> playerLineMapper = new DefaultLineMapper<>();
    playerLineMapper.setLineTokenizer(new DelimitedLineTokenizer());
    playerLineMapper.setFieldSetMapper(new PlayerFieldSetMapper());

    FlatFileItemReader<Player> playerReader = new FlatFileItemReader<>();
    // File to read from
    playerReader.setResource(new FileSystemResource(outputResource));
    playerReader.setLineMapper(playerLineMapper);
    // Skip the first line (the column header row)
    playerReader.setLinesToSkip(1);
    playerReader.open(new ExecutionContext());
    return playerReader;
}
/**
 * Reads every player from the delimited file and prints it to standard output.
 *
 * @throws Exception if reading from the file fails
 */
@Test
public void fieldItemReader() throws Exception {
    FlatFileItemReader<Player> reader = springBatchConfig.flatFileItemReader();
    try {
        Player player;
        // read() returns null once the input is exhausted
        while ((player = reader.read()) != null) {
            System.out.println(player);
        }
    } finally {
        // Always release the underlying file, even when read() throws
        reader.close();
    }
}
运行结果
按名称映射字段
DelimitedLineTokenizer 和 FixedLengthTokenizer 都支持一项附加功能,其作用与 JDBC 的 ResultSet 类似:可以把字段名称注入到这些 LineTokenizer 实现中,从而提高映射函数的可读性。首先,将平面文件中所有字段的列名注入 tokenizer(分词器),如以下示例所示:
// Register the column names, in file order, so fields can later be read by name.
tokenizer.setNames("ID", "lastName", "firstName", "position", "birthYear", "debutYear");
FieldSetMapper 可以按如下方式使用这些字段名称:
public class PlayerMapper implements FieldSetMapper<Player> {
public Player mapFieldSet(FieldSet fs) {
if(fs == null){
return null;
}
Player player = new Player();
player.setID(fs.readString("ID"));
player.setLastName(fs.readString("lastName"));
player.setFirstName(fs.readString("firstName"));
player.setPosition(fs.readString("position"));
player.setDebutYear(fs.readInt("debutYear"));
player.setBirthYear(fs.readInt("birthYear"));
return player;
}
}
固定长度读取
到目前为止,仅详细讨论了定界文件。但是,它们仅代表文件读取的一半。许多使用平面文件的组织都使用固定长度格式。固定长度文件示例如下:
UK21341EAH4121131.11customer1
UK21341EAH4221232.11customer2
UK21341EAH4321333.11customer3
UK21341EAH4421434.11customer4
UK21341EAH4521535.11customer5
虽然这看起来像一个大字段,但实际上代表了 4 个不同的字段:
ISIN:所订购商品的唯一标识符-12 个字符长。
数量:所订购商品的数量-3 个字符长。
价格:商品价格-5 个字符长。
客户(Customer):订购该商品的客户的 ID-9 个字符长。
配置FixedLengthLineTokenizer时,必须以范围的形式提供每个长度,如以下示例所示:
String resource = "D:\\闲置\\lings\\固定长度.csv";

/**
 * Creates a reader for the fixed-length file and opens it.
 * <p>
 * The reader is returned already opened, so the caller can call
 * {@code read()} straight away and is responsible for closing it.
 *
 * @return an opened reader that yields one {@link Watch} per line
 */
@Bean
public FlatFileItemReader<Watch> flatFileItemReaderFixedLength() {
    // Fixed-length tokenizer: one column range per named field
    // (isin 1-12, quantity 13-15, price 16-20, customer 21-29).
    Range[] columnRanges = {
        new Range(1, 12),
        new Range(13, 15),
        new Range(16, 20),
        new Range(21, 29)
    };
    FixedLengthTokenizer fixedTokenizer = new FixedLengthTokenizer();
    fixedTokenizer.setNames("isin", "quantity", "price", "customer");
    fixedTokenizer.setColumns(columnRanges);

    // Line mapper: tokenize, then map the field set to a Watch
    DefaultLineMapper<Watch> watchLineMapper = new DefaultLineMapper<>();
    watchLineMapper.setLineTokenizer(fixedTokenizer);
    watchLineMapper.setFieldSetMapper(new WatchFieldSetMapper());

    FlatFileItemReader<Watch> watchReader = new FlatFileItemReader<>();
    // File to read from
    watchReader.setResource(new FileSystemResource(resource));
    watchReader.setLineMapper(watchLineMapper);
    // No lines are skipped here (setLinesToSkip is intentionally not called).
    watchReader.open(new ExecutionContext());
    return watchReader;
}
接收数据的实体类
package com.awei.springbatchawei.vo;
import java.math.BigDecimal;
/**
 * One record of the fixed-length order file: ISIN, quantity, price and customer.
 * <p>
 * The accessors are written out in full because the field-set mapper populates
 * instances through the setters and the file writers extract the
 * {@code isin}, {@code quantity}, {@code price} and {@code customer}
 * properties by name.
 *
 * @author Awei
 * @date 2024/7/14 22:25
 */
public class Watch {

    private String isin;
    private int quantity;
    private BigDecimal price;
    private String customer;

    /** @return the ISIN */
    public String getIsin() {
        return isin;
    }

    /** @param isin the ISIN */
    public void setIsin(String isin) {
        this.isin = isin;
    }

    /** @return the quantity */
    public int getQuantity() {
        return quantity;
    }

    /** @param quantity the quantity */
    public void setQuantity(int quantity) {
        this.quantity = quantity;
    }

    /** @return the price */
    public BigDecimal getPrice() {
        return price;
    }

    /** @param price the price */
    public void setPrice(BigDecimal price) {
        this.price = price;
    }

    /** @return the customer identifier */
    public String getCustomer() {
        return customer;
    }

    /** @param customer the customer identifier */
    public void setCustomer(String customer) {
        this.customer = customer;
    }

    /**
     * Renders all four properties in the form
     * {@code Watch{isin='...', quantity=..., price=..., customer='...'}}.
     *
     * @return the textual form of this record
     */
    @Override
    public String toString() {
        return "Watch{" +
                "isin='" + isin + '\'' +
                ", quantity=" + quantity +
                ", price=" + price +
                ", customer='" + customer + '\'' +
                '}';
    }
}
结果集映射到实体
package com.awei.springbatchawei.mapper;
import com.awei.springbatchawei.vo.Watch;
import org.springframework.batch.item.file.mapping.FieldSetMapper;
import org.springframework.batch.item.file.transform.FieldSet;
/**
* @author Awei
* @date 2024/7/14 19:42
*/
public class WatchFieldSetMapper implements FieldSetMapper<Watch> {
public Watch mapFieldSet(FieldSet fieldSet) {
if (fieldSet == null) {
return null;
}
Watch watch = new Watch();
watch.setIsin(fieldSet.readString(0));
watch.setQuantity(fieldSet.readInt(1));
watch.setPrice(fieldSet.readBigDecimal(2));
watch.setCustomer(fieldSet.readString(3));
return watch;
}
}
运行测试
/**
 * Reads every record from the fixed-length file and prints it to standard output.
 *
 * @throws Exception if reading from the file fails
 */
@Test
public void flatFileItemReaderFixedLength() throws Exception {
    FlatFileItemReader<Watch> reader = springBatchConfig.flatFileItemReaderFixedLength();
    try {
        Watch watch;
        // read() returns null once the input is exhausted
        while ((watch = reader.read()) != null) {
            System.out.println(watch);
        }
    } finally {
        // Always release the underlying file, even when read() throws
        reader.close();
    }
}
CSV文件写入案例
分隔文件写入示例
最基本的平面文件格式是其中所有字段都由定界符分隔的格式。这可以使用DelimitedLineAggregator完成。
writer
String getOutputResource = "D:\\闲置\\lings\\watch.csv";

/**
 * Creates a writer that emits each {@link Watch} as one comma-delimited line.
 * <p>
 * The writer is returned unopened; the caller must open it before writing
 * and close it afterwards.
 *
 * @return the configured delimited-file writer
 * @throws Exception declared for callers; kept from the original signature
 */
@Bean
public FlatFileItemWriter<Watch> delimiterItemWriter() throws Exception {
    // Properties to extract from each item, in output column order
    String[] columns = {"isin", "quantity", "price", "customer"};
    BeanWrapperFieldExtractor<Watch> watchFields = new BeanWrapperFieldExtractor<>();
    watchFields.setNames(columns);
    watchFields.afterPropertiesSet();

    // Join the extracted values with a comma
    DelimitedLineAggregator<Watch> commaJoiner = new DelimitedLineAggregator<>();
    commaJoiner.setFieldExtractor(watchFields);
    commaJoiner.setDelimiter(",");

    FlatFileItemWriter<Watch> watchWriter = new FlatFileItemWriter<>();
    watchWriter.setResource(new FileSystemResource(getOutputResource));
    watchWriter.setEncoding("UTF-8");
    watchWriter.setLineSeparator("\n");
    watchWriter.setAppendAllowed(true);
    watchWriter.setLineAggregator(commaJoiner);
    return watchWriter;
}
@Autowired
private WatchDao watchDao;

/**
 * Loads all {@link Watch} rows through {@code watchDao} and writes them to the
 * delimited output file in a single chunk.
 *
 * @throws Exception if loading or writing fails
 */
@Test
void contextLoads() throws Exception {
    FlatFileItemWriter<Watch> delimiterItemWriter = springBatchConfig.delimiterItemWriter();
    delimiterItemWriter.open(new ExecutionContext());
    try {
        List<Watch> watchList = watchDao.selectAll();
        Chunk<Watch> watches = new Chunk<>(watchList);
        delimiterItemWriter.write(watches);
    } finally {
        // Always release the output file, even when loading or writing throws
        delimiterItemWriter.close();
    }
}
数据库原始数据
运行查看生成文件
固定长度写入案例
writer
/**
 * Creates a writer that emits each {@link Watch} as one fixed-width line,
 * formatted with {@code %-12s%-3s%-5s%-9s} (isin, quantity, price, customer).
 * <p>
 * The writer is returned unopened; the caller must open it before writing
 * and close it afterwards.
 *
 * @return the configured fixed-width writer
 * @throws Exception declared for callers; kept from the original signature
 */
@Bean
public FlatFileItemWriter<Watch> fieldItemWriter() throws Exception {
    // Properties to extract from each item, in output column order
    String[] columns = {"isin", "quantity", "price", "customer"};
    BeanWrapperFieldExtractor<Watch> watchFields = new BeanWrapperFieldExtractor<>();
    watchFields.setNames(columns);
    watchFields.afterPropertiesSet();

    // Lay the extracted values out with one width specifier per column
    FormatterLineAggregator<Watch> fixedWidthFormatter = new FormatterLineAggregator<>();
    fixedWidthFormatter.setFieldExtractor(watchFields);
    fixedWidthFormatter.setFormat("%-12s%-3s%-5s%-9s");

    FlatFileItemWriter<Watch> watchWriter = new FlatFileItemWriter<>();
    watchWriter.setResource(new FileSystemResource(getOutputResource));
    watchWriter.setEncoding("UTF-8");
    watchWriter.setAppendAllowed(true);
    watchWriter.setLineAggregator(fixedWidthFormatter);
    return watchWriter;
}
/**
 * Loads all {@link Watch} rows through {@code watchDao} and writes them to the
 * fixed-width output file in a single chunk.
 *
 * @throws Exception if loading or writing fails
 */
@Test
void fieldItemWriterTest() throws Exception {
    FlatFileItemWriter<Watch> fieldItemWriter = springBatchConfig.fieldItemWriter();
    fieldItemWriter.open(new ExecutionContext());
    try {
        List<Watch> watchList = watchDao.selectAll();
        Chunk<Watch> watches = new Chunk<>(watchList);
        fieldItemWriter.write(watches);
    } finally {
        // Always release the output file, even when loading or writing throws
        fieldItemWriter.close();
    }
}
运行查看数据
如果觉得输出的数据过于紧凑,可以加宽每个字段的宽度,例如每个字段各加 2:
// Same four left-aligned columns, each two characters wider (14, 5, 7, 11).
lineAggregator.setFormat("%-14s%-5s%-7s%-11s");