背景
lcms系统中,零件来源于Pbom,通过读取远程文件夹下的BKM_MATERIAL_20160523023514.TXT文件,得到零件主数据(17w+)。
接口定义:
BKM_MATERIAL_20160523023514.TXT内容如下:
0A6409905H,取力器支架,GETRIEBETRAEGER,1000,上汽大众 SVW,L4H,Tiguan LAH零件,PCK,CKD,9999,未作材料组定义的LC零,PC,只,N
0AF300040T,变速器总成,GETRIEBE,1000,上汽大众 SVW,L4Q,Octavia 暂不用件,P4H,许永钰,125A,"发动机变速器,轴承,合",PC,只,N
…
解析字符串
一行字符串中因为包含引号,需要把双引号内的数据算作整体,这样就不能直接通过逗号来分割字符串字段作为结果。
字符串lines:
02T300053M,"1,4L 手动链条发动机的变速箱","1,4L 手动链条发动机的变速箱",1000,上汽大众 SVW,L4K,New Polo暂不用件,P4H,许永钰,125A,"发动机变速器,轴承,合",PC,只,"M,N"
方法一
定义Line对象,包含row1index、row2index、row3index和value共4个属性。
定义3个List<Line>,分别为list、list2、list3。
先按照[,"]分组,把lines分成5段,把没有逗号的存入list,有逗号的存入list2
再遍历list2,按照["]分组,如果以逗号开头,则存入list3,否则存入list
最后 以[,]分组,如果不为空,则存入list
最后得到list中就包含了这14个字段啦,然而顺序是乱掉的,所以还要按照row1index,row2index,row3index升序排序
- 这样每次存入listX的时候,都需要重新构造Line对象
- Line.java
/**
* Line.java
* Created at 2016-3-12
* Created by mazan
* Copyright (C) 2016 SHANGHAI VOLKSWAGEN, All rights reserved.
*/
package com.mz.algorithm.str;
import org.apache.commons.lang.builder.CompareToBuilder;
public class Line implements Comparable<Line>{
private int row1index;
private int row2index;
private int row3index;
private String value;
public Line() {
};
public Line(String value,int row1index) {
super();
this.row1index = row1index;
this.value = value;
}
public Line(String value,int row1index, int row2index) {
super();
this.row1index = row1index;
this.row2index = row2index;
this.value = value;
}
public Line(String value,int row1index, int row2index, int row3index) {
super();
this.row1index = row1index;
this.row2index = row2index;
this.row3index = row3index;
this.value = value;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
public int getRow1index() {
return row1index;
}
public void setRow1index(int row1index) {
this.row1index = row1index;
}
public int getRow2index() {
return row2index;
}
public void setRow2index(int row2index) {
this.row2index = row2index;
}
public int getRow3index() {
return row3index;
}
public void setRow3index(int row3index) {
this.row3index = row3index;
}
@Override
public String toString() {
return "Line [row1index=" + row1index +
", row2index=" + row2index +
", row3index=" + row3index +
", value=" + value + "]";
}
/**
* 按照row1index,row2index,row3index排序
* commons-lang-2.6.jar
*/
@Override
public int compareTo(Line o) {
return new CompareToBuilder()
.append(row1index,o.row1index)
.append(row2index,o.row2index)
.append(row3index,o.row3index)
.toComparison();
}
}
- LineSplit.java
/**
* LineSplit.java
* Created at 2016-3-12
* Created by mazan
* Copyright (C) 2016 SHANGHAI VOLKSWAGEN, All rights reserved.
*/
package com.mz.algorithm.str;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Splits one comma-separated record into fields, treating text enclosed in
 * double quotes as a single field even when it contains commas.
 *
 * <p>The record is split in three passes: first on {@code ,"}, then on
 * {@code "}, and finally on {@code ,}. Every piece remembers its position in
 * each pass (see {@code Line}), so sorting the pieces afterwards restores
 * their original left-to-right order.</p>
 *
 * <p>Limitations: empty unquoted fields are dropped, and the enclosing quotes
 * are removed from quoted fields.</p>
 */
public class LineSplit {
    /**
     * Runs the splitter on a sample record of 14 fields, four of which are
     * quoted and contain commas.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String lines = "02T300053M,\"1,4L 手动链条发动机的变速箱\",\"1,4L 手动链条发动机的变速箱\"," +
                "1000,上汽大众 SVW,L4K,New Polo暂不用件,P4H,许永钰,125A,\"发动机变速器,轴承,合\",PC,只,\"M,N\"";
        Line(lines);
    }

    /**
     * Splits {@code lines} into fields and prints the intermediate pieces, the
     * unsorted fields, the elapsed milliseconds and the sorted fields to
     * standard output.
     *
     * @param lines one record; must not be {@code null}
     */
    public static void Line(String lines) {
        // Finished fields, in the order they were discovered (not record order).
        List<Line> list = new ArrayList<Line>();
        // Pieces that still have to be split on the quote character.
        List<Line> list2 = new ArrayList<Line>();
        // Runs of unquoted fields that still have to be split on commas.
        List<Line> list3 = new ArrayList<Line>();
        long t1 = System.currentTimeMillis();
        // A record that opens with a quote makes pass 2 emit an empty leading piece.
        boolean startsQuoted = lines.startsWith("\"");

        // Pass 1: split on [,"]
        String[] row1 = lines.split(",\"", 0);
        for (int i = 0; i < row1.length; i++) {
            System.out.println("row1["+i+"]:"+row1[i]);
            Line l1 = new Line(row1[i], i);
            // Only a piece with neither a comma nor a quote is already a finished
            // field; a comma-free piece such as abc" still carries its closing quote.
            if (noDouHao(row1[i]) && row1[i].indexOf("\"") == -1) {
                list.add(l1);
            } else {
                list2.add(l1);
            }
        }
        System.out.println("===============");

        // Pass 2: split on ["]
        for (Line line : list2) {
            String[] row2 = line.getValue().split("\"");
            for (int i = 0; i < row2.length; i++) {
                System.out.println("row2["+i+"]:"+row2[i]);
                Line l2 = new Line(row2[i], line.getRow1index(), i);
                boolean recordHead = line.getRow1index() == 0 && i == 0;
                if (recordHead && startsQuoted) {
                    // Empty text in front of the opening quote: not a field.
                    continue;
                }
                // The head of a record that does not open with a quote is a run of
                // unquoted fields too, even though it has no leading comma.
                if (recordHead || row2[i].startsWith(",")) {
                    list3.add(l2);
                } else {
                    list.add(l2);
                }
            }
        }
        System.out.println("=========================");

        // Pass 3 (last): split the unquoted runs on [,]
        for (Line line : list3) {
            String[] row3 = line.getValue().split(",");
            for (int i = 0; i < row3.length; i++) {
                System.out.println("row3["+i+"]:"+row3[i]);
                Line l3 = new Line(row3[i], line.getRow1index(), line.getRow2index(), i);
                // Skips the empty piece in front of a leading comma (and any other empty field).
                if (!"".equals(row3[i])) {
                    list.add(l3);
                }
            }
        }
        System.out.println("=============初次分组后的list========");
        printLineList(list);

        // All fields are present now but out of order, so sort by the three indexes.
        System.out.println("=============sort list========");
        Line[] lineArr = new Line[list.size()];
        list.toArray(lineArr);
        Arrays.sort(lineArr);
        long t2 = System.currentTimeMillis();
        long t = t2 - t1;
        System.out.println("耗时:"+t);
        System.out.println("=============sort 后的list========");
        printLineList(lineArr);
    }

    /** Prints every element of {@code list} on its own line. */
    private static void printLineList(List<Line> list) {
        for (int i = 0; i < list.size(); i++) {
            System.out.println(list.get(i));
        }
    }

    /** Prints the array length followed by every element on its own line. */
    private static void printLineList(Object[] list) {
        System.out.println("line[].length="+list.length);
        for (int i = 0; i < list.length; i++) {
            System.out.println(list[i]);
        }
    }

    /** Returns {@code true} when {@code str} contains no comma. */
    private static boolean noDouHao(String str) {
        return str.indexOf(",") == -1;
    }
}
方法二
- 定义一个Index类,包含i, start, end 共3个属性,初始均为0
- 定义一个prop字符串数组,存放lines按照逗号分隔后的数组
- 定义一个方法String mergeName(Index index, String[] prop),用来处理prop,并按照顺序返回解析到的零件字段
先取得 prop[index.i++],定义为p
- 如果不以双引号开头,则返回p
- 如果以双引号开头,且以引号结尾,同样返回p
- 如果以引号开头,不以引号结尾:
- 以index.i 开始,到prop.length截止,i++循环判断
- 当下一个字段不是以引号结尾,则加在当前p之后,i++继续判断
- 当下一个字段是以引号结尾,则加在当前p之后,i++,跳出循环
继续执行mergeName方法,得到下一个字段
- Index.java
/**
* Index.java
* Created at 2016-3-12
* Created by mazan
* Copyright (C) 2016 SHANGHAI VOLKSWAGEN, All rights reserved.
*/
package com.mz.algorithm.str;
/**
 * Mutable holder for three int counters, each starting at 0.
 */
public class Index {
    /** Counter {@code i}; 0 on a new instance. */
    public int i;

    /** Counter {@code start}; 0 on a new instance. */
    public int start;

    /** Counter {@code end}; 0 on a new instance. */
    public int end;
}
- LineSplit2.java
/**
* LineSplit2.java
* Created at 2016-3-12
* Created by mazan
* Copyright (C) 2016 SHANGHAI VOLKSWAGEN, All rights reserved.
*/
package com.mz.algorithm.str;
import java.util.ArrayList;
import java.util.List;
/**
 * Splits one comma-separated record into fields, treating text enclosed in
 * double quotes as a single field even when it contains commas.
 *
 * <p>The record is first split on every comma; the tokens that belong to one
 * quoted field are then joined back together. Quoted fields keep their
 * enclosing quotes in the result.</p>
 */
public class LineSplit2 {
    /** Double-quote character that opens and closes a quoted field. */
    private static final String QUOTE = "\"";
    /** Field separator. */
    private static final String COMMA = ",";

    /**
     * Parses a sample record of 14 fields and prints the record, the elapsed
     * milliseconds and one field per line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String lines = "02T300053M,\"1,4L 手动链条发动机的变速箱\",\"1,4L 手动链条发动机的变速箱\"," +
                "1000,上汽大众 SVW,L4K,New Polo暂不用件,P4H,许永钰,125A,\"发动机变速器,轴承,合\",PC,只,\"M,N\"";
        System.out.println(lines);
        long t1 = System.currentTimeMillis();
        List<String> list = new LineSplit2().getPartInfo(lines);
        long t2 = System.currentTimeMillis();
        long t = t2 - t1;
        System.out.println("耗时:"+t);
        printLineList(list);
    }

    /**
     * Parses one record into its fields.
     *
     * <p>All fields are returned, however many the record has; a record with
     * fewer than 14 fields no longer fails with an
     * {@code ArrayIndexOutOfBoundsException}.</p>
     *
     * @param line the record; must not be {@code null}
     * @return the fields in record order
     */
    private List<String> getPartInfo(String line) {
        Index index = new Index();
        // A negative limit keeps trailing empty fields instead of dropping them.
        String[] prop = line.split(COMMA, -1);
        List<String> fields = new ArrayList<String>();
        while (index.i < prop.length) {
            fields.add(mergeName(index, prop));
        }
        return fields;
    }

    /**
     * Reads the next field, joining the comma-split tokens of a quoted field
     * back together.
     *
     * @param index cursor into {@code prop}; {@code index.i} is advanced past every token consumed
     * @param prop  the record split on commas
     * @return the next field; an unterminated quoted field runs to the end of the record
     */
    private String mergeName(Index index, String[] prop) {
        String first = prop[index.i++];
        if (!first.startsWith(QUOTE) || isClosedQuote(first)) {
            return first;
        }
        StringBuilder merged = new StringBuilder(first);
        while (index.i < prop.length) {
            String next = prop[index.i++];
            merged.append(COMMA).append(next);
            if (next.endsWith(QUOTE)) {
                break;
            }
        }
        return merged.toString();
    }

    /**
     * Tells whether a token that starts with a quote also holds its closing
     * quote. A lone {@code "} is only an opening quote (the field content
     * starts with a comma), so at least two characters are required.
     */
    private static boolean isClosedQuote(String token) {
        return token.length() >= 2 && token.endsWith(QUOTE);
    }

    /** Prints every element of {@code list} on its own line. */
    private static void printLineList(List<String> list) {
        for (int i = 0; i < list.size(); i++) {
            System.out.println(list.get(i));
        }
    }
}