我有一个 CSV 文件需要处理,每一行的格式如下:
48674530,"Greentech Out, Limited",5200-000-04-13-1,"20/F, Euro Tntre, 21 eux Rd CentralHK","https://www.icris.cr.gov.hk/csci/cnc_comp.do"
最终要把每个字段拆成单独的一行(保留字段原有的引号),处理成:
48674530
"Greentech Out, Limited"
5200-000-04-13-1
"20/F, Euro Tntre, 21 eux Rd CentralHK"
"https://www.icris.cr.gov.hk/csci/cnc_comp.do"
/**
 * Small demo that reads a CSV file line by line, splits every line into its fields and prints
 * one field per output line.
 *
 * <p>Fields are split on commas that lie outside double-quoted sections; the quotes themselves
 * are kept in the printed output. {@link #parseLine(String, boolean)} is an alternative,
 * hand-written splitter that strips the surrounding quotes instead; it is not called by
 * {@link #main(String[])}.
 */
public class Test {

    /** Location of the CSV file processed by {@link #main(String[])}. */
    private static final String CSV_PATH = "D:\\My Files\\Desktop\\bug#9236\\test1.csv";

    /**
     * Matches a comma that is followed by an even number of double quotes up to the end of the
     * line, i.e. a comma that is not inside a quoted field. Compiled once instead of per line.
     */
    private static final java.util.regex.Pattern FIELD_SEPARATOR =
            java.util.regex.Pattern.compile(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)");

    /**
     * Reads the CSV file at {@code CSV_PATH} and prints each field on its own line.
     *
     * @param args ignored
     * @throws Exception if the file cannot be opened or read ({@code ParseException} is only
     *     kept in the signature for compatibility; nothing here throws it)
     */
    public static void main(String[] args) throws ParseException, Exception {
        BufferedReader reader = new BufferedReader(new FileReader(new File(CSV_PATH)));
        try {
            printFields(reader, System.out);
        } finally {
            // Always release the file handle, even when reading fails.
            reader.close();
        }
    }

    /**
     * Splits every line supplied by {@code reader} and prints each field on its own line.
     *
     * <p>Lines are fetched with {@code readLine()} only. Probing for end of input with
     * {@code read()} would swallow the first character of every line and hand a {@code null}
     * line to the splitter at the end of the input.
     *
     * <p>Package-private so the logic can be exercised without touching the file system.
     *
     * @param reader source of CSV lines; not closed by this method
     * @param out destination for the fields
     * @throws IOException if reading from {@code reader} fails
     */
    static void printFields(BufferedReader reader, java.io.PrintStream out) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            for (String field : splitFields(line)) {
                out.println(field);
            }
        }
    }

    /**
     * Splits one CSV line on the commas that are outside double-quoted sections.
     *
     * @param line a single CSV line, must not be {@code null}
     * @return the fields in order, with any surrounding quotes left in place
     */
    static String[] splitFields(String line) {
        return FIELD_SEPARATOR.split(line);
    }

    /**
     * Hand-written splitter for one CSV line that removes the quotes around quoted fields and
     * turns a doubled quote inside a quoted field into a single quote character.
     *
     * <p>The line is trimmed first; an empty line yields an empty list. A quote that appears
     * after a field has already started is treated as ordinary text, and text that follows a
     * closing quote is appended to the field up to the next comma. An empty unquoted field at
     * the very end of the line (for example the one after the comma in {@code "a,"}) is not
     * reported.
     *
     * @param src the CSV line, must not be {@code null}
     * @param isNeedTrim whether every field is trimmed before it is added to the result
     * @return the fields of the line in order
     */
    private static List<String> parseLine(String src, boolean isNeedTrim) {
        List<String> fields = new LinkedList<String>();
        src = src.trim();
        if (src.length() == 0) {
            return fields;
        }
        StringBuilder current = new StringBuilder();
        boolean inQuotes = false;
        for (int i = 0; i < src.length(); i++) {
            char ch = src.charAt(i);
            if (ch == '"') {
                if (inQuotes) {
                    // Look at the character after this quote to decide what the quote means.
                    i++;
                    if (i >= src.length()) {
                        // Closing quote was the last character of the line.
                        addField(fields, current, isNeedTrim);
                    } else if (src.charAt(i) == '"') {
                        // Doubled quote: an escaped literal quote.
                        current.append('"');
                    } else if (src.charAt(i) == ',') {
                        // Closing quote directly followed by the separator.
                        addField(fields, current, isNeedTrim);
                        inQuotes = false;
                    } else {
                        // Stray text after the closing quote: keep it up to the next comma.
                        i = appendUpToComma(src, i, current);
                        addField(fields, current, isNeedTrim);
                        inQuotes = false;
                    }
                } else if (current.length() == 0) {
                    // Quote at the very start of a field opens a quoted field.
                    inQuotes = true;
                } else {
                    // Quote in the middle of an unquoted field: copy verbatim up to the comma.
                    i = appendUpToComma(src, i, current);
                    addField(fields, current, isNeedTrim);
                }
            } else if (ch == ',' && !inQuotes) {
                addField(fields, current, isNeedTrim);
            } else {
                current.append(ch);
            }
        }
        if (current.length() != 0) {
            addField(fields, current, isNeedTrim);
        }
        return fields;
    }

    /** Adds the buffered text (optionally trimmed) to {@code fields} and empties the buffer. */
    private static void addField(List<String> fields, StringBuilder current, boolean isNeedTrim) {
        String value = current.toString();
        fields.add(isNeedTrim ? value.trim() : value);
        current.setLength(0);
    }

    /**
     * Appends {@code src} from {@code from} up to (not including) the next comma, or to the end
     * of the string when there is none, and returns the index where copying stopped.
     */
    private static int appendUpToComma(String src, int from, StringBuilder target) {
        int comma = src.indexOf(',', from);
        int end = comma < 0 ? src.length() : comma;
        target.append(src, from, end);
        return end;
    }
}
/**
 * Regex-based parser for a single line of comma-separated values.
 *
 * <p>A cell may be wrapped in double quotes, in which case it may contain commas, and a doubled
 * quote inside it stands for one literal quote character. The surrounding quotes are removed
 * from the returned cell values.
 */
public class CSVParser {

    /**
     * Matches one raw cell together with its terminating comma: any number of quoted sections
     * followed by text without commas.
     */
    private static final Pattern LINE_PATTERN =
            Pattern.compile("(\"[^\"]*(\"{2})*[^\"]*\")*[^,]*,");

    /**
     * Captures, in group 1, the content of a raw cell without its optional surrounding quotes
     * and without the terminating comma. Compiled once instead of once per cell.
     */
    private static final Pattern CELL_CONTENT_PATTERN =
            Pattern.compile("(?sm)\"?([^\"]*(\"{2}[^\"]*)*[^\"]*)\"?.*,");

    /**
     * Parses one CSV line and trims every cell.
     *
     * @param line the line to parse, may be {@code null}
     * @return the cells of the line, or {@code null} when the line is {@code null}, empty, or
     *     consists only of empty cells
     */
    public static String[] parseLine(String line) {
        return parseLine(line, true);
    }

    /**
     * Parses one CSV line into its cells.
     *
     * @param line the line to parse, may be {@code null}
     * @param trimEmptyCell when {@code true}, every cell is trimmed of leading and trailing
     *     whitespace before it is returned and before it is checked for emptiness
     * @return the cells of the line, or {@code null} when the line is {@code null}, empty, or
     *     every cell has length zero
     */
    public static String[] parseLine(String line, boolean trimEmptyCell) {
        if (line == null || line.length() == 0) {
            return null;
        }
        ArrayList<String> cells = new ArrayList<String>();
        boolean allEmpty = true;
        // A comma is appended so that the last cell is terminated like all the others.
        Matcher cellMatcher = LINE_PATTERN.matcher(line + ",");
        while (cellMatcher.find()) {
            // Strip the surrounding quotes and the terminating comma.
            String cell = CELL_CONTENT_PATTERN.matcher(cellMatcher.group()).replaceAll("$1");
            // Collapse escaped (doubled) quotes into single quote characters.
            cell = cell.replace("\"\"", "\"");
            if (trimEmptyCell) {
                cell = cell.trim();
            }
            if (cell.length() > 0) {
                allEmpty = false;
            }
            cells.add(cell);
        }
        // Callers receive null, not an array of blanks, when the line carries no data.
        return allEmpty ? null : cells.toArray(new String[0]);
    }
}