/**
 * Reads a CSV file line by line; {@link #parse()} splits every line on commas
 * and returns the collected rows.
 */
public class CSVParser {
    /** File to read; assigned by both constructors. */
    private File file;
    /**
     * Value returned by {@link #parse()}.
     * NOTE(review): presumably filled by store(), which parse() calls for every row - confirm.
     */
    private List<String[]> resultList = new ArrayList<String[]>();
    /**
     * Matches one double-quoted field: an optional leading comma, an opening
     * quote, the field body, a closing quote and an optional trailing comma.
     * Group 1 is the text between the outer quotes. Group 2 is the last
     * repetition of the inner sequence: a doubled quote, non-quote characters,
     * another doubled quote and more non-quote characters. Group 2 is
     * {@code null} when the body contains no doubled quotes.
     */
    private static Pattern pattern = Pattern
            .compile(",?\"([^\"]*(\"{2}[^\"]*\"{2}[^\"]*)*)\",?");
/**
 * Command-line entry point: parses a CSV file and prints every parsed row
 * together with its field count.
 *
 * @param args optional; {@code args[0]} is the path of the CSV file to parse.
 *             When no argument is given, {@code "test.csv"} is used.
 */
public static void main(String[] args) {
    // Keep the historical default so a no-argument run behaves as before.
    String csvPath = (args != null && args.length > 0) ? args[0] : "test.csv";
    CSVParser parser = new CSVParser(csvPath);
    List<String[]> rows = parser.parse();
    print(rows);
}
/**
 * Creates a parser for the given CSV file.
 *
 * @param file the file to read
 * @throws NullPointerException if {@code file} is {@code null}
 */
public CSVParser(File file) {
    if (file == null) {
        // Fail at construction time, as CSVParser(String) already does
        // (new File((String) null) throws), instead of at first use.
        throw new NullPointerException("file must not be null");
    }
    this.file = file;
}
/**
 * Creates a parser for the CSV file located at the given path.
 *
 * @param filePath path of the CSV file, passed to {@link File#File(String)}
 */
public CSVParser(String filePath) {
    File target = new File(filePath);
    this.file = target;
}
/**
 * Writes every row to standard output, one row per line, as the
 * {@link Arrays#toString(Object[])} form of the row followed by
 * {@code " Length: "} and the number of fields.
 *
 * @param list rows to print
 */
private static void print(List<String[]> list) {
    for (String[] row : list) {
        String rendered = Arrays.toString(row);
        System.out.println(rendered + " Length: " + row.length);
    }
}
/**
 * Reads the file line by line and turns every line into an array of fields.
 *
 * <p>Each line is passed through {@code handleSpecial}, split on {@code ','},
 * post-processed by {@code revert} and handed to {@code store}. I/O failures
 * are reported with {@code printStackTrace()} and do not propagate; the
 * current {@code resultList} is returned in that case as well.
 *
 * @return the {@code resultList} field
 */
public List<String[]> parse() {
    BufferedReader br = null;
    try {
        br = getReader();
        String line;
        while ((line = br.readLine()) != null) {
            line = handleSpecial(line);
            // Limit -1 keeps trailing empty fields; plain split(",") silently
            // drops them, so "a,b," would yield two fields and ",," none.
            String[] elements = line.split(",", -1);
            revert(elements);
            store(elements);
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Always release the file handle, on normal completion and on failure.
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return resultList;
}
/**
 * Replaces every {@code '|'} in each field with {@code ','}, in place.
 *
 * <p>Precondition: no original field of the line contains {@code '|'} itself,
 * otherwise those characters are turned into commas as well.
 *
 * @param elements fields of one line; the array is modified in place
 */
private void revert(String[] elements) {
    int idx = 0;
    while (idx < elements.length) {
        String field = elements[idx];
        elements[idx] = field.replace("|", ",");
        idx++;
    }
}
/** * Handling special characters for each given line. * * @param inputLine * @return */ private String handleSpecial(String inputLine) { if (inputLine == null || inputLine.trim().length() == 0) return ""; Matcher m = getMatcher(inputLine); String after = ""; while (m.find()) { after = m.group(2); String before = after; System.out.println("Before: " + before); if (after.indexOf(",") > -1) // use '|' to replace ',' for later splitting of each field of // the line,this '|' needs to be revert to ',' after the whole line being // split after = after.replace(',', '|'); if (after.indexOf("\"\"") > -1) after = after.replace("\"\"", "\""); System.out.println("After:" + after); inputLine = inputLine.replace(before, after); }