package Test;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
/**
 * Reads a CSV file line by line and prints the sixth column (index 5) of every
 * row to standard output, prefixed with the literal text {@code major}.
 *
 * <p>Rows are split on commas that lie outside double-quoted fields, so a value
 * such as {@code "New York, NY"} stays in one piece. The surrounding quotes and
 * any whitespace around a field are kept as they appear in the file.
 */
public class ReadCSV_major {

    /** Input file read by the no-argument {@link #run()}. */
    private static final String DEFAULT_CSV_PATH = "/Users/huazhe/Desktop/项目数据/Professors.csv";

    /** Output file opened by the no-argument {@link #run()}. */
    private static final String DEFAULT_OUTPUT_PATH = "/Users/huazhe/Desktop/项目数据/major.txt";

    /** Zero-based index of the column that gets printed. */
    private static final int MAJOR_COLUMN = 5;

    /**
     * Matches a comma only when the rest of the line contains an even number of
     * double quotes, i.e. a comma that is not inside a quoted field. Compiled
     * once instead of on every line.
     */
    private static final Pattern CSV_SPLIT = Pattern.compile(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)");

    /**
     * Program entry point; processes the default CSV file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ReadCSV_major read = new ReadCSV_major();
        read.run();
    }

    /** Processes the default input file, opening the default output file. */
    public void run() {
        run(DEFAULT_CSV_PATH, DEFAULT_OUTPUT_PATH);
    }

    /**
     * Prints {@code "major" + <column 5>} for every row of {@code csvPath}.
     *
     * <p>The input is decoded as UTF-8, the same encoding the output writer uses,
     * rather than the platform default. Rows with fewer than six fields are
     * reported on standard error and skipped. I/O failures are reported with a
     * stack trace and end processing; they are not rethrown.
     *
     * @param csvPath    path of the CSV file to read
     * @param outputPath path of the output file; it is created or truncated
     */
    public void run(String csvPath, String outputPath) {
        // The writer is opened first so the output file is created/truncated even
        // when the CSV cannot be opened. Nothing is written to it, so it ends up empty.
        // NOTE(review): presumably the majors were meant to be saved here - confirm.
        try (PrintWriter writer = new PrintWriter(outputPath, "UTF-8");
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new FileInputStream(csvPath), StandardCharsets.UTF_8))) {
            String line;
            int lineNumber = 0;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                String[] fields = splitCsvLine(line);
                if (fields.length <= MAJOR_COLUMN) {
                    // A short row must not abort the whole run with an index error.
                    System.err.println("Skipping line " + lineNumber + ": expected at least "
                            + (MAJOR_COLUMN + 1) + " fields but found " + fields.length);
                    continue;
                }
                System.out.println("major" + fields[MAJOR_COLUMN]);
            }
        } catch (IOException e) {
            // Covers FileNotFoundException and UnsupportedEncodingException as well.
            e.printStackTrace();
        }
    }

    /**
     * Splits one CSV line on the commas that are outside double-quoted fields.
     *
     * <p>Trailing empty fields are kept (limit {@code -1}), so a row whose last
     * column is empty still yields an entry for that column.
     *
     * @param line one line of CSV text, without the line terminator
     * @return the fields in order, with quotes and surrounding whitespace intact
     */
    static String[] splitCsvLine(String line) {
        return CSV_SPLIT.split(line, -1);
    }
}
当大家进行这一操作的时候通常会遇到如下问题。
1: 想分割的数据如下:
"123","Jack jona","Computer science"
"234","Joen Dan", "Philosophy"
//遇到这种数据(字段内部不含逗号)时,可以直接用逗号进行分割。
2: 分割数据如下:
"123", "jack jona", "New York, NY"
"234", "Lee Jack", "Fort myers, FL"
//遇到这种数据(引号内的字段本身含有逗号)时,可以使用如下正则表达式
,(?=([^\"]*\"[^\"]*\")*[^\"]*$)
//进行分割(其中的 \" 是 Java 字符串里对双引号的转义写法)。这种方式的好处是只在引号外面的逗号处分割,引号内的逗号不会被分割。
//简单地说,这种分割得出的结果是
123 jack jona New York, NY
234 Lee Jack Fort myers, FL
//而简单地使用逗号分割,结果是
123 jack jona New York NY
234 Lee Jack Fort myers FL