项目需求
- 从多个文件夹中读取zip包
- 解压zip包
- 读取zip包中的数据
- 把读取的数据写入到txt文件中
Util.java
import java.io.*;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.*;
/**
 * Helpers for the zip-collection job: discovering zip archives on disk, extracting the JSON
 * records stored in their text entries, and appending records to an output text file.
 *
 * <p>Entry names inside the archives are decoded as GBK. Entry <em>content</em> and the output
 * file are read and written with the platform default charset, exactly as before.
 * NOTE(review): consider pinning an explicit content charset once the producer's encoding is known.
 */
public class Util {
    /** File-name suffix that marks a zip archive; it is stripped from the names returned to callers. */
    private static final String ZIP_SUFFIX = ".zip";

    /** Charset used to decode entry names stored in the archives. */
    private static final Charset ZIP_NAME_CHARSET = Charset.forName("GBK");

    /**
     * Lists the zip archives found one level below {@code path}.
     *
     * <p>Only immediate sub-directories of {@code path} are scanned, and only regular files whose
     * name ends with {@code .zip} are reported.
     *
     * @param path absolute path of the root directory
     * @return one entry per archive in the form {@code <sub-directory>/<archive name without .zip>};
     *         empty (never {@code null}) when nothing is found or {@code path} cannot be listed
     */
    public List<String> getFileName(String path) {
        List<String> zipFileName = new ArrayList<String>();
        File[] folders = new File(path).listFiles();
        if (folders == null) {
            // listFiles() returns null (not an empty array) for a missing or unreadable directory.
            System.out.println("Not a readable directory: " + path);
            return zipFileName;
        }
        if (folders.length == 0) {
            System.out.println("This package is empty");
            return zipFileName;
        }
        for (File folder : folders) {
            if (!folder.isDirectory()) {
                continue;
            }
            File[] children = folder.listFiles();
            if (children == null) {
                continue; // unreadable sub-directory: skip instead of failing the whole scan
            }
            for (File child : children) {
                String childName = child.getName();
                if (child.isFile() && childName.endsWith(ZIP_SUFFIX)) {
                    // Strip only the trailing suffix. A regex such as replaceAll(".zip", "") would
                    // also eat "yzip" out of "myzip.zip" because '.' matches any character.
                    String baseName =
                            childName.substring(0, childName.length() - ZIP_SUFFIX.length());
                    zipFileName.add(folder.getName() + File.separator + baseName);
                }
            }
        }
        return zipFileName;
    }

    /**
     * Reads the first line of every {@code .txt} entry in the archive and splits it into JSON
     * records (see {@link #parseJsonArrayLine(String)}).
     *
     * <p>If the archive holds several {@code .txt} entries, the records of the last non-empty one
     * replace those of the earlier ones; this mirrors the long-standing behaviour of this method.
     * The archive path is printed to standard output after a successful read.
     *
     * @param path path of the zip archive
     * @return the JSON records of the last non-empty {@code .txt} entry; empty when there is none
     * @throws Exception if the archive cannot be opened or read (e.g. {@link FileNotFoundException})
     */
    public List<String> getZipWithAnt(String path) throws Exception {
        List<String> innerJsonFile = new ArrayList<String>();
        try (ZipInputStream zis = new ZipInputStream(
                new BufferedInputStream(new FileInputStream(path)), ZIP_NAME_CHARSET)) {
            ZipEntry ze;
            while ((ze = zis.getNextEntry()) != null) {
                if (!ze.getName().endsWith(".txt")) {
                    continue;
                }
                // A fresh reader per entry: a shared BufferedReader would keep buffered leftovers
                // of this entry and hand them out as the "first line" of the next one.
                String firstLine = newEntryReader(zis).readLine();
                if (firstLine != null) {
                    innerJsonFile = parseJsonArrayLine(firstLine);
                }
            }
        }
        System.out.println(path);
        return innerJsonFile;
    }

    /**
     * Reads every line of every entry whose name ends with {@code txt} and collects the JSON
     * records found on those lines. Scanning stops at the first other entry whose name starts
     * with {@code attach}. The archive path is always printed to standard output.
     *
     * @param path path of the archive, in the form {@code xxx.zip}
     * @return all JSON records in entry order, or {@code null} if the archive could not be read
     *         (the I/O error is printed to standard error)
     */
    public List<String> getZipInnerFile(String path) {
        List<String> innerJsonFile = new ArrayList<String>();
        try (ZipInputStream zin = new ZipInputStream(
                new BufferedInputStream(new FileInputStream(path)), ZIP_NAME_CHARSET)) {
            ZipEntry ze;
            while ((ze = zin.getNextEntry()) != null) {
                String entryName = ze.getName();
                if (entryName.endsWith("txt")) {
                    // Content is read straight from the zip stream; no second ZipFile handle
                    // (with a different name charset) is needed to look the entry up again.
                    BufferedReader br = newEntryReader(zin);
                    String line;
                    while ((line = br.readLine()) != null) {
                        innerJsonFile.addAll(parseJsonArrayLine(line));
                    }
                } else if (entryName.startsWith("attach")) {
                    break;
                }
            }
            return innerJsonFile;
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            System.out.println(path);
        }
        return null;
    }

    /**
     * Appends {@code json} followed by {@code "\r\n"} to the file at {@code path}, creating the
     * file and any missing parent directories first.
     *
     * @param json text to append
     * @param path path of the output file
     * @throws IOException if the parent directory cannot be created or the file cannot be written
     */
    public void writeToTxt(String json, String path) throws IOException {
        File parent = new File(path).getParentFile();
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Cannot create output directory: " + parent);
        }
        // try-with-resources closes (and flushes) the writer even when write() fails.
        try (BufferedWriter out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(path, true)))) {
            out.write(json + "\r\n");
        }
    }

    /**
     * Wraps the current zip entry in a line reader. The reader is intentionally never closed by
     * callers: closing it would close the underlying zip stream and end the entry iteration.
     */
    private static BufferedReader newEntryReader(ZipInputStream zipStream) {
        return new BufferedReader(new InputStreamReader(zipStream));
    }

    /**
     * Splits one line of the form {@code [{...},{...}]} into its {@code {...}} records.
     *
     * <p>The first and last characters are dropped, the remainder is split on {@code ",{"} and the
     * brace consumed by the split is restored on every piece but the first. Lines shorter than two
     * characters and lines with nothing between the outer characters yield no records.
     */
    private static List<String> parseJsonArrayLine(String line) {
        List<String> records = new ArrayList<String>();
        if (line.length() < 2) {
            return records; // blank line: nothing to strip, nothing to report
        }
        String body = line.substring(1, line.length() - 1);
        if (body.isEmpty()) {
            return records;
        }
        String[] parts = body.split(",\\{");
        for (int i = 0; i < parts.length; i++) {
            records.add(i == 0 ? parts[i] : "{" + parts[i]);
        }
        return records;
    }
}
Collect.java
import java.io.File;
import java.io.IOException;
import java.util.List;
/**
 * Command-line entry point: reads the JSON records from every zip archive found below a root
 * directory and appends them, one per line, to numbered text files in {@code <root>/json}.
 */
public class Collect {
    /** Root directory scanned when no command-line argument is given. */
    private static final String DEFAULT_ROOT = "/data/app/sh/notes";

    /** File-name prefix of the text files that store the JSON records. */
    private static final String OUTPUT_PREFIX = "part-r-0000";

    /** Number of write attempts after which output rolls over to the next numbered file. */
    private static final int MAX_LINES_PER_FILE = 10000;

    /**
     * Runs the collection. Each output line has the form {@code <archive name>\t<json record>}.
     *
     * <p>Failures are handled per archive and per line so that a single bad file never aborts
     * the whole run: an unreadable archive is reported and skipped, and a failed write is reported
     * while processing continues with the next record.
     *
     * @param args optional; {@code args[0]} overrides the default root directory
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : DEFAULT_ROOT;
        Util util = new Util();
        List<String> zipfilename = util.getFileName(path);

        String outputDir = path + File.separator + "json";
        // Best effort: if the directory cannot be created, every write below reports the error.
        new File(outputDir).mkdirs();

        int count = 0; // write attempts made against the current output file
        int n = 0;     // index of the current output file
        for (String file : zipfilename) {
            // path + archive name + ".zip" is the absolute path of the archive
            String fullFileName = path + File.separator + file + ".zip";
            List<String> innerJsonFile;
            try {
                innerJsonFile = util.getZipWithAnt(fullFileName); // the JSON records
            } catch (Exception ex) {
                // Deliberately keep going, but say which archive was skipped and why.
                System.err.println("Skipping unreadable archive " + fullFileName + ": " + ex);
                continue;
            }
            for (String json : innerJsonFile) {
                if (count >= MAX_LINES_PER_FILE) {
                    // Current file is full: start the next one.
                    count = 0;
                    n++;
                }
                String target = outputDir + File.separator + OUTPUT_PREFIX + n + ".txt";
                try {
                    util.writeToTxt(file + "\t" + json, target);
                } catch (IOException e) {
                    e.printStackTrace();
                } finally {
                    count++; // failed attempts count too, as they always have
                }
            }
        }
    }
}
本次实验遇到的问题:不要在main函数上直接声明throws把异常抛给JVM。这样做的话,只要有一个zip包处理失败,未被捕获的异常就会终止整个程序;应当在循环内部用try-catch捕获异常,跳过出错的文件后继续处理其余文件。
注:中间遇到的其他问题在本人其他博客已写明。