Kettle 默认只能解压 zip 格式的文件；Linux 中常见的 tar、gz 等格式的文件,需要借助 Java 代码来解压。
在 Java 代码中引入需要的包,并声明要返回的变量。
java代码如下
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
// Value written to the "result11" output field. It starts as "001" and is replaced
// by the decompressed file's path once a decompression succeeds.
String result11 ="001";
/**
 * Processes one input row: decompresses a fixed .gz file and writes the resulting
 * path into the "result11" output field of the row.
 *
 * <p>The archive path is hard-coded below. The output path is the archive path with
 * the trailing ".gz" removed (and a trailing ".tar" removed as well, if present).
 * Only the gzip layer is decoded; no tar extraction is performed here.
 *
 * <p>Decompression is best-effort: if the archive does not exist or any error occurs,
 * the error is printed and the row is still passed on with the current value of
 * {@code result11} ("001" until a decompression has succeeded).
 *
 * @param smi step metadata (not used by this method)
 * @param sdi step data (not used by this method)
 * @return {@code false} when there are no more input rows, {@code true} otherwise
 * @throws KettleException declared by the step contract
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    if (first) {
        first = false;
    }

    Object[] r = getRow();
    if (r == null) {
        // No more input rows: signal completion to the following steps.
        setOutputDone();
        return false;
    }
    r = createOutputRow(r, data.outputRowMeta.size());

    String gzFilePath = "D:\\kettle_space\\c_source_datas\\2019\\03\\25\\book.txt.gz";
    File gzFile = new File(gzFilePath);
    if (gzFile.exists()) {
        // Declared outside the try so that the finally block can always release them.
        // (try/finally is used instead of try-with-resources to stay compatible with
        // older embedded compilers that may not support the latter.)
        FileInputStream fin = null;
        GZIPInputStream gzin = null;
        FileOutputStream fout = null;
        try {
            fin = new FileInputStream(gzFile); // raw compressed input
            gzin = new GZIPInputStream(fin); // gzip decoding layer

            // Strip the ".gz" suffix, then a ".tar" suffix if one remains.
            String outputFile = gzFilePath.substring(0, gzFilePath.lastIndexOf('.'));
            if (outputFile.endsWith(".tar")) {
                outputFile = outputFile.substring(0, outputFile.lastIndexOf('.'));
            }

            fout = new FileOutputStream(outputFile); // decompressed output
            byte[] b = new byte[1024];
            int num;
            while ((num = gzin.read(b, 0, b.length)) != -1) {
                fout.write(b, 0, num);
            }

            // Close the output inside the try: if the final close fails, the error is
            // reported below and the path is not published as a successful result.
            fout.close();
            result11 = outputFile;
        } catch (Exception ex) {
            // Best-effort: report the problem and still forward the row.
            ex.printStackTrace();
        } finally {
            // Release every stream on all paths, including failures part-way through
            // (e.g. the file is not valid gzip). Closing an already closed stream is harmless.
            if (fout != null) {
                try {
                    fout.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails during cleanup.
                }
            }
            if (gzin != null) {
                try {
                    gzin.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails during cleanup.
                }
            }
            if (fin != null) {
                try {
                    fin.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails during cleanup.
                }
            }
        }
    }

    get(Fields.Out, "result11").setValue(r, result11);
    // Send the row on to the next step.
    putRow(data.outputRowMeta, r);
    return true;
}
最后,通过输出字段 result11 返回解压后文件的路径。