因需求,需要将多个独立的txt文件合并写入同一个txt文件中。
例如:1.txt是一个文本,包含一列数据;2.txt也是一个文本,包含一列数据;需要将这两列数据合并到一个txt文件中。
用Java写个脚本,让服务器自己执行吧!
package com.hemin.mergetxt;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
/**
 * Merges a directory of single-column text files into one tab-separated file.
 *
 * <p>Two files are produced: a listing of the input file names (one per line,
 * in the same order as the merged columns) and the merged table itself. Each
 * row of the merged table starts with the 1-based line number, followed by the
 * value found on that line in every input file.
 */
public class MergeMain {

    /** Directory holding the input files; must end with a separator. */
    private static final String INPUT_DIR = "i://point//";

    /** Receives the names of the input files, in column order. */
    private static final String DIR_LISTING = "i://output//dir.txt";

    /** Receives the merged, tab-separated table. */
    private static final String MERGE_OUTPUT = "i://output//merge.txt";

    private static final String LINE_END = "\r\n";

    /**
     * Writes the file listing and then the merged table.
     *
     * @param args unused
     * @throws IOException if the input directory cannot be listed or any file
     *         cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        String filepath = INPUT_DIR;
        String[] filelist = new File(filepath).list();
        if (filelist == null) {
            // File.list() returns null when the path is not a readable directory.
            throw new IOException("Cannot list input directory: " + filepath);
        }

        try (FileWriter fwdir = new FileWriter(DIR_LISTING)) {
            for (String name : filelist) {
                fwdir.write(name);
                fwdir.write(LINE_END);
            }
        }

        if (filelist.length == 0) {
            return; // nothing to merge
        }

        // The row count is taken from the second listed file, as before; fall
        // back to the only file when the directory contains just one.
        int referenceIndex = Math.min(1, filelist.length - 1);
        int rows = getTextLines(filepath + filelist[referenceIndex]);

        mergeColumns(filepath, filelist, rows, MERGE_OUTPUT);
    }

    /**
     * Reads all input files in lockstep and writes one merged row per line.
     *
     * <p>Every file is opened exactly once and read sequentially, so the cost
     * is a single pass over the data instead of one full scan of every file
     * for every output row. All input files are held open at the same time,
     * so the process needs one file handle per input file.
     *
     * @param dir     input directory, ending with a separator
     * @param names   input file names, in column order
     * @param rows    number of rows to write
     * @param outPath path of the merged output file
     * @throws IOException if a file cannot be opened, read or written
     */
    private static void mergeColumns(String dir, String[] names, int rows, String outPath)
            throws IOException {
        BufferedReader[] readers = new BufferedReader[names.length];
        try {
            for (int k = 0; k < names.length; k++) {
                readers[k] = new BufferedReader(new FileReader(dir + names[k]));
            }
            try (FileWriter out = new FileWriter(outPath)) {
                StringBuilder row = new StringBuilder();
                for (int lineNo = 1; lineNo <= rows; lineNo++) {
                    row.setLength(0);
                    row.append(lineNo);
                    for (BufferedReader reader : readers) {
                        // readLine() yields null past end of file, which is
                        // appended as the text "null" for files that are
                        // shorter than the reference file.
                        row.append('\t').append(reader.readLine());
                    }
                    row.append(LINE_END);
                    out.write(row.toString());
                }
            }
        } finally {
            for (BufferedReader reader : readers) {
                if (reader != null) {
                    try {
                        reader.close();
                    } catch (IOException ignored) {
                        // Best effort: a failure while closing an input must
                        // not mask an earlier error or a finished merge.
                    }
                }
            }
        }
    }

    /**
     * Counts the lines in a text file.
     *
     * @param path path of the file to read
     * @return the number of lines in the file
     * @throws IOException if the file cannot be opened or read
     */
    public static int getTextLines(String path) throws IOException {
        try (BufferedReader br = new BufferedReader(new FileReader(path))) {
            int count = 0;
            while (br.readLine() != null) {
                count++;
            }
            return count;
        }
    }

    /**
     * Returns the content of one line of a text file.
     *
     * @param path path of the file to read
     * @param len  1-based number of the wanted line
     * @return the text of that line without its line terminator, or
     *         {@code null} if the file has fewer than {@code len} lines or
     *         {@code len} is less than 1
     * @throws IOException if the file cannot be opened or read
     */
    public static String getTextValue(String path, int len) throws IOException {
        try (BufferedReader br = new BufferedReader(new FileReader(path))) {
            int lineNo = 0;
            String line;
            while ((line = br.readLine()) != null) {
                lineNo++;
                if (lineNo == len) {
                    return line; // no need to read the rest of the file
                }
            }
            return null;
        }
    }
}
上午用这段代码处理5000个文本(每个文本约50万行),无奈速度实在太慢,于是想到使用Python中的pandas库,其中的DataFrame很适合处理这类问题。于是中午写出了以下代码,仅30多行,速度却令人满意。
代码如下:
#-*-coding:utf_8-*-
'''
Created on 2016年12月7日
@author: hemin
'''
import os
import pandas as pd
import numpy as np
def getPath(givepath):
    """Return the names of the entries in the directory ``givepath``.

    The names are returned exactly as ``os.listdir`` reports them: bare
    entry names (no directory prefix), in whatever order the OS yields.
    """
    return os.listdir(givepath)
def makeCsv(files, g_path, n_rows=None):
    """Build one DataFrame with a ``tag`` column plus one column per file.

    Each file is read with ``pd.read_csv`` using its defaults, so the first
    line of every file is consumed as a header and does not appear in the
    data. The ``tag`` column therefore numbers the rows starting at 2, i.e.
    with the line number each value had in its source file.

    Parameters
    ----------
    files : iterable of str
        File names, relative to ``g_path``; each becomes a column name.
    g_path : str
        Directory prefix, concatenated directly with each file name, so it
        must end with a path separator.
    n_rows : int, optional
        Number of rows in the ``tag`` column. By default it is the length
        of the longest input column, so no fixed row count is baked in.

    Returns
    -------
    pandas.DataFrame
        Columns ``tag`` followed by one column per file, in ``files`` order.
        Shorter columns are padded with NaN.
    """
    # Take the first column explicitly: read_csv returns a DataFrame, and a
    # single named Series per file is what the merged table needs.
    columns = [pd.read_csv(g_path + fname).iloc[:, 0].rename(fname)
               for fname in files]

    if n_rows is None:
        n_rows = max((len(col) for col in columns), default=0)

    tag = pd.Series(np.arange(2, n_rows + 2), name='tag')

    # One concat instead of thousands of single-column inserts.
    return pd.concat([tag] + columns, axis=1)
if __name__ == '__main__':
    # Input directory (must end with a separator) and the two output files.
    g_path = "i://point//"
    outputpath = 'i://output//merge.csv'
    outdir = 'i://output//dir.txt'

    files = getPath(g_path)

    # Record the file names, one per line, in the same order as the merged
    # columns; the merged CSV itself is written without a header row.
    with open(outdir, 'w') as fl:
        for name in files:
            fl.write(name)
            fl.write('\n')

    data = makeCsv(files, g_path)
    data.to_csv(outputpath, index=False, header=False)