package com.chen;
import java.io.*;
import java.util.*;
import java.text.SimpleDateFormat;
/**
 * Sanity checker for delimited text files (first written 2007-09-20).
 *
 * <p>Data files delivered by customers for import often contain errors, so this tool was written
 * to check them automatically. For each file it counts the row separators (CR LF) and the column
 * separators and verifies that the column-separator count is an exact multiple of the row count.
 * A mismatch points at rows broken by, for example, half of a double-byte Chinese character
 * swallowing a line break, invisible characters, or characters that introduce extra separators.
 * It is meant to be used together with a second small program that checks the separator count
 * line by line.
 *
 * <p>Arguments: 1. file or directory path, 2. column separator option, 3. size limit.
 * Example: {@code java com.chen.chektxtbyte f:/123 -1 300}
 *
 * <ul>
 *   <li>{@code -1}: comma, {@code -2}: semicolon, {@code -3}: single pipe, {@code -4}: double pipe
 *       (also the default when the option is missing or not recognised);</li>
 *   <li>the size limit is given in units of 1,000,000 bytes; larger files are skipped.</li>
 * </ul>
 */
public class chektxtbyte {

    /** Size limit, in units of 1,000,000 bytes, applied when no third argument is given. */
    private static final long DEFAULT_MAX_SIZE_MB = 300;

    /** Number of separator bytes handled between two short pauses of the scan. */
    private static final int BYTES_PER_PAUSE = 100000;

    /**
     * Command-line entry point: prints a banner, resolves the files to check and reports the
     * row/column separator counts for each of them.
     *
     * @param args path of a file or directory, optional separator option, optional size limit
     */
    public static void main(String[] args) {
        System.out.println("//");
        System.out.println("Txt文本校验工具(Java)v1.01\r\n异常一:行数据不完整。校验行分隔和列分隔是否成比例。\r\n"
                + " \r\nchengg0769 2007-09-20");
        System.out.println("//");
        // HH = 24-hour clock; the former "hh" printed an ambiguous 12-hour value without AM/PM.
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm");
        Date started = new Date();
        long timebegin = started.getTime();
        System.out.println("日期:" + sdf.format(started));
        System.out.println();
        if (args.length < 1) {
            System.out.println("请输入文件名或者文件夹名称!");
            return;
        }

        byte[] colSeparator = parseColumnSeparator(args.length >= 2 ? args[1] : null);
        byte[] rowSeparator = {13, 10}; // CR LF

        long maxSizeMb = DEFAULT_MAX_SIZE_MB;
        if (args.length >= 3) {
            try {
                maxSizeMb = Long.parseLong(args[2]);
            } catch (NumberFormatException e) {
                System.out.println("大小限制参数无效(应为整数,单位M): " + args[2]);
                return;
            }
        }

        try {
            File target = new File(args[0]);
            File[] filelist;
            if (target.isDirectory()) {
                filelist = target.listFiles(new txtFilenameFilter("txt"));
                if (filelist == null) {
                    // listFiles returns null when the directory cannot be read.
                    System.out.println("无法读取文件夹: " + args[0]);
                    return;
                }
            } else if (target.isFile()) {
                // The prompt promises that a single file name is accepted as well.
                filelist = new File[] {target};
            } else {
                System.out.println("找不到文件或文件夹: " + args[0]);
                return;
            }

            System.out.println("文件清单: -----------------------begin----------------------------");
            printFileNameArray(filelist);
            System.out.println("文件清单: -----------------------end------------------------------");
            System.out.println();

            for (int i = 0; i < filelist.length; i++) {
                File current = filelist[i];
                if (!current.isFile()) {
                    continue; // e.g. a sub-directory whose name ends in .txt
                }
                if (current.length() > 1000000 * maxSizeMb) {
                    System.out.println(current.getName() + " 体积超过限制的:" + maxSizeMb
                            + "M,程序不检查,进入下一个!");
                    continue;
                }
                Thread.sleep(1000); // deliberate pause between files, kept from the original tool
                System.out.println(current.getName() + " -----------------begin--------------");
                try {
                    long[] counts = scanFile(current, colSeparator, rowSeparator);
                    System.out.println(describeResult(counts[0], counts[1]));
                } catch (IOException e) {
                    // One unreadable file must not abort the check of the remaining files.
                    System.out.println(" 读取失败: " + e);
                }
                System.out.println(current.getName() + " ----------------end-----------------");
            }

            long timeend = System.currentTimeMillis();
            System.out.println();
            System.out.println();
            System.out.println("工作耗时:" + (timeend - timebegin) / 1000 + "秒");
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            System.out.println("检查被中断。");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Maps the command-line separator option to the separator bytes.
     *
     * @param option {@code -1} (comma), {@code -2} (semicolon), {@code -3} (pipe),
     *     {@code -4} (double pipe); may be {@code null}
     * @return a two-element array; the second element is 0 when the separator is a single byte.
     *     Unknown or missing options yield the default double pipe.
     */
    static byte[] parseColumnSeparator(String option) {
        if ("-1".equals(option)) {
            return new byte[] {',', 0};
        }
        if ("-2".equals(option)) {
            return new byte[] {';', 0};
        }
        if ("-3".equals(option)) {
            return new byte[] {'|', 0};
        }
        return new byte[] {'|', '|'};
    }

    /** Opens {@code file} buffered, counts its separators and always closes the stream. */
    private static long[] scanFile(File file, byte[] colSeparator, byte[] rowSeparator)
            throws IOException, InterruptedException {
        InputStream in = new BufferedInputStream(new FileInputStream(file));
        try {
            return countSeparators(in, colSeparator, rowSeparator);
        } finally {
            in.close();
        }
    }

    /**
     * Counts non-overlapping occurrences of the row and column separators in a byte stream.
     *
     * <p>A separator is one byte long when its array has a single element or its second element
     * is 0, otherwise two bytes long. A first byte of a two-byte separator that is not followed
     * by the matching second byte is dropped, and the following byte is then examined as a
     * possible start of either separator, so a stray half separator never hides the next one.
     *
     * @param in stream to read until end of data; not closed by this method
     * @param colSeparator column separator bytes
     * @param rowSeparator row separator bytes
     * @return {@code {rowCount, columnSeparatorCount}}
     * @throws IOException if reading fails
     * @throws InterruptedException if the thread is interrupted during a throttling pause
     */
    static long[] countSeparators(InputStream in, byte[] colSeparator, byte[] rowSeparator)
            throws IOException, InterruptedException {
        boolean colIsSingle = colSeparator.length < 2 || colSeparator[1] == 0;
        boolean rowIsSingle = rowSeparator.length < 2 || rowSeparator[1] == 0;
        long rows = 0;
        long cols = 0;
        boolean colPending = false; // previous byte was the first half of a column separator
        boolean rowPending = false; // previous byte was the first half of a row separator
        int sincePause = 0;
        int next;
        while ((next = in.read()) != -1) {
            byte b = (byte) next;
            boolean wasColPending = colPending;
            boolean wasRowPending = rowPending;
            colPending = false;
            rowPending = false;
            if (wasColPending && b == colSeparator[1]) {
                cols++;
            } else if (wasRowPending && b == rowSeparator[1]) {
                rows++;
            } else if (b == colSeparator[0]) {
                if (colIsSingle) {
                    cols++;
                } else {
                    colPending = true;
                }
            } else if (b == rowSeparator[0]) {
                if (rowIsSingle) {
                    rows++;
                } else {
                    rowPending = true;
                }
            } else {
                continue; // ordinary data byte
            }
            // Short pause every so many separator bytes, kept from the original tool.
            if (++sincePause > BYTES_PER_PAUSE) {
                Thread.sleep(100);
                sincePause = 0;
            }
        }
        return new long[] {rows, cols};
    }

    /**
     * Builds the two-line report for one file.
     *
     * @param rows number of row separators found
     * @param cols number of column separators found
     * @return the counts followed by the verdict: anomaly when either count is 0, OK when
     *     {@code cols} is a multiple of {@code rows}, mismatch otherwise
     */
    static String describeResult(long rows, long cols) {
        String counts = " 行分隔符数量: " + rows + " 列分隔数: " + cols;
        if (rows == 0 || cols == 0) {
            return counts + "\n 异常: 行分隔或列分隔符号数量为0";
        }
        if (cols % rows == 0) {
            return counts + "\n 结果OK";
        }
        return counts + "\n 结果--比例不匹配!!!";
    }

    /** Prints the name of every file in {@code filelist}, one per line. */
    static void printFileNameArray(File[] filelist) {
        for (int i = 0; i < filelist.length; i++) {
            System.out.println(filelist[i].getName());
        }
    }
}

/** Accepts file names that end with a given extension, ignoring upper/lower case. */
class txtFilenameFilter implements FilenameFilter {
    /** Extension including the leading dot, e.g. {@code ".txt"}. */
    String ext;

    /**
     * @param ext extension without the leading dot, e.g. {@code "txt"}
     */
    txtFilenameFilter(String ext) {
        this.ext = "." + ext;
    }

    /**
     * @param dir directory containing the entry (not used)
     * @param name name of the entry
     * @return {@code true} when {@code name} ends with the extension in any letter case
     */
    public boolean accept(File dir, String name) {
        // regionMatches returns false for a negative offset, i.e. when name is shorter than ext.
        return name.regionMatches(true, name.length() - ext.length(), ext, 0, ext.length());
    }
}