关闭

文本文件导入导出校验工具[原创]

775人阅读 评论(0) 收藏 举报

package com.chen;

import java.io.*;
import java.util.*;
import java.text.SimpleDateFormat;

/**
因为客户导入的数据经常有一些错误,为了自动检查文件而开发
20070920
 parm:1.文件所在路径 2.分隔符号 3.大小限制
 java f:/123 -1 300
 判断每个文件的行分隔符号,换行是否成比例
如果汉字中有半个汉字而造成跳行,不可见字符等,或者有繁体字造成多出分隔符,可查出来

此工具可配合另外一个小程序(按行查询每行分隔数是否一致)一起使用。
*/
public class chektxtbyte {
  public static void main(String[] args) {
    System.out.println(
        "//////////////////////////////////////////////////////////////////////////");
    System.out.println("Txt文本校验工具(Java)v1.01/r/n异常一:行数据不完整。校验行分隔和列分隔是否成比例。/r/n" +
                       "       /r/nchengg0769 2007-09-20");
    System.out.println("//////////////////////////////////////////////////////////////////////////");

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd hh:mm");
    Date dates;
    dates = new Date();
    long timebegin = dates.getTime();
    System.out.println("日期:" + sdf.format(dates));
    System.out.println();

    if (args.length < 1) {
      System.out.println("请输入文件名或者文件夹名称!");
      return;
    }

    byte[] SPLITERCOL = new byte[2]; //列分隔符号
    SPLITERCOL[0] = '|';
    SPLITERCOL[1] = '|';

    byte[] SPLITERROW = new byte[2];
    SPLITERROW[0] = 13;
    SPLITERROW[1] = 10;

    long FileLengthMax = 300;  //500M限制

    if (args.length >=2){
     if (args[1].equalsIgnoreCase("-1"));
     {
        SPLITERCOL[0] = ',';
        SPLITERCOL[1] = '/0';
     }
     if (args[1].equalsIgnoreCase( "-2"))
     {
        SPLITERCOL[0] = ';';
        SPLITERCOL[1] = '/0';
     }
     if (args[1].equalsIgnoreCase( "-3"))
     {
        SPLITERCOL[0] = '|';
        SPLITERCOL[1] = '/0';
     }
     if (args[1].equalsIgnoreCase( "-4"))
     {
        SPLITERCOL[0] = '|';
        SPLITERCOL[1] = '|';
     }
   }

    if (args.length ==3){
      FileLengthMax = Long.parseLong(args[2]);
    }

    File[] filelist;

    try
    {
        File f = new File(args[0]);
        if (f.isDirectory())
        {
          filelist = f.listFiles(new txtFilenameFilter("txt"));
          System.out.println("文件清单: -----------------------begin----------------------------");
          printFileNameArray(filelist); //Out print file name
          System.out.println("文件清单: -----------------------end------------------------------");
          System.out.println();

          FileInputStream fis;
          String LineString; //单行数据
          String LastLineString; //上行数据,便于比较

          long QtyRows =0;
          long QtyComma = 0;  //标准分隔符数量
          int SleepIdle=0;
          byte[] ReadByte = new byte[1];
          int readLen=0;
          boolean PreColSpiliter;  //首次比较等于列的第一个分隔符
          boolean PreRowSpiliter;  //首次比较等于行的第一个分隔符

          for (int i = 0; i < filelist.length; i++) {
            if (filelist[i].length() > 1000000 * FileLengthMax)
            {
              System.out.println("体积超过限制的:" + FileLengthMax + "M,程序不检查,进入下一个!");
              continue;
            }
            Thread.sleep(1000);
            fis = new FileInputStream(filelist[i]);

            SleepIdle = 0;
            QtyRows = 0;
            QtyComma = 0;
            PreColSpiliter = false;
            PreRowSpiliter = false;

            System.out.println(filelist[i].getName() + " -----------------begin--------------");
            while ((readLen = fis.read(ReadByte,0,1)) > 0){
              //System.out.println(ReadByte[0]);
              if (ReadByte[0] !=SPLITERCOL[0] && ReadByte[0] !=SPLITERCOL[1] && ReadByte[0] !=SPLITERROW[0] && ReadByte[0] !=SPLITERROW[1]) {
                PreColSpiliter = false;
                PreRowSpiliter = false;
                continue;
              }
              //匹配列分隔第一个
              if (ReadByte[0] == SPLITERCOL[0] && !PreColSpiliter && !PreRowSpiliter){
                if (SPLITERCOL[1] == '/0') {
                    QtyComma++;
                  }
                  else {
                    if (PreColSpiliter) {
                      QtyComma++;
                      PreColSpiliter = false; //复位
                    }
                    else {
                      PreColSpiliter = true; //置位
                    }
                  }
              }
              else if(ReadByte[0] == SPLITERCOL[1] && PreColSpiliter && !PreRowSpiliter){
                QtyComma++;
                PreColSpiliter = false; //复位
              }
              else if(ReadByte[0] == SPLITERROW[0] && !PreRowSpiliter &&!PreColSpiliter){
                if (SPLITERROW[1] == '/0') {
                    QtyRows++;
                  }
                  else {
                    if (PreColSpiliter) {
                      QtyRows++;
                      PreRowSpiliter = false; //复位
                    }
                    else {
                      PreRowSpiliter = true; //置位
                    }
                  }
              }
              else if(ReadByte[0] == SPLITERROW[1] && PreRowSpiliter && !PreColSpiliter){
                QtyRows++;
                PreRowSpiliter = false; //复位
              }

              if (SleepIdle++ == 100000) {
                Thread.sleep(100);
                SleepIdle = 0;
              }
            }
            fis.close();

            if (QtyRows ==0 ||QtyComma==0){
              System.out.println("       行分隔符数量: " + QtyRows +
                                 "  列分隔数: " + QtyComma + "/n       异常: 行分隔或列分隔符号数量为0");
            }
            else{
              if (QtyComma % QtyRows == 0) {
                System.out.println("       行分隔符数量: " + QtyRows +
                                   "  列分隔数: " + QtyComma + "/n       结果OK");
              }
              else {
                System.out.println("       行分隔符数量: " + QtyRows +
                                   "  列分隔数: " + QtyComma +
                                   "/n       结果--比例不匹配!!!");
              }
            }
            System.out.println(filelist[i].getName() + " ----------------end-----------------");
          }
        }
        long timeend=new Date().getTime();
        System.out.println();
        System.out.println();
        System.out.println("工作耗时:"+(timeend - timebegin)/1000 + "秒");
        System.gc();

    }catch(Exception e)
    {
        e.printStackTrace();
    }
  }
  static void printFileNameArray(File[] filelist)
  {
      for(int i=0;i<filelist.length;i++)
      {
          System.out.println(filelist[i].getName());
      }
  }
}

/**
 * File-name filter that accepts names ending in a given extension.
 *
 * <p>The comparison ignores letter case, so {@code DATA.TXT} matches the extension
 * {@code txt}.
 */
class txtFilenameFilter implements FilenameFilter
{
   /** Suffix to match, including the leading dot (e.g. {@code ".txt"}). */
   String ext;

   /**
    * @param ext extension without the leading dot, e.g. {@code "txt"}
    */
   txtFilenameFilter(String ext){
      this.ext = "." + ext;
    }

    /**
     * @param dir directory containing the file; not used
     * @param name file name to test
     * @return {@code true} when {@code name} ends with the extension, ignoring case
     */
    @Override
    public boolean accept(File dir, String name)
    {
      int suffixStart = name.length() - ext.length();
      // regionMatches with ignoreCase=true; a name shorter than the suffix never matches.
      return suffixStart >= 0
          && name.regionMatches(true, suffixStart, ext, 0, ext.length());
    }

}

 

 

0
0

查看评论
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
    个人资料
    • 访问:1006330次
    • 积分:13644
    • 等级:
    • 排名:第922名
    • 原创:287篇
    • 转载:212篇
    • 译文:0篇
    • 评论:359条
    最新评论