这是我写的一个小工具，用来查找 Unicode 文本中的中文字符。
FindCnchar.java
import java.io.*;
/**
 * Application: FindCnchar
 * Author: Steven
 * Date: Monday, May 28, 2007
 * Time: 15:05:42
 *
 * Purpose:
 *     Find Chinese characters in text.
 *
 * Usage:
 *     java FindCnchar filepath
 *
 * Description:
 *     In fact, this application currently just picks out the lines that contain
 *     non-ASCII characters, which of course includes Chinese characters.
 *     The source text file SHALL be encoded in "Unicode" (i.e. UTF-16).
 *     Every matching line is written, prefixed with its 1-based line number and
 *     a colon, to a UTF-16 file named after the source file's absolute path
 *     with ".txt" appended.
 *     Good luck finding the Chinese information you are interested in on an
 *     English system.
 */
public class FindCnchar
{
    /** Largest code unit that is still ASCII; anything above it is reported. */
    private static final char MAX_ASCII = 127;

    /** Encoding used for both the source file and the report file. */
    private static final String ENCODING = "UTF-16";

    /**
     * Program entry point.
     *
     * Prints a diagnostic and returns without doing any work when no file path
     * is given or when the given file does not exist.
     *
     * @param args command line arguments; args[0] is the path of the file to scan
     * @throws Exception if reading the source file or writing the report fails
     */
    public static void main( String[] args ) throws Exception
    {
        // argument check: stop here, otherwise args[0] below would blow up
        if ( args == null || args.length < 1 )
        {
            println( "not enough arguments!" );
            printUsage();
            return;
        }

        File srcfile = new File( args[0] );
        if ( !srcfile.exists() )
        {
            println("assigned file doesnot exist!");
            return;
        }

        File destfile = new File( srcfile.getAbsolutePath() + ".txt" );
        writeNonAsciiLines( srcfile, destfile );

        // report
        println( "Finished search." );
        println( destfile.getAbsolutePath() );
    }

    /**
     * Tells whether a line contains at least one character outside the ASCII
     * range (code unit greater than 127).
     *
     * @param line the text to inspect; null is treated as containing nothing
     * @return true if any character of the line is not ASCII
     */
    public static boolean containsNonAscii( String line )
    {
        if ( line == null )
        {
            return false;
        }
        for ( int i = 0; i < line.length(); ++i )
        {
            if ( line.charAt( i ) > MAX_ASCII )
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Copies every line of srcfile that contains a non-ASCII character to
     * destfile, each prefixed with "lineNumber:" (line numbers start at 1).
     * Both streams are closed even when an error interrupts the scan.
     */
    private static void writeNonAsciiLines( File srcfile, File destfile ) throws IOException
    {
        BufferedReader fin = new BufferedReader(
                new InputStreamReader( new FileInputStream( srcfile ), ENCODING ) );
        try
        {
            PrintWriter fout = new PrintWriter(
                    new OutputStreamWriter( new FileOutputStream( destfile ), ENCODING ) );
            try
            {
                int lineNo = 1;
                String line;
                while ( ( line = fin.readLine() ) != null )
                {
                    if ( containsNonAscii( line ) )
                    {
                        fout.println( lineNo + ":" + line );
                    }
                    ++lineNo;
                }
                // PrintWriter never throws on write; ask it explicitly so a
                // failed write is not reported as a successful search.
                if ( fout.checkError() )
                {
                    throw new IOException( "failed to write " + destfile.getAbsolutePath() );
                }
            }
            finally
            {
                fout.close();
            }
        }
        finally
        {
            fin.close();
        }
    }

    /**
     * Prints the given object followed by a line break to standard output.
     *
     * @param o the object to print
     */
    public static void println( Object o )
    {
        System.out.println( o );
    }

    /**
     * Prints the given object to standard output without a line break.
     *
     * @param o the object to print
     */
    public static void print( Object o )
    {
        System.out.print( o );
    }

    /**
     * Prints a short description of the command line syntax to standard output.
     */
    public static void printUsage()
    {
        println("Usage:");
        println("FindCnchar file_path");
        println("");
    }
}/*END OF CLASS FindCnchar*/