现在项目的需求是使用 Java 将 BIG5 繁体转换为 GBK 繁体。我看了一下文档,发现 GBK-3 区是繁体字,于是找了一份源码,但是不知道怎么改,无从下手。请大家帮我看看,谢谢了。
package com.big;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.Charset;
/**
 * Converts Big5-encoded byte sequences to GBK byte sequences by looking each
 * double-byte character up in a table loaded from an external file
 * ({@code converttable/big52gb.tab}, resolved against the working directory).
 *
 * <p>The table file is read as a flat sequence of two-byte entries, one entry
 * per (high byte, low byte) pair, ordered by high byte and then by low byte.
 * If the file is missing, unreadable or too short, the problem is printed to
 * standard error and Big5 conversion is disabled for this instance.
 */
public class EncodingConvertor {
    // Directory and file name of the conversion table.
    private static final String TABLE_FILE_DIR = "converttable";
    private static final String BIG5_TO_GBKSIMP_TABLE_FILE_NAME = "big52gb.tab";

    // Name of the charset used to decode the converted bytes.
    private static final String GBK_CHARSET_NAME = "GBK";

    // GBK-3 area: high byte in [0x81, 0xA0], low byte in [0x40, 0xFE].
    // (The GBK-4 area, high byte [0xAA, 0xFE] / low byte [0x40, 0xA0], is
    // intentionally not loaded by this class.)
    private static final int GBK3_HIGH_DOWN = 0x81;
    private static final int GBK3_HIGH_UP = 0xA0;
    private static final int GBK3_LOW_DOWN = 0x40;
    private static final int GBK3_LOW_UP = 0xFE;

    // Big5 code range: high byte in [0x81, 0xFE], low byte in [0x40, 0xFE].
    private static final int BIG5_HIGH_DOWN = 0x81;
    private static final int BIG5_HIGH_UP = 0xFE;
    private static final int BIG5_LOW_DOWN = 0x40;
    private static final int BIG5_LOW_UP = 0xFE;

    // Whether the Big5 table is loaded and usable. Cleared when loading fails,
    // which turns both big52gbk overloads into no-ops.
    private boolean ifloadbig5;

    // Table for the GBK-3 area. It is filled at construction time but no method
    // of this class reads it.
    // NOTE(review): it is loaded from the same file as the Big5 table - confirm
    // whether a separate GBK table file was intended, or drop it entirely.
    private final byte[][][] gbk3table =
            new byte[GBK3_HIGH_UP - GBK3_HIGH_DOWN + 1][GBK3_LOW_UP - GBK3_LOW_DOWN + 1][2];

    // Table for Big5: big5table[high - BIG5_HIGH_DOWN][low - BIG5_LOW_DOWN]
    // holds the two replacement bytes. Set to null when Big5 is not requested.
    private byte[][][] big5table =
            new byte[BIG5_HIGH_UP - BIG5_HIGH_DOWN + 1][BIG5_LOW_UP - BIG5_LOW_DOWN + 1][2];

    /**
     * Creates a converter and loads the conversion tables.
     *
     * @param ifbig5 {@code true} to load the Big5 table; when {@code false}
     *               both {@code big52gbk} overloads do nothing
     */
    public EncodingConvertor(boolean ifbig5) {
        ifloadbig5 = ifbig5;
        loadConvertTables();
    }

    /**
     * Loads the tables from the table file. Any I/O failure (including a file
     * that is too short) is reported on standard error and disables Big5
     * conversion instead of leaving a partially filled table in use.
     */
    private void loadConvertTables() {
        String tablePath = TABLE_FILE_DIR + File.separator + BIG5_TO_GBKSIMP_TABLE_FILE_NAME;
        try {
            readTable(tablePath, gbk3table);
            if (ifloadbig5) {
                readTable(tablePath, big5table);
            } else {
                big5table = null;
            }
        } catch (IOException ioe) {
            // FileNotFoundException is an IOException, so it ends up here too.
            ioe.printStackTrace();
            ifloadbig5 = false;
        }
    }

    /**
     * Fills {@code table} with consecutive two-byte entries read from the
     * start of the file at {@code path}.
     *
     * @throws IOException if the file cannot be opened or read, or ends before
     *                     every entry has been filled
     */
    private static void readTable(String path, byte[][][] table) throws IOException {
        DataInputStream dis =
                new DataInputStream(new BufferedInputStream(new FileInputStream(path)));
        try {
            for (int row = 0; row < table.length; row++) {
                for (int col = 0; col < table[row].length; col++) {
                    // readFully either fills the whole entry or throws; a plain
                    // read() may return fewer bytes without any indication.
                    dis.readFully(table[row][col]);
                }
            }
        } finally {
            dis.close();
        }
    }

    /**
     * Converts the Big5 characters in {@code text} to GBK in place.
     *
     * <p>Bytes with the high bit clear are left untouched. A byte with the high
     * bit set is treated as the first byte of a two-byte character; when the
     * pair lies in the Big5 range both bytes are replaced by the table entry,
     * otherwise the pair is skipped unchanged. A trailing lone lead byte is
     * left as is. Nothing happens if the Big5 table is not loaded or
     * {@code text} is {@code null}.
     *
     * @param text    Big5-encoded bytes, overwritten with the result
     * @param textlen number of leading bytes to process; values larger than
     *                {@code text.length} are clamped to the array length
     */
    public void big52gbk(byte text[], int textlen) {
        if (!ifloadbig5 || text == null) {
            return;
        }
        convertInPlace(text, Math.min(textlen, text.length));
    }

    /**
     * Converts the Big5 characters in {@code text} to GBK in place (see
     * {@link #big52gbk(byte[], int)}) and returns the result as a string.
     *
     * <p>The converted bytes are decoded as GBK when the JVM provides that
     * charset; otherwise the platform default charset is used.
     *
     * @param text Big5-encoded bytes, overwritten with the result
     * @return the decoded text, or an empty string if the Big5 table is not
     *         loaded or {@code text} is {@code null}
     */
    public String big52gbk(byte text[]) {
        if (!ifloadbig5 || text == null) {
            return "";
        }
        convertInPlace(text, text.length);
        if (Charset.isSupported(GBK_CHARSET_NAME)) {
            return new String(text, Charset.forName(GBK_CHARSET_NAME));
        }
        return new String(text, 0, text.length);
    }

    /** Rewrites the first {@code limit} bytes of {@code text} using the Big5 table. */
    private void convertInPlace(byte[] text, int limit) {
        int pos = 0;
        while (pos < limit) {
            if (text[pos] >= 0) {
                // Single-byte (ASCII) character.
                pos++;
                continue;
            }
            if (pos + 1 >= limit) {
                // Lead byte without a trail byte.
                break;
            }
            int high = text[pos] & 0xFF;
            int low = text[pos + 1] & 0xFF;
            if (high >= BIG5_HIGH_DOWN && high <= BIG5_HIGH_UP
                    && low >= BIG5_LOW_DOWN && low <= BIG5_LOW_UP) {
                byte[] mapped = big5table[high - BIG5_HIGH_DOWN][low - BIG5_LOW_DOWN];
                text[pos] = mapped[0];
                text[pos + 1] = mapped[1];
            }
            pos += 2;
        }
    }
}
我把 GBK-4 区的代码注释掉了,因为它是用来转简体的,这里我不需要。但是我不知道该怎么转换,也不知道怎么在程序中调用该方法。
private static final String TABLE_FILE_DIR = "converttable";
private static final String BIG5_TO_GBKSIMP_TABLE_FILE_NAME = "big52gb.tab";
我不明白这两行代码怎么用。另外 big52gb.tab 是外置文件,可是我在网上没找到。如果大家有的话,可以给我一份吗?谢谢啦!
2010年7月02日 22:20