/* Copyright (c) 2010 Xiaoyun Zhu
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.CharBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
/**
* Lingoes LD2/LDF File Reader
*
* <pre>
* Lingoes Format overview:
*
* General Information:
* - Dictionary data are stored in deflate streams.
* - Index group information is stored in an index array in the LD2 file itself.
* - Numbers are stored in little-endian byte order.
* - Definitions and XML data are encoded as UTF-8 or UTF-16LE.
*
* LD2 file schema:
* - File Header
* - File Description
* - Additional Information (optional)
* - Index Group (corresponds to definitions in dictionary)
* - Deflated Dictionary Streams
* -- Index Data
* --- Offsets of definitions
* --- Offsets of translations
* --- Flags
* --- References to other translations
* -- Definitions
* -- Translations (xml)
*
* TODO: locate the encoding / language fields in the file so that encoding auto-detection can be replaced
*
* </pre>
*
* @author keke
*
*/publicclassLingoesLd2Reader {
privatestaticfinal SensitiveStringDecoder[] AVAIL_ENCODINGS = { new SensitiveStringDecoder(Charset.forName("UTF-8")),
new SensitiveStringDecoder(Charset.forName("UTF-16LE")), new SensitiveStringDecoder(Charset.forName("UTF-16BE")),
new SensitiveStringDecoder(Charset.forName("EUC-JP")) };
publicstaticvoidmain(final String[] args) throws IOException {
// download from// https://skydrive.live.com/?cid=a10100d37adc7ad3&sc=documents&id=A10100D37ADC7AD3%211172#cid=A10100D37ADC7AD3&sc=documents// String ld2File = Helper.DIR_IN_DICTS+"\\lingoes\\Prodic English-Vietnamese Business.ld2";final String ld2File = "D:\\kr.ld2";
// read lingoes ld2 into byte arrayfinal ByteBuffer dataRawBytes;
try (RandomAccessFile file = new RandomAccessFile(ld2File, "r"); final FileChannel fChannel = file.getChannel();) {
dataRawBytes = ByteBuffer.allocate((int) fChannel.size());
fChannel.read(dataRawBytes);
}
dataRawBytes.order(ByteOrder.LITTLE_ENDIAN);
dataRawBytes.rewind();
System.out.println("文件:" + ld2File);
System.out.println("类型:" + new String(dataRawBytes.array(), 0, 4, "ASCII"));
System.out.println("版本:" + dataRawBytes.getShort(0x18) + "." + dataRawBytes.getShort(0x1A));
System.out.println("ID: 0x" + Long.toHexString(dataRawBytes.getLong(0x1C)));
finalint offsetData = dataRawBytes.getInt(0x5C) + 0x60;
if (dataRawBytes.limit() > offsetData) {
System.out.println("简介地址:0x" + Integer.toHexString(offsetData));
fi