源码下载地址:http://www.rayfile.com/files/1f6d6517-eed9-11dd-8bce-0019d11a795f/
说明:参照Delphi2009的相关类,写了TEncoding类及继承类。重写了UTF16、UTF8、UTF32在Delphi2007下的字符处理方式,现在能进行这些格式的读写了。当然对于Delphi2007,读取UNICODE格式文字显示的时候有可能丢失字符。
{* |<PRE>
================================================================================
* 软件名称:FcpEditor
* 单元名称:uEncoding
* 单元作者:Senfore
* 备 注:UNICODE读写支持
* 开发平台:Windows Xp Sp3 + Delphi 2007/Delphi2009
* 兼容测试:
* 本 地 化:使Delphi2007能使用Delphi2009的Encoding方式处理文本
* 单元标识:$Id: uEncoding.pas,v 1.1 2009/01/28
* 修改记录:
* 2009.01.28 V1.1
* 让Delphi2007环境下支持UTF32格式读写
* 2009.01.06 V1.0
* 创建单元,实现功能
================================================================================
|</PRE>}
unit uEncoding;
{$DEFINE UCS4_ENCODING_SUPPORT}
interface
uses Classes,Windows,SysUtils;
{$IFNDEF UNICODE}
function InterlockedCompareExchangePointer(var Destination: Pointer; Exchange: Pointer; Comperand: Pointer): Pointer;
{$ENDIF}
{$IFNDEF UNICODE}
type
UnicodeString=WideString;
RawByteString=AnsiString;
{$ENDIF}
{$IFNDEF UNICODE}
type
TCharArray = array of Char;
EEncodingError = class(Exception);
TEncoding = class
strict private
class var
FASCIIEncoding: TEncoding;
FBigEndianUnicodeEncoding: TEncoding;
FDefaultEncoding: TEncoding;
FUnicodeEncoding: TEncoding;
FUTF7Encoding: TEncoding;
FUTF8Encoding: TEncoding;
class function GetASCII: TEncoding; static;
class function GetBigEndianUnicode: TEncoding; static;
class function GetDefault: TEncoding; static;
class function GetUnicode: TEncoding; static;
class function GetUTF7: TEncoding; static;
class function GetUTF8: TEncoding; static;
strict protected
FIsSingleByte: Boolean;
FMaxCharSize: Integer;
function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; virtual; abstract;
function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; virtual; abstract;
function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; virtual; abstract;
function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; virtual; abstract;
public
class function Convert(Source, Destination: TEncoding; Bytes: TBytes): TBytes; overload;
class function Convert(Source, Destination: TEncoding; Bytes: TBytes; StartIndex, Count: Integer): TBytes; overload;
class procedure FreeEncodings;
class function IsStandardEncoding(AEncoding: TEncoding): Boolean; static;
class function GetBufferEncoding(const Buffer: TBytes; var AEncoding: TEncoding): Integer; static;
function GetByteCount(const Chars: TCharArray): Integer; overload;
function GetByteCount(const Chars: TCharArray; CharIndex, CharCount: Integer): Integer; overload;
function GetByteCount(const S: string): Integer; overload;
function GetByteCount(const S: string; CharIndex, CharCount: Integer): Integer; overload;
function GetBytes(const Chars: TCharArray): TBytes; overload;
function GetBytes(const Chars: TCharArray; CharIndex, CharCount: Integer;
var Bytes: TBytes; ByteIndex: Integer): Integer; overload;
function GetBytes(const S: string): TBytes; overload;
function GetBytes(const S: string; CharIndex, CharCount: Integer;
var Bytes: TBytes; ByteIndex: Integer): Integer; overload;
function GetCharCount(const Bytes: TBytes): Integer; overload;
function GetCharCount(const Bytes: TBytes; ByteIndex, ByteCount: Integer): Integer; overload;
function GetChars(const Bytes: TBytes): TCharArray; overload;
function GetChars(const Bytes: TBytes; ByteIndex, ByteCount: Integer): TCharArray; overload;
function GetChars(const Bytes: TBytes; ByteIndex, ByteCount: Integer;
var Chars: TCharArray; CharIndex: Integer): Integer; overload;
class function GetEncoding(CodePage: Integer): TEncoding; static;
function GetMaxByteCount(CharCount: Integer): Integer; virtual; abstract;
function GetMaxCharCount(ByteCount: Integer): Integer; virtual; abstract;
function GetPreamble: TBytes; virtual; abstract;
function GetString(const Bytes: TBytes): string; overload;
function GetString(const Bytes: TBytes; ByteIndex, ByteCount: Integer): string; overload;
class property ASCII: TEncoding read GetASCII;
class property BigEndianUnicode: TEncoding read GetBigEndianUnicode;
class property Default: TEncoding read GetDefault;
property IsSingleByte: Boolean read FIsSingleByte;
class property Unicode: TEncoding read GetUnicode;
class property UTF7: TEncoding read GetUTF7;
class property UTF8: TEncoding read GetUTF8;
end;
TMBCSEncoding = class(TEncoding)
private
FCodePage: Cardinal;
FMBToWCharFlags: Cardinal;
FWCharToMBFlags: Cardinal;
strict protected
function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; override;
function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
public
constructor Create; overload; virtual;
constructor Create(CodePage: Integer); overload; virtual;
constructor Create(CodePage, MBToWCharFlags, WCharToMBFlags: Integer); overload; virtual;
function GetMaxByteCount(CharCount: Integer): Integer; override;
function GetMaxCharCount(ByteCount: Integer): Integer; override;
function GetPreamble: TBytes; override;
end;
TUTF7Encoding = class(TMBCSEncoding)
strict protected
function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; override;
function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
public
constructor Create; override;
function GetMaxByteCount(CharCount: Integer): Integer; override;
function GetMaxCharCount(ByteCount: Integer): Integer; override;
end;
TUTF8Encoding = class(TUTF7Encoding)
public
constructor Create; override;
function GetMaxByteCount(CharCount: Integer): Integer; override;
function GetMaxCharCount(ByteCount: Integer): Integer; override;
function GetPreamble: TBytes; override;
end;
TUnicodeEncoding = class(TEncoding)
strict protected
function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; override;
function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
public
constructor Create; virtual;
function GetMaxByteCount(CharCount: Integer): Integer; override;
function GetMaxCharCount(ByteCount: Integer): Integer; override;
function GetPreamble: TBytes; override;
end;
TBigEndianUnicodeEncoding = class(TUnicodeEncoding)
strict protected
function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
public
function GetPreamble: TBytes; override;
end;
TStringsEx=class(TStrings)
private
public
procedure LoadFromFile(const FileName: string); overload; virtual;
procedure LoadFromFile(const FileName: string; Encoding: TEncoding); overload; virtual;
procedure LoadFromStream(Stream: TStream); overload; virtual;
procedure LoadFromStream(Stream: TStream; Encoding: TEncoding); overload; virtual;
procedure SaveToFile(const FileName: string); overload; virtual;
procedure SaveToFile(const FileName: string; Encoding: TEncoding); overload; virtual;
procedure SaveToStream(Stream: TStream); overload; virtual;
procedure SaveToStream(Stream: TStream; Encoding: TEncoding); overload; virtual;
end;
{ TStringList class }
TStringListEx = class;
TStringListExSortCompare = function(List: TStringListEx; Index1, Index2: Integer): Integer;
TStringListEx = class(TStringsEx)
private
FList: PStringItemList;
FCount: Integer;
FCapacity: Integer;
FSorted: Boolean;
FDuplicates: TDuplicates;
FCaseSensitive: Boolean;
FOnChange: TNotifyEvent;
FOnChanging: TNotifyEvent;
procedure ExchangeItems(Index1, Index2: Integer);
procedure Grow;
procedure QuickSort(L, R: Integer; SCompare: TStringListExSortCompare);
procedure SetSorted(Value: Boolean);
procedure SetCaseSensitive(const Value: Boolean);
protected
procedure Changed; virtual;
procedure Changing; virtual;
function Get(Index: Integer): string; override;
function GetCapacity: Integer; override;
function GetCount: Integer; override;
function GetObject(Index: Integer): TObject; override;
procedure Put(Index: Integer; const S: string); override;
procedure PutObject(Index: Integer; AObject: TObject); override;
procedure SetCapacity(NewCapacity: Integer); override;
procedure SetUpdateState(Updating: Boolean); override;
function CompareStrings(const S1, S2: string): Integer; override;
procedure InsertItem(Index: Integer; const S: string; AObject: TObject); virtual;
public
destructor Destroy; override;
function Add(const S: string): Integer; override;
function AddObject(const S: string; AObject: TObject): Integer; override;
procedure Clear; override;
procedure Delete(Index: Integer); override;
procedure Exchange(Index1, Index2: Integer); override;
function Find(const S: string; var Index: Integer): Boolean; virtual;
function IndexOf(const S: string): Integer; override;
procedure Insert(Index: Integer; const S: string); override;
procedure InsertObject(Index: Integer; const S: string;
AObject: TObject); override;
procedure Sort; virtual;
procedure CustomSort(Compare: TStringListExSortCompare); virtual;
property Duplicates: TDuplicates read FDuplicates write FDuplicates;
property Sorted: Boolean read FSorted write SetSorted;
property CaseSensitive: Boolean read FCaseSensitive write SetCaseSensitive;
property OnChange: TNotifyEvent read FOnChange write FOnChange;
property OnChanging: TNotifyEvent read FOnChanging write FOnChanging;
end;
{$ENDIF}
{$IFDEF UCS4_ENCODING_SUPPORT}
type
TUCS4Encoding = class(TEncoding)
strict protected
FBOMEnable: Boolean;
FBigEndian: Boolean;
function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; override;
function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
public
constructor Create; overload; virtual;
constructor Create(BigEndian: Boolean); overload; virtual;
constructor Create(BigEndian, BOMEnable: Boolean); overload; virtual;
function GetMaxByteCount(CharCount: Integer): Integer; override;
function GetMaxCharCount(ByteCount: Integer): Integer; override;
function GetPreamble: TBytes; override;
end;
{$ENDIF UCS4_ENCODING_SUPPORT}
{CheckBom}
function HasUTF16LEBOM(S: TStream): Boolean; overload;
function HasUTF16LEBOM(S: AnsiString): Boolean; overload;
function HasUTF16BEBOM(S: TStream): Boolean; overload;
function HasUTF16BEBOM(S: AnsiString): Boolean; overload;
function HasUTF8BOM(S: TStream): Boolean; overload;
function HasUTF8BOM(S: AnsiString): Boolean; overload;
function HasUTF32LEBOM(S: TStream): Boolean; overload;
function HasUTF32LEBOM(S: AnsiString): Boolean; overload;
function HasUTF32BEBOM(S: TStream): Boolean; overload;
function HasUTF32BEBOM(S: AnsiString): Boolean; overload;
{Encoding}
function GetEncodeFromStream(Stream: TStream;var CodePage:LongInt): Boolean;
function GetCodePageFromFile(const FileName:TFileName;var DefCodePage:LongInt):Boolean;
function GetEncoding(CodePage: Integer): TEncoding;
function GetEnCodingFromFile(const FileName:TFileName;DefCodePage:LongInt=0):TEncoding;
{File<->Str}
function StrToFile(const FileName, S: String): Boolean;overload;
function StrToFile(const FileName, S: String;Encoding:TEncoding): Boolean;overload;
procedure StrSaveToStream(const S:string;Stream: TStream; Encoding: TEncoding);
function FileToStr(const FileName: string):string;overload;
function FileToStr(const FileName: string; Encoding: TEncoding):string;overload;
function FileToString(const FileName: string): RawByteString;
function StrLoadFromStream(Stream: TStream; Encoding: TEncoding):string;
{Unicode}
function UnicodeEncode(Str: string; CodePage: integer): WideString;
function UnicodeDecode(Str: WideString; CodePage: integer): string;
function Gb2Big5(Str: string): string;
function Big52Gb(Str: string): string;
function GBCHT2CHS(const S: string): string;
function GBCHS2CHT(const S: string): string;