RSS订阅 | 匿名投稿
您的位置:网站首页 > 单元 > 正文

Delphi中一字符串处理转换单元MECSUtils V1.31

作者:admin 来源:未知 日期:2010/10/9 0:18:05 人气: 标签:

支持AnsiString/UnicodeString/WideString

mecsutils.pas
{ *********************************************************************** }
{                                                                         }
{ MECS Utilities Unit  ver1.31                                            }
{                                                                         }
{ Copyright (c) 2008,2009  Hideaki Tominaga (DEKO)                        }
{                                                                         }
{ *********************************************************************** }

unit MECSUtils;

interface

uses
  SysUtils, Graphics, Windows, Dialogs;

type
  TLeadBytes = set of AnsiChar;
  UTF8Char =  type AnsiChar;
  PUTF8Char = type PAnsiChar;
  {$IFDEF UNICODE}
  UnicodeChar = type Char;
  PUnicodeChar = type PChar;
  {$ENDIF}

  TElementType = (etSingle, etLead, etTrail);

  TEastAsianWidth = (eawNeutral, eawFullwidth, eawHalfwidth, eawWide, eawNarrow, eawAmbiguous);
  TCombiningType = (ctBase, ctCombining);

  TIndexCount =
    record
      StartIndex: Integer;
      EndIndex: Integer;
      Count: Integer;
    end;

  //  Normalization forms
  TNORM_FORM =
    (
      NormalizationOther = $00,
      NormalizationC     = $01,  // Normalization Form Canonical Composition(NFC)
      NormalizationD     = $02,  // Normalization Form Canonical Decomposition(NFD)
      NormalizationKC    = $05,  // Normalization Form Compatibility Composition(NFKC)
      NormalizationKD    = $06   // Normalization Form Compatibility Decomposition(NFKD)
    );

// for Codepage
// -----------------------------------------------------------------------------
function CodePageToFontCharset(Codepage: DWORD): TFontCharset;
function FontCharsetToCodePage(FontCharset: TFontCharset): DWORD;
function IsFarEastCharSet(CharSet: TFontCharSet): Boolean;
function IsFarEastLCID(Locale: LCID): Boolean;

// for AnsiString
// -----------------------------------------------------------------------------
{$IFDEF UNICODE}
function MecsGetCodePage(S: RawByteString): DWORD;
{$ELSE}
function MecsGetCodePage(S: AnsiString): DWORD;
{$ENDIF}

// MecsGetLeadBytes
function MecsGetLeadBytes: TLeadBytes; overload;
function MecsGetLeadBytes(Codepage: DWORD): TLeadBytes; overload;

// for UnicodeString/WideString
// -----------------------------------------------------------------------------
function MecsEastAsianWidth(U: UCS4Char): TEastAsianWidth;
function MecsIsNormalized(const AText: WideString; NormForm: TNORM_FORM): Boolean;
// MecsMappingFixJA
{$IFDEF UNICODE}
function MecsMappingFixJA(const S: UnicodeString): UnicodeString; overload;
{$ENDIF}
function MecsMappingFixJA(const S: WideString): WideString; overload;
function MecsNormalize(const Src: WideString; var Dst: WideString; NormForm: TNORM_FORM): Boolean;
{$IFNDEF UNICODE}
function StrLen(const Str: PWideChar): Cardinal;
{$ENDIF}

// for AnsiString/UnicodeString/WideString (Common)
// -----------------------------------------------------------------------------
// MecsCharToElementIndex
{$IFDEF UNICODE}
function MecsCharToElementIndex(const S: RawByteString; Index: Integer): Integer; overload;
function MecsCharToElementIndex(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer; overload;
function MecsCharToElementIndex(const S: UnicodeString; Index: Integer): Integer; overload;
{$ELSE}
function MecsCharToElementIndex(const S: AnsiString; Index: Integer): Integer; overload;
function MecsCharToElementIndex(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer; overload;
{$ENDIF}
function MecsCharToElementIndex(const S: WideString; Index: Integer): Integer; overload;
// MecsCharToElementIndexCount
{$IFDEF UNICODE}
function MecsCharToElementIndexCount(const S: RawByteString; Index, Count: Integer): TIndexCount; overload;
function MecsCharToElementIndexCount(const S: RawByteString; Index, Count: Integer; Codepage: DWORD): TIndexCount; overload;
function MecsCharToElementIndexCount(const S: UnicodeString; Index, Count: Integer): TIndexCount; overload;
{$ELSE}
function MecsCharToElementIndexCount(const S: AnsiString; Index, Count: Integer): TIndexCount; overload;
function MecsCharToElementIndexCount(const S: AnsiString; Index, Count: Integer; Codepage: DWORD): TIndexCount; overload;
{$ENDIF}
function MecsCharToElementIndexCount(const S: WideString; Index, Count: Integer): TIndexCount; overload;
// MecsCharToElementLen
{$IFDEF UNICODE}
function MecsCharToElementLen(const S: RawByteString; MaxLen: Integer): Integer; overload;
function MecsCharToElementLen(const S: RawByteString; MaxLen: Integer; Codepage: DWORD): Integer; overload;
function MecsCharToElementLen(const S: UnicodeString; MaxLen: Integer): Integer; overload;
{$ELSE}
function MecsCharToElementLen(const S: AnsiString; MaxLen: Integer): Integer; overload;
function MecsCharToElementLen(const S: AnsiString; MaxLen: Integer; Codepage: DWORD): Integer; overload;
{$ENDIF}
function MecsCharToElementLen(const S: WideString; MaxLen: Integer): Integer; overload;
// MecsCharLength
{$IFDEF UNICODE}
function MecsCharLength(const S: RawByteString; Index: Integer): Integer; overload;
function MecsCharLength(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer; overload;
function MecsCharLength(const S: UnicodeString; Index: Integer): Integer; overload;
{$ELSE}
function MecsCharLength(const S: AnsiString; Index: Integer): Integer; overload;
function MecsCharLength(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer; overload;
{$ENDIF}
function MecsCharLength(const S: WideString; Index: Integer): Integer; overload;
// MecsCopy
{$IFDEF UNICODE}
function MecsCopy(const S: RawByteString; Index, Count: Integer): RawByteString; overload;
function MecsCopy(const S: RawByteString; Index, Count: Integer; Codepage: DWORD): RawByteString; overload;
function MecsCopy(const S: UnicodeString; Index, Count: Integer): UnicodeString; overload;
{$ELSE}
function MecsCopy(const S: AnsiString; Index, Count: Integer): AnsiString; overload;
function MecsCopy(const S: AnsiString; Index, Count: Integer; Codepage: DWORD): AnsiString; overload;
{$ENDIF}
function MecsCopy(const S: WideString; Index, Count: Integer): WideString; overload;
// MecsDelete
{$IFDEF UNICODE}
procedure MecsDelete(var S: RawByteString; Index, Count: Integer); overload;
procedure MecsDelete(var S: RawByteString; Index, Count: Integer; Codepage: DWORD); overload;
procedure MecsDelete(var S: UnicodeString; Index, Count: Integer); overload;
{$ELSE}
procedure MecsDelete(var S: AnsiString; Index, Count: Integer); overload;
procedure MecsDelete(var S: AnsiString; Index, Count: Integer; Codepage: DWORD); overload;
{$ENDIF}
procedure MecsDelete(var S: WideString; Index, Count: Integer); overload;
// MecsElementToCharIndex
{$IFDEF UNICODE}
function MecsElementToCharIndex(const S: RawByteString; Index: Integer): Integer; overload;
function MecsElementToCharIndex(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer; overload;
function MecsElementToCharIndex(const S: UnicodeString; Index: Integer): Integer; overload;
{$ELSE}
function MecsElementToCharIndex(const S: AnsiString; Index: Integer): Integer; overload;
function MecsElementToCharIndex(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer; overload;
{$ENDIF}
function MecsElementToCharIndex(const S: WideString; Index: Integer): Integer; overload;
// MecsElementToCharLen
{$IFDEF UNICODE}
function MecsElementToCharLen(const S: RawByteString; MaxLen: Integer): Integer; overload;
function MecsElementToCharLen(const S: RawByteString; MaxLen: Integer; Codepage: DWORD): Integer; overload;
function MecsElementToCharLen(const S: UnicodeString; MaxLen: Integer): Integer; overload;
{$ELSE}
function MecsElementToCharLen(const S: AnsiString; MaxLen: Integer): Integer; overload;
function MecsElementToCharLen(const S: AnsiString; MaxLen: Integer; Codepage: DWORD): Integer; overload;
{$ENDIF}
function MecsElementToCharLen(const S: WideString; MaxLen: Integer): Integer; overload;
// MecsElementType
{$IFDEF UNICODE}
function MecsElementType(const S: RawByteString; Index: Integer): TElementType; overload;
function MecsElementType(const S: RawByteString; Index: Integer; Codepage: DWORD): TElementType; overload;
function MecsElementType(const S: UnicodeString; Index: Integer): TElementType; overload;
{$ELSE}
function MecsElementType(const S: AnsiString; Index: Integer): TElementType; overload;
function MecsElementType(const S: AnsiString; Index: Integer; Codepage: DWORD): TElementType; overload;
{$ENDIF}
function MecsElementType(const S: WideString; Index: Integer): TElementType; overload;
// MecsInsert
{$IFDEF UNICODE}
procedure MecsInsert(const Source: RawByteString; var S: RawByteString; Index: Integer); overload;
procedure MecsInsert(const Source: RawByteString; var S: RawByteString; Index: Integer; Codepage: DWORD); overload;
procedure MecsInsert(const Source: UnicodeString; var S: UnicodeString; Index: Integer); overload;
{$ELSE}
procedure MecsInsert(const Source: AnsiString; var S: AnsiString; Index: Integer); overload;
procedure MecsInsert(const Source: AnsiString; var S: AnsiString; Index: Integer; Codepage: DWORD); overload;
{$ENDIF}
procedure MecsInsert(const Source: WideString; var S: WideString; Index: Integer); overload;
// MecsIsFullWidth
{$IFDEF UNICODE}
function MecsIsFullWidth(const S: RawByteString; CharIndex: Integer): Boolean; overload;
function MecsIsFullWidth(const S: RawByteString; CharIndex: Integer; FarEast: Boolean): Boolean; overload;
function MecsIsFullWidth(const S: RawByteString; CharIndex: Integer; FarEast: Boolean; CodePage: DWORD): Boolean; overload;
function MecsIsFullWidth(const S: UnicodeString; CharIndex: Integer): Boolean; overload;
function MecsIsFullWidth(const S: UnicodeString; CharIndex: Integer; FarEast: Boolean): Boolean; overload;
{$ELSE}
function MecsIsFullWidth(const S: AnsiString; CharIndex: Integer): Boolean; overload;
function MecsIsFullWidth(const S: AnsiString; CharIndex: Integer; FarEast: Boolean): Boolean; overload;
function MecsIsFullWidth(const S: AnsiString; CharIndex: Integer; FarEast: Boolean; CodePage: DWORD): Boolean; overload;
{$ENDIF}
function MecsIsFullWidth(const S: WideString; CharIndex: Integer): Boolean; overload;
function MecsIsFullWidth(const S: WideString; CharIndex: Integer; FarEast: Boolean): Boolean; overload;
// MecsIsLeadElement
function MecsIsLeadElement(TestChar: AnsiChar): Boolean; overload;
function MecsIsLeadElement(TestChar: AnsiChar; Codepage: DWORD): Boolean; overload;
{$IFDEF UNICODE}
function MecsIsLeadElement(TestChar: UnicodeChar): Boolean; overload;
{$ELSE}
function MecsIsLeadElement(TestChar: WideChar): Boolean; overload;
{$ENDIF}
// MecsIsMECElement
function MecsIsMECElement(TestChar: AnsiChar): Boolean; overload;
function MecsIsMECElement(TestChar: AnsiChar; Codepage: DWORD): Boolean; overload;
{$IFDEF UNICODE}
function MecsIsMECElement(TestChar: UnicodeChar): Boolean; overload;
{$ELSE}
function MecsIsMECElement(TestChar: WideChar): Boolean; overload;
{$ENDIF}
// MecsIsTrailElement
function MecsIsTrailElement(TestChar: AnsiChar): Boolean; overload;
function MecsIsTrailElement(TestChar: AnsiChar; Codepage: DWORD): Boolean; overload;
{$IFDEF UNICODE}
function MecsIsTrailElement(TestChar: UnicodeChar): Boolean; overload;
{$ELSE}
function MecsIsTrailElement(TestChar: WideChar): Boolean; overload;
{$ENDIF}
// MecsLeftStr
{$IFDEF UNICODE}
function MecsLeftStr(const AText: RawByteString; const ACount: Integer): RawByteString; overload;
function MecsLeftStr(const AText: RawByteString; const ACount: Integer; Codepage: DWORD): RawByteString; overload;
function MecsLeftStr(const AText: UnicodeString; const ACount: Integer): UnicodeString; overload;
{$ELSE}
function MecsLeftStr(const AText: AnsiString; const ACount: Integer): AnsiString; overload;
function MecsLeftStr(const AText: AnsiString; const ACount: Integer; Codepage: DWORD): AnsiString; overload;
{$ENDIF}
function MecsLeftStr(const AText: WideString; const ACount: Integer): WideString; overload;
// MecsLength
{$IFDEF UNICODE}
function MecsLength(const S: RawByteString): Integer; overload;
function MecsLength(const S: RawByteString; Codepage: DWORD): Integer; overload;
function MecsLength(const S: UnicodeString): Integer; overload;
{$ELSE}
function MecsLength(const S: AnsiString): Integer; overload;
function MecsLength(const S: AnsiString; Codepage: DWORD): Integer; overload;
{$ENDIF}
function MecsLength(const S: WideString): Integer; overload;
// MecsMidStr
{$IFDEF UNICODE}
function MecsMidStr(const AText: RawByteString; const AStart, ACount: Integer): RawByteString; overload;
function MecsMidStr(const AText: RawByteString; const AStart, ACount: Integer; Codepage: DWORD): RawByteString; overload;
function MecsMidStr(const AText: UnicodeString; const AStart, ACount: Integer): UnicodeString; overload;
{$ELSE}
function MecsMidStr(const AText: AnsiString; const AStart, ACount: Integer): AnsiString; overload;
function MecsMidStr(const AText: AnsiString; const AStart, ACount: Integer; Codepage: DWORD): AnsiString; overload;
{$ENDIF}
function MecsMidStr(const AText: WideString; const AStart, ACount: Integer): WideString; overload;
// MecsNextCharIndex
{$IFDEF UNICODE}
function MecsNextCharIndex(const S: RawByteString; Index: Integer): Integer; overload;
function MecsNextCharIndex(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer; overload;
function MecsNextCharIndex(const S: UnicodeString; Index: Integer): Integer; overload;
{$ELSE}
function MecsNextCharIndex(const S: AnsiString; Index: Integer): Integer; overload;
function MecsNextCharIndex(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer; overload;
{$ENDIF}
function MecsNextCharIndex(const S: WideString; Index: Integer): Integer; overload;
// MecsReverseString
{$IFDEF UNICODE}
function MecsReverseString(const AText: RawByteString): RawByteString; overload;
function MecsReverseString(const AText: RawByteString; Codepage: DWORD): RawByteString; overload;
function MecsReverseString(const AText: UnicodeString): UnicodeString; overload;
{$ELSE}
function MecsReverseString(const AText: AnsiString): AnsiString; overload;
function MecsReverseString(const AText: AnsiString; Codepage: DWORD): AnsiString; overload;
{$ENDIF}
function MecsReverseString(const AText: WideString): WideString; overload;
// MecsRightStr
{$IFDEF UNICODE}
function MecsRightStr(const AText: RawByteString; const ACount: Integer): RawByteString; overload;
function MecsRightStr(const AText: RawByteString; const ACount: Integer; Codepage: DWORD): RawByteString; overload;
function MecsRightStr(const AText: UnicodeString; const ACount: Integer): UnicodeString; overload;
{$ELSE}
function MecsRightStr(const AText: AnsiString; const ACount: Integer): AnsiString; overload;
function MecsRightStr(const AText: AnsiString; const ACount: Integer; Codepage: DWORD): AnsiString; overload;
{$ENDIF}
function MecsRightStr(const AText: WideString; const ACount: Integer): WideString; overload;
// MecsStrCharLength
function MecsStrCharLength(const Str: PAnsiChar): Integer; overload;
function MecsStrCharLength(const Str: PAnsiChar; Codepage: DWORD): Integer; overload;
{$IFDEF UNICODE}
function MecsStrCharLength(const Str: PUnicodeChar): Integer; overload;
{$ENDIF}
function MecsStrCharLength(const Str: PWideChar): Integer; overload;
// MecsStrElementType
function MecsStrElementType(const Str: PAnsiChar; Index: Cardinal): TElementType; overload;
function MecsStrElementType(const Str: PAnsiChar; Index: Cardinal; Codepage: DWORD): TElementType; overload;
{$IFDEF UNICODE}
function MecsStrElementType(const Str: PUnicodeChar; Index: Cardinal): TElementType; overload;
{$ENDIF}
function MecsStrElementType(const Str: PWideChar; Index: Cardinal): TElementType; overload;
// MecsStrLen
function MecsStrLen(const Str: PAnsiChar): Cardinal; overload;
function MecsStrLen(const Str: PAnsiChar; Codepage: DWORD): Cardinal; overload;
{$IFDEF UNICODE}
function MecsStrLen(const Str: PUnicodeChar): Cardinal; overload;
{$ENDIF}
function MecsStrLen(const Str: PWideChar): Cardinal; overload;
// MecsStrNextChar
function MecsStrNextChar(const Str: PAnsiChar): PAnsiChar; overload;
function MecsStrNextChar(const Str: PAnsiChar; Codepage: DWORD): PAnsiChar; overload;
{$IFDEF UNICODE}
function MecsStrNextChar(const Str: PUnicodeChar): PUnicodeChar; overload;
{$ENDIF}
function MecsStrNextChar(const Str: PWideChar): PWideChar; overload;

// for UnicodeString/WideString (Combining Character Sequence)
// -----------------------------------------------------------------------------
// MecsCombiningType
function MecsCombiningType(U: UCS4Char): TCombiningType;
// MecsCCSLength
{$IFDEF UNICODE}
function MecsCCSLength(const S: UnicodeString; Index: Integer): Integer; overload;
{$ENDIF}
function MecsCCSLength(const S: WideString; Index: Integer): Integer; overload;
// MecsCCSToElementIndex
{$IFDEF UNICODE}
function MecsCCSToElementIndex(const S: UnicodeString; Index: Integer): Integer; overload;
{$ENDIF}
function MecsCCSToElementIndex(const S: WideString; Index: Integer): Integer; overload;
// MecsCCSToElementIndexCount
{$IFDEF UNICODE}
function MecsCCSToElementIndexCount(const S: UnicodeString; Index, Count: Integer): TIndexCount; overload;
{$ENDIF}
function MecsCCSToElementIndexCount(const S: WideString; Index, Count: Integer): TIndexCount; overload;
// MecsCCSToElementLen
{$IFDEF UNICODE}
function MecsCCSToElementLen(const S: UnicodeString; MaxLen: Integer): Integer; overload;
{$ENDIF}
function MecsCCSToElementLen(const S: WideString; MaxLen: Integer): Integer; overload;
// MecsDeleteC
{$IFDEF UNICODE}
procedure MecsDeleteC(var S: UnicodeString; Index, Count: Integer); overload;
{$ENDIF}
procedure MecsDeleteC(var S: WideString; Index, Count: Integer); overload;
// MecsElementToCCSIndex
{$IFDEF UNICODE}
function MecsElementToCCSIndex(const S: UnicodeString; Index: Integer): Integer; overload;
{$ENDIF}
function MecsElementToCCSIndex(const S: WideString; Index: Integer): Integer; overload;
// MecsElementToCCSLen
{$IFDEF UNICODE}
function MecsElementToCCSLen(const S: UnicodeString; MaxLen: Integer): Integer; overload;
{$ENDIF}
function MecsElementToCCSLen(const S: WideString; MaxLen: Integer): Integer; overload;
// MecsLengthC
{$IFDEF UNICODE}
function MecsLengthC(const S: UnicodeString): Integer; overload;
{$ENDIF}
function MecsLengthC(const S: WideString): Integer; overload;
// MecsCopyC
{$IFDEF UNICODE}
function MecsCopyC(const S: UnicodeString; Index, Count: Integer): UnicodeString; overload;
{$ENDIF}
function MecsCopyC(const S: WideString; Index, Count: Integer): WideString; overload;
// MecsInsertC
{$IFDEF UNICODE}
procedure MecsInsertC(const Source: UnicodeString; var S: UnicodeString; Index: Integer); overload;
{$ENDIF}
procedure MecsInsertC(const Source: WideString; var S: WideString; Index: Integer); overload;
// MecsLeftStrC
{$IFDEF UNICODE}
function MecsLeftStrC(const AText: UnicodeString; const ACount: Integer): UnicodeString; overload;
{$ENDIF}
function MecsLeftStrC(const AText: WideString; const ACount: Integer): WideString; overload;
// MecsMidStrC
{$IFDEF UNICODE}
function MecsMidStrC(const AText: UnicodeString; const AStart, ACount: Integer): UnicodeString; overload;
{$ENDIF}
function MecsMidStrC(const AText: WideString; const AStart, ACount: Integer): WideString; overload;
// MecsNextCCSIndex
{$IFDEF UNICODE}
function MecsNextCCSIndex(const S: UnicodeString; Index: Integer): Integer; overload;
{$ENDIF}
function MecsNextCCSIndex(const S: WideString; Index: Integer): Integer; overload;
// MecsReverseStringC
{$IFDEF UNICODE}
function MecsReverseStringC(const AText: UnicodeString): UnicodeString; overload;
{$ENDIF}
function MecsReverseStringC(const AText: WideString): WideString; overload;
// MecsRightStrC
{$IFDEF UNICODE}
function MecsRightStrC(const AText: UnicodeString; const ACount: Integer): UnicodeString; overload;
{$ENDIF}
function MecsRightStrC(const AText: WideString; const ACount: Integer): WideString; overload;
// MecsStrCCSLength
{$IFDEF UNICODE}
function MecsStrCCSLength(const Str: PUnicodeChar): Integer; overload;
{$ENDIF}
function MecsStrCCSLength(const Str: PWideChar): Integer; overload;
// MecsStrLenC
{$IFDEF UNICODE}
function MecsStrLenC(const Str: PUnicodeChar): Cardinal; overload;
{$ENDIF}
function MecsStrLenC(const Str: PWideChar): Cardinal; overload;
// MecsStrNextCCS
{$IFDEF UNICODE}
function MecsStrNextCCS(const Str: PUnicodeChar): PUnicodeChar; overload;
{$ENDIF}
function MecsStrNextCCS(const Str: PWideChar): PWideChar; overload;

// for Convert
// -----------------------------------------------------------------------------
// AnsiToUTF32
{$IFDEF UNICODE}
function AnsiToUTF32(const AText: RawByteString; Codepage: DWORD): UCS4String; overload;
function AnsiToUTF32(const AText: RawByteString): UCS4String; overload;
{$ELSE}
function AnsiToUTF32(const AText: AnsiString; Codepage: DWORD): UCS4String; overload;
function AnsiToUTF32(const AText: AnsiString): UCS4String; overload;
{$ENDIF}
// AnsiToUTF16
{$IFDEF UNICODE}
function AnsiToUTF16(const AText: RawByteString; Codepage: DWORD): WideString; overload;
function AnsiToUTF16(const AText: RawByteString): WideString; overload;
{$ELSE}
function AnsiToUTF16(const AText: AnsiString; Codepage: DWORD): WideString; overload;
function AnsiToUTF16(const AText: AnsiString): WideString; overload;
{$ENDIF}
// AnsiToUTF8
{$IFDEF UNICODE}
function AnsiToUTF8(const AText: RawByteString; Codepage: DWORD): RawByteString; overload;
function AnsiToUTF8(const AText: RawByteString): RawByteString; overload;
{$ELSE}
function AnsiToUTF8(const AText: AnsiString; Codepage: DWORD): UTF8String; overload;
function AnsiToUTF8(const AText: AnsiString): UTF8String; overload;
{$ENDIF}
// CodePointToUTF16
function CodePointToUTF16(const UCS4: UCS4Char): WideString; overload;
{$IFDEF UNICODE}
function CodePointToUTF8(const UCS4: UCS4Char): RawByteString;
{$ELSE}
function CodePointToUTF8(const UCS4: UCS4Char): UTF8String;
{$ENDIF}
// ConvertMultiByteToUnicode
{$IFDEF UNICODE}
function ConvertMultiByteToUnicode(SrcCodepage: DWORD; const SrcStr: RawByteString; var DstStr: WideString): Boolean;
{$ELSE}
function ConvertMultiByteToUnicode(SrcCodepage: DWORD; const SrcStr: AnsiString; var DstStr: WideString): Boolean;
{$ENDIF}
// ConvertString
{$IFDEF UNICODE}
function ConvertString(SrcCodepage, DstCodepage: DWORD; const SrcStr: RawByteString; var DstStr: RawByteString): Boolean;
{$ELSE}
function ConvertString(SrcCodepage, DstCodepage: DWORD; const SrcStr: AnsiString; var DstStr: AnsiString): Boolean;
{$ENDIF}
// ConvertUnicodeToMultiByte
{$IFDEF UNICODE}
function ConvertUnicodeToMultiByte(DstCodepage: DWORD; const SrcStr: WideString; var DstStr: RawByteString): Boolean;
{$ELSE}
function ConvertUnicodeToMultiByte(DstCodepage: DWORD; const SrcStr: WideString; var DstStr: AnsiString): Boolean;
{$ENDIF}
// UTF32ToAnsi
{$IFDEF UNICODE}
function UTF32ToAnsi(const UCS4Text: UCS4String; Codepage: DWORD): RawByteString; overload;
function UTF32ToAnsi(const UCS4Text: UCS4String): RawByteString; overload;
{$ELSE}
function UTF32ToAnsi(const UCS4Text: UCS4String; Codepage: DWORD): AnsiString; overload;
function UTF32ToAnsi(const UCS4Text: UCS4String): AnsiString; overload;
{$ENDIF}
// UTF32ToUTF16
function UTF32ToUTF16(const UCS4Text: UCS4String): WideString;
// UTF32ToUTF8
{$IFDEF UNICODE}
function UTF32ToUTF8(const UCS4Text: UCS4String): RawByteString;
{$ELSE}
function UTF32ToUTF8(const UCS4Text: UCS4String): UTF8String;
{$ENDIF}
// UTF16ToAnsi
{$IFDEF UNICODE}
function UTF16ToAnsi(const WText: WideString; Codepage: DWORD): RawByteString; overload;
function UTF16ToAnsi(const WText: WideString): RawByteString; overload;
{$ELSE}
function UTF16ToAnsi(const WText: WideString; Codepage: DWORD): AnsiString; overload;
function UTF16ToAnsi(const WText: WideString): AnsiString; overload;
{$ENDIF}
// UTF16ToUTF32
function UTF16ToUTF32(const WText: WideString): UCS4String;
// UTF16ToUTF32
{$IFDEF UNICODE}
function UTF16ToUTF8(const WText: WideString): RawByteString;
{$ELSE}
function UTF16ToUTF8(const WText: WideString): UTF8String;
{$ENDIF}
// UTF16ToAnsi
{$IFDEF UNICODE}
function UTF8ToAnsi(const UCF8Text: RawByteString; Codepage: DWORD): RawByteString; overload;
function UTF8ToAnsi(const UCF8Text: RawByteString): RawByteString; overload;
{$ELSE}
function UTF8ToAnsi(const UCF8Text: UTF8String; Codepage: DWORD): AnsiString; overload;
function UTF8ToAnsi(const UCF8Text: UTF8String): AnsiString; overload;
{$ENDIF}
// UTF8ToUTF32
{$IFDEF UNICODE}
function UTF8ToUTF32(const UCF8Text: RawByteString): UCS4String;
{$ELSE}
function UTF8ToUTF32(const UCF8Text: UTF8String): UCS4String;
{$ENDIF}
// UTF8ToUTF16
{$IFDEF UNICODE}
function UTF8ToUTF16(const UCF8Text: RawByteString): WideString;
{$ELSE}
function UTF8ToUTF16(const UCF8Text: UTF8String): WideString;
{$ENDIF}

var
  DefaultAnsiCodePage : DWORD;
  DefaultOEMCodePage  : DWORD;
  DefaultLCID         : LCID;

implementation

// for Codepage/Locale/CharSet
// -----------------------------------------------------------------------------

// CodePageToFontCharset
function CodePageToFontCharset(Codepage: DWORD): TFontCharset;
var
  lpSrc: DWORD;
  lpCs: TCharsetInfo;
begin
  lpSrc := CodePage;
  if TranslateCharsetInfo(lpSrc, lpCs, TCI_SRCCODEPAGE) then
    result := lpCs.ciCharset
  else
    result := DEFAULT_CHARSET;
end;

// FontCharsetToCodePage
function FontCharsetToCodePage(FontCharset: TFontCharset): DWORD;
var
  lpSrc: DWORD;
  lpCs: TCharsetInfo;
begin
  lpSrc := FontCharset;
  if TranslateCharsetInfo(lpSrc, lpCs, TCI_SRCCHARSET) then
    result := lpCs.ciACP
  else
    result := DefaultAnsiCodePage;
end;

// IsFarEastCharSet
function IsFarEastCharSet(CharSet: TFontCharSet): Boolean;
begin
  case CharSet of
    SHIFTJIS_CHARSET,
    GB2312_CHARSET,
    CHINESEBIG5_CHARSET,
    HANGEUL_CHARSET,
    JOHAB_CHARSET:
      result := True;
  else
    result := False;
  end;
end;

// IsFarEastLCID
function IsFarEastLCID(Locale: LCID): Boolean;
begin
  case Locale of
    1028, // Taiwan
    1041, // Japan
    1042, // Korea
    2052, // China
    3076, // Hong Kong
    4100: // Singapore
      result := True;
  else
    result := False;
  end;
end;

// for AnsiString
// -----------------------------------------------------------------------------

// MecsGetCodePage
{$IFDEF UNICODE}
function MecsGetCodePage(S: RawByteString): DWORD;
{$ELSE}
function MecsGetCodePage(S: AnsiString): DWORD;
{$ENDIF}
begin
  {$IFDEF UNICODE}
  result := StringCodePage(S);
  if result = CP_ACP then
  {$ENDIF}
    result := DefaultAnsiCodePage;
end;

// MecsGetLeadBytes
function MecsGetLeadBytes: TLeadBytes;
begin
  result := MecsGetLeadBytes(DefaultAnsiCodePage);
end;

function MecsGetLeadBytes(Codepage: DWORD): TLeadBytes;
var
  i:Integer;
  MBCSCPInfo: TCPInfo;
  LB: Byte;
begin
  result := [];
  if CodePage = CP_UTF8 then
    Exit;
  GetCPInfo(CodePage, MBCSCPInfo);
  with MBCSCPInfo do
    begin
      i := 0;
      while (i < MAX_LEADBYTES) and ((LeadByte[i] or LeadByte[i + 1]) <> 0) do
        begin
          for LB := LeadByte[i] to LeadByte[i + 1] do
            Include(result, AnsiChar(LB));
          Inc(i, 2);
        end;
    end;
end;

// for UnicodeString/WideString
// -----------------------------------------------------------------------------

// MecsEastAsianWidth
function MecsEastAsianWidth(U: UCS4Char): TEastAsianWidth;
// http://unicode.org/Public/UNIDATA/MecsEastAsianWidth.txt
begin
  case U of
    // Fullwidth
    $3000,
    $FF01..$FF60,
    $FFE0..$FFE6:
      result := eawFullwidth;
    // Halfwidth
    $20A9,
    $FF61..$FFBE,
    $FFC2..$FFC7,
    $FFCA..$FFCF,
    $FFD2..$FFD7,
    $FFDA..$FFDC,
    $FFE8..$FFEE:
      result := eawHalfwidth;
    // Wide
    $1100..$1159,
    $115F,
    $2329..$232A,
    $2E80..$2E99,
    $2E9B..$2EF3,
    $2F00..$2FD5,
    $2FF0..$2FFB,
    $3001..$303E,
    $3041..$3096,
    $3099..$30FF,
    $3105..$312D,
    $3131..$318E,
    $3190..$31B7,
    $31C0..$31E3,
    $31F0..$321E,
    $3220..$3243,
    $3250..$32FE,
    $3300..$33FF,
    $3400..$4DB5,
    $4E00..$9FC3,
    $A000..$A48C,
    $A490..$A4C6,
    $AC00..$D7A3,
    $F900..$FA2D,
    $FA30..$FA6A,
    $FA70..$FAD9,
    $FE10..$FE19,
    $FE30..$FE52,
    $FE54..$FE66,
    $FE68..$FE6B,
    $20000..$2A6D6,
    $2A6D7..$2F7FF,
    $2F800..$2FA1D,
    $2FA1E..$2FFFD,
    $30000..$3FFFD:
      result := eawWide;
    // Narrow
    $0020..$007E,
    $00A2..$00A3,
    $00A5..$00A6,
    $00AC,
    $00AF,
    $27E6..$27ED,
    $2985..$2986:
      result := eawNarrow;
    // Ambiguous
    $00A1,
    $00A4,
    $00A7..$00A8,
    $00AA,
    $00AD..$00AE,
    $00B0..$00B4,
    $00B6..$00BA,
    $00BC..$00BF,
    $00C6,
    $00D0,
    $00D7..$00D8,
    $00DE..$00E1,
    $00E6,
    $00E8..$00EA,
    $00EC..$00ED,
    $00F0,
    $00F2..$00F3,
    $00F7..$00FA,
    $00FC,
    $00FE,
    $0101,
    $0111,
    $0113,
    $011B,
    $0126..$0127,
    $012B,
    $0131..$0133,
    $0138,
    $013F..$0142,
    $0144,
    $0148..$014B,
    $014D,
    $0152..$0153,
    $0166..$0167,
    $016B,
    $01CE,
    $01D0,
    $01D2,
    $01D4,
    $01D6,
    $01D8,
    $01DA,
    $01DC,
    $0251,
    $0261,
    $02C4,
    $02C7,
    $02C9..$02CB,
    $02CD,
    $02D0,
    $02D8..$02DB,
    $02DD,
    $02DF,
    $0300..$036F,
    $0391..$03A1,
    $03A3..$03A9,
    $03B1..$03C1,
    $03C3..$03C9,
    $0401,
    $0410..$044F,
    $0451,
    $2010,
    $2013..$2016,
    $2018..$2019,
    $201C..$201D,
    $2020..$2022,
    $2024..$2027,
    $2030,
    $2032..$2033,
    $2035,
    $203B,
    $203E,
    $2074,
    $207F,
    $2081..$2084,
    $20AC,
    $2103,
    $2105,
    $2109,
    $2113,
    $2116,
    $2121..$2122,
    $2126,
    $212B,
    $2153..$2154,
    $215B..$215E,
    $2160..$216B,
    $2170..$2179,
    $2190..$2199,
    $21B8..$21B9,
    $21D2,
    $21D4,
    $21E7,
    $2200,
    $2202..$2203,
    $2207..$2208,
    $220B,
    $220F,
    $2211,
    $2215,
    $221A,
    $221D..$2220,
    $2223,
    $2225,
    $2227..$222C,
    $222E,
    $2234..$2237,
    $223C..$223D,
    $2248,
    $224C,
    $2252,
    $2260..$2261,
    $2264..$2267,
    $226A..$226B,
    $226E..$226F,
    $2282..$2283,
    $2286..$2287,
    $2295,
    $2299,
    $22A5,
    $22BF,
    $2312,
    $2460..$24E9,
    $24EB..$254B,
    $2550..$2573,
    $2580..$258F,
    $2592..$2595,
    $25A0..$25A1,
    $25A3..$25A9,
    $25B2..$25B3,
    $25B6..$25B7,
    $25BC..$25BD,
    $25C0..$25C1,
    $25C6..$25C8,
    $25CB,
    $25CE..$25D1,
    $25E2..$25E5,
    $25EF,
    $2605..$2606,
    $2609,
    $260E..$260F,
    $2614..$2615,
    $261C,
    $261E,
    $2640,
    $2642,
    $2660..$2661,
    $2663..$2665,
    $2667..$266A,
    $266C..$266D,
    $266F,
    $273D,
    $2776..$277F,
    $E000..$F8FF,
    $FE00..$FE0F,
    $FFFD,
    $E0100..$E01EF:
      result := eawAmbiguous;
  else
    result := eawNeutral;
  end;
end;

// MecsCombiningType (written by Mae)
function MecsCombiningType(U: UCS4Char): TCombiningType;
begin
  case U of
    $0300..$036F,
    $0483..$0489,
    $0591..$05BD,
    $05BF..$05BF,
    $05C1..$05C2,
    $05C4..$05C5,
    $05C7..$05C7,
    $0610..$061A,
    $064B..$065E,
    $0670..$0670,
    $06D6..$06DC,
    $06DE..$06E4,
    $06E7..$06E8,
    $06EA..$06ED,
    $0711..$0711,
    $0730..$074A,
    $07A6..$07B0,
    $07EB..$07F3,
    $0901..$0903,
    $093C..$093C,
    $093E..$094D,
    $0951..$0954,
    $0962..$0963,
    $0981..$0983,
    $09BC..$09BC,
    $09BE..$09C4,
    $09C7..$09C8,
    $09CB..$09CD,
    $09D7..$09D7,
    $09E2..$09E3,
    $0A01..$0A03,
    $0A3C..$0A3C,
    $0A3E..$0A42,
    $0A47..$0A48,
    $0A4B..$0A4D,
    $0A51..$0A51,
    $0A70..$0A71,
    $0A75..$0A75,
    $0A81..$0A83,
    $0ABC..$0ABC,
    $0ABE..$0AC5,
    $0AC7..$0AC9,
    $0ACB..$0ACD,
    $0AE2..$0AE3,
    $0B01..$0B03,
    $0B3C..$0B3C,
    $0B3E..$0B44,
    $0B47..$0B48,
    $0B4B..$0B4D,
    $0B56..$0B57,
    $0B62..$0B63,
    $0B82..$0B82,
    $0BBE..$0BC2,
    $0BC6..$0BC8,
    $0BCA..$0BCD,
    $0BD7..$0BD7,
    $0C01..$0C03,
    $0C3E..$0C44,
    $0C46..$0C48,
    $0C4A..$0C4D,
    $0C55..$0C56,
    $0C62..$0C63,
    $0C82..$0C83,
    $0CBC..$0CBC,
    $0CBE..$0CBE,
    $0CC0..$0CC4,
    $0CC7..$0CC8,
    $0CCA..$0CCD,
    $0CD5..$0CD6,
    $0CE2..$0CE3,
    $0D02..$0D03,
    $0D3E..$0D44,
    $0D46..$0D48,
    $0D4A..$0D4D,
    $0D57..$0D57,
    $0D62..$0D63,
    $0D82..$0D83,
    $0DCA..$0DCA,
    $0DCF..$0DD4,
    $0DD6..$0DD6,
    $0DD8..$0DDF,
    $0DF2..$0DF3,
    $0E31..$0E31,
    $0E34..$0E3A,
    $0E47..$0E4E,
    $0EB1..$0EB1,
    $0EB4..$0EB9,
    $0EBB..$0EBC,
    $0EC8..$0ECD,
    $0F18..$0F19,
    $0F35..$0F35,
    $0F37..$0F37,
    $0F39..$0F39,
    $0F3E..$0F3F,
    $0F71..$0F84,
    $0F86..$0F87,
    $0F90..$0F97,
    $0F99..$0FBC,
    $0FC6..$0FC6,
    $102B..$103E,
    $1056..$1059,
    $105E..$1060,
    $1062..$1064,
    $1067..$106D,
    $1071..$1074,
    $1082..$108D,
    $108F..$108F,
    $135F..$135F,
    $1712..$1714,
    $1732..$1734,
    $1752..$1753,
    $1772..$1773,
    $17B6..$17D3,
    $17DD..$17DD,
    $180B..$180D,
    $18A9..$18A9,
    $1920..$192B,
    $1930..$193B,
    $19B0..$19C0,
    $19C8..$19C9,
    $1A17..$1A1B,
    $1B00..$1B04,
    $1B34..$1B44,
    $1B6B..$1B73,
    $1B80..$1B82,
    $1BA1..$1BAA,
    $1C24..$1C37,
    $1DC0..$1DE6,
    $1DFE..$1DFF,
    $20D0..$20F0,
    $2DE0..$2DFF,
    $302A..$302F,
    $3099..$309A,
    $A66F..$A672,
    $A67C..$A67D,
    $A802..$A802,
    $A806..$A806,
    $A80B..$A80B,
    $A823..$A827,
    $A880..$A881,
    $A8B4..$A8C4,
    $A926..$A92D,
    $A947..$A953,
    $AA29..$AA36,
    $AA43..$AA43,
    $AA4C..$AA4D,
    $FB1E..$FB1E,
    $FE00..$FE0F,
    $FE20..$FE26,
    $101FD..$101FD,
    $10A01..$10A03,
    $10A05..$10A06,
    $10A0C..$10A0F,
    $10A38..$10A3A,
    $10A3F..$10A3F,
    $1D165..$1D169,
    $1D16D..$1D172,
    $1D17B..$1D182,
    $1D185..$1D18B,
    $1D1AA..$1D1AD,
    $1D242..$1D244,
    $E0100..$E01EF:
      result := ctCombining;
  else
    result := ctBase;
  end;
end;

// MecsIsNormalized
function MecsIsNormalized(const AText: WideString; NormForm: TNORM_FORM): Boolean;
// -----------------------------------------------------------------------------
// Requires(XP/2003 or later):
// Microsoft Internationalized Domain Names (IDN) Mitigation APIs 1.1
// http://www.microsoft.com/downloads/details.aspx?FamilyID=ad6158d7-ddba-416a-9109-07607425a815&displaylang=en
// (or Internet Explorer 7 or later)
// -----------------------------------------------------------------------------
var
  FP: TFarProc;
  DLLWnd: THandle;
  // http://msdn2.microsoft.com/en-us/library/ms776382(VS.85).aspx
  MecsIsNormalizedString: function(NormForm: Integer; lpString: LPCWSTR; cwLength: Integer):Boolean; stdcall;
begin
  result := False;
  if Length(AText) = 0 then
    begin
      result := True;
      Exit;
    end;
  {$IFDEF UNICODE}
  DLLWnd := LoadLibraryW('normaliz.dll');
  {$ELSE}
  DLLWnd := LoadLibraryA('normaliz.dll');
  {$ENDIF}
  try
    FP := GetProcAddress(DLLWnd, 'MecsIsNormalizedString');
    if FP <> nil then
      begin
        @MecsIsNormalizedString := FP;
        result := MecsIsNormalizedString(Integer(NormForm), PWideChar(AText), Length(AText));
      end;
  finally
    if DLLWnd > 0 then
      FreeLibrary(DLLWnd);
  end;
end;

// MecsMappingFixJA
// (Fix Unicode to JIS X 0208 mapping)
{$IFDEF UNICODE}
function MecsMappingFixJA(const S: UnicodeString): UnicodeString; overload;
var
  i: Integer;
begin
  result := S;
  for i:=1 to Length(result) do
    case result[i] of
      #$2225: // U+2225: PARALLEL TO            -> U+2016: DOUBLE VERTICAL LINE
        result[i] := #$2016;
      #$FF0D: // U+FF0D: FULLWIDTH HYPHEN-MINUS -> U+2212: MINUS SIGN
        result[i] := #$2212;
      #$FF5E: // U+FF5E: FULLWIDTH TILDE        -> U+301C: WAVE DASH
        result[i] := #$301C;
      #$FFE0: // U+FFE0: FULLWIDTH CENT SIGN    -> U+00A2: CENT SIGN
        result[i] := #$00A2;
      #$FFE1: // U+FFE1: FULLWIDTH POUND SIGN   -> U+00A3: POUND SIGN
        result[i] := #$00A3;
      #$FFE2: // U+FFE2: FULLWIDTH NOT SIGN     -> U+00AC: NOT SIGN
        result[i] := #$00AC;
    end;  
end;
{$ENDIF}
function MecsMappingFixJA(const S: WideString): WideString; overload;
var
  i: Integer;
begin
  result := S;
  for i:=1 to Length(result) do
    case result[i] of
      #$2225: // U+2225: PARALLEL TO            -> U+2016: DOUBLE VERTICAL LINE
        result[i] := #$2016;
      #$FF0D: // U+FF0D: FULLWIDTH HYPHEN-MINUS -> U+2212: MINUS SIGN
        result[i] := #$2212;
      #$FF5E: // U+FF5E: FULLWIDTH TILDE        -> U+301C: WAVE DASH
        result[i] := #$301C;
      #$FFE0: // U+FFE0: FULLWIDTH CENT SIGN    -> U+00A2: CENT SIGN
        result[i] := #$00A2;
      #$FFE1: // U+FFE1: FULLWIDTH POUND SIGN   -> U+00A3: POUND SIGN
        result[i] := #$00A3;
      #$FFE2: // U+FFE2: FULLWIDTH NOT SIGN     -> U+00AC: NOT SIGN
        result[i] := #$00AC;
    end;  
end;

// MecsNormalize
function MecsNormalize(const Src: WideString; var Dst: WideString; NormForm: TNORM_FORM): Boolean;
// -----------------------------------------------------------------------------
// NormalizeString Requires(XP/2003 or later):
// Microsoft Internationalized Domain Names (IDN) Mitigation APIs 1.1
// http://www.microsoft.com/downloads/details.aspx?FamilyID=ad6158d7-ddba-416a-9109-07607425a815&displaylang=en
// (or Internet Explorer 7 or later)
// -----------------------------------------------------------------------------
// FoldString requires Windows NT3.1 or later;
// -----------------------------------------------------------------------------
//
// Please examine U+03D3.
//  - NFC  U+03D3
//  - NFD  U+03D2 (U+0020) U+0301
//  - NFKC U+038E
//  - NFKC U+03A5 (U+0020) U+0301
//
// -----------------------------------------------------------------------------
var
  i: Integer;
  O,P :PWideChar;
  BufSize: Integer;
  FP: TFarProc;
  DLLWnd: THandle;
  MapFlags: DWORD;
  // http://msdn2.microsoft.com/en-us/library/ms776395(VS.85).aspx
  NormalizeString: function(NormForm: Integer; lpSrcString: LPCWSTR; cwSrMecsLength: Integer; lpDstString: LPWSTR; cwDstLength: Integer):Integer; stdcall;
begin
  result := False;
  Dst := '';
  if Length(Src) = 0 then
    begin
      result := True;
      Exit;
    end;
  {$IFDEF UNICODE}
  DLLWnd := LoadLibraryW('normaliz.dll');
  {$ELSE}
  DLLWnd := LoadLibraryA('normaliz.dll');
  {$ENDIF}
  try
    FP := GetProcAddress(DLLWnd, 'NormalizeString');
    if FP <> nil then
      begin
        @NormalizeString := FP;
        BufSize := NormalizeString(Integer(NormForm), PWideChar(Src), Length(Src), nil, 0);
        if (GetLastError <> 0) then
          Exit;
        if (BufSize = 0) then
          begin
            result := True;
            Exit;
          end;
        SetLength(Dst, BufSize);
        P := PWideChar(Dst);
        for i:=1 to BufSize do
          begin
            P^ := #$0000;
            Inc(P);
          end;
        NormalizeString(Integer(NormForm), PWideChar(Src), Length(Src), PWideChar(Dst), Length(Dst));
        if (GetLastError <> 0) then
          Exit;
        P := PWideChar(Dst);
        O := P;
        while (P^ <> #$0000) do
          Inc(P);
        SetLength(Dst, P - O);
        result := True;
      end;
  finally
    if DLLWnd > 0 then
      FreeLibrary(DLLWnd);
  end;
  if not result then
    begin
      case NormForm of
        NormalizationC:
          MapFlags := MAP_PRECOMPOSED;
        NormalizationD:
          MapFlags := MAP_COMPOSITE;
        NormalizationKC:
          MapFlags := MAP_FOLDCZONE or MAP_PRECOMPOSED;
        NormalizationKD:
          MapFlags := MAP_FOLDCZONE or MAP_COMPOSITE;
      else
        MapFlags := 0;
      end;
      BufSize := FoldStringW(MapFlags, PWideChar(Src), Length(Src), nil, 0);
      if (GetLastError <> 0) then
        Exit;
      if (BufSize = 0) then
        begin
          result := True;
          Exit;
        end;
      SetLength(Dst, BufSize);
      P := PWideChar(Dst);
      for i:=1 to BufSize do
        begin
          P^ := #$0000;
          Inc(P);
        end;
      FoldStringW(MapFlags, PWideChar(Src), Length(Src), PWideChar(Dst), Length(Dst));
      if (GetLastError <> 0) then
        Exit;
      P := PWideChar(Dst);
      O := P;
      while (P^ <> #$0000) do
        Inc(P);
      SetLength(Dst, P - O);
      result := True;
    end;
end;

// StrLen
{$IFNDEF UNICODE}
function StrLen(const Str: PWideChar): Cardinal;
begin
  result := lstrlenW(Str);
end;
{$ENDIF}

// for AnsiString/UnicodeString/WideString (Common)
// -----------------------------------------------------------------------------

// MecsCharToElementIndex
{$IFDEF UNICODE}
function MecsCharToElementIndex(const S: RawByteString; Index: Integer): Integer;
{$ELSE}
function MecsCharToElementIndex(const S: AnsiString; Index: Integer): Integer;
{$ENDIF}
begin
  result := MecsCharToElementIndex(S, Index, MecsGetCodePage(S));
end;

{$IFDEF UNICODE}
function MecsCharToElementIndex(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer;
{$ELSE}
function MecsCharToElementIndex(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer;
{$ENDIF}
var
  P, O: PAnsiChar;
  L, ChrCnt: Integer;
  LeadBytes: TLeadBytes;
begin
  result := 0;
  L := System.Length(S);
  if L = 0 then
    Exit;
  result := 0;
  if CodePage = CP_UTF8 then
    begin
      ChrCnt := 0;
      P := PAnsiChar(S);
      O := P;
      while (P^ <> #$00) and (result = 0) do
        begin
          if (Byte(P^) and $80) = $00 then
            Inc(ChrCnt)
          else if (Byte(P^) and $C0) = $C0 then
            Inc(ChrCnt);
          if ChrCnt = Index then
            begin
              result := P - O + 1;
              Exit;
            end;
          Inc(P);
        end;
    end
  else
    begin
      LeadBytes := MecsGetLeadBytes(CodePage);
      if LeadBytes = [] then
        result := Index
      else
        begin
          ChrCnt := 0;
          P := PAnsiChar(S);
          O := P;
          while (P^ <> #$00) and (result = 0) do
            begin
              Inc(ChrCnt);
              if ChrCnt = Index then
                begin
                  result := P - O + 1;
                  Exit;
                end;
              if P^ in LeadBytes then
                begin
                  Inc(P);
                  if P^ = #$00 then
                    Break;
                end;
              Inc(P);
            end;
        end;
    end;
end;

{$IFDEF UNICODE}
function MecsCharToElementIndex(const S: UnicodeString; Index: Integer): Integer;
var
  P, O: PUnicodeChar;
  L, ChrCnt: Integer;
begin
  result := 0;
  L := System.Length(S);
  if L = 0 then
    Exit;
  ChrCnt := 0;
  P := PUnicodeChar(S);
  O := P;
  while (P^ <> #$0000) and (result = 0) do
    begin
      Inc(ChrCnt);
      if ChrCnt = Index then
        begin
          result := P - O + 1;
          Break;
        end;
      if MecsIsLeadElement(P^) then
        begin
          Inc(P);
          if P^ = #$0000 then
            Break;
        end;
      Inc(P);
    end;
end;
{$ENDIF}

function MecsCharToElementIndex(const S: WideString; Index: Integer): Integer;
var
  P, O: PWideChar;
  L, ChrCnt: Integer;
begin
  result := 0;
  L := System.Length(S);
  if L = 0 then
    Exit;
  ChrCnt := 0;
  P := PWideChar(S);
  O := P;
  while (P^ <> #$0000) and (result = 0) do
    begin
      Inc(ChrCnt);
      if ChrCnt = Index then
        begin
          result := P - O + 1;
          Break;
        end;
      if MecsIsLeadElement(P^) then
        begin
          Inc(P);
          if P^ = #$0000 then
            Break;
        end;
      Inc(P);
    end;
end;

// MecsCharToElementIndexCount
{$IFDEF UNICODE}
function MecsCharToElementIndexCount(const S: RawByteString; Index, Count: Integer): TIndexCount;
{$ELSE}
function MecsCharToElementIndexCount(const S: AnsiString; Index, Count: Integer): TIndexCount;
{$ENDIF}
begin
  result := MecsCharToElementIndexCount(S, Index, Count, MecsGetCodePage(S));
end;

{$IFDEF UNICODE}
function MecsCharToElementIndexCount(const S: RawByteString; Index, Count: Integer; Codepage: DWORD): TIndexCount;
{$ELSE}
function MecsCharToElementIndexCount(const S: AnsiString; Index, Count: Integer; Codepage: DWORD): TIndexCount;
{$ENDIF}
var
  P, O: PAnsiChar;
  L, J, ChrCnt: Integer;
  LeadBytes: TLeadBytes;
begin
  result.StartIndex := Index;
  result.EndIndex   := Index + Count - 1;
  result.Count      := Count;
  L := System.Length(S);
  if L = 0 then
    Exit;
  if CodePage = CP_UTF8 then
    begin
      result.StartIndex := 0;
      result.EndIndex   := 0;
      result.Count      := 0;
      ChrCnt := 0;
      P := PAnsiChar(S);
      O := P;
      while (P^ <> #$00) and (result.EndIndex = 0) and ((ChrCnt - Index) < Count) do
        begin
          if (Byte(P^) and $80) = $00 then
            begin
              Inc(ChrCnt);
              if ChrCnt = Index then
                result.StartIndex := P - O + 1;
              if ChrCnt = (Index + Count - 1) then
                result.EndIndex := P - O + 1;
            end
          else if (Byte(P^) and $C0) = $C0 then
            begin
              Inc(ChrCnt);
              if ChrCnt = Index then
                result.StartIndex := P - O + 1;
              if ChrCnt = (Index + Count - 1) then
                begin
                  for J:=2 to 6 do
                    begin
                      if (Byte(P^) and ($80 shr J)) = 0 then
                        begin
                          result.EndIndex := P - O + J;
                          Break;
                        end;
                    end;
                end;
            end;
          Inc(P);
        end;
      if (result.EndIndex = 0) and ((ChrCnt - Index) < Count) then
        result.EndIndex := L;
      result.Count := result.EndIndex - result.StartIndex + 1;
    end
  else
    begin
      LeadBytes := MecsGetLeadBytes(CodePage);
      if LeadBytes = [] then
        begin
          result.StartIndex := Index;
          result.EndIndex   := Index + Count - 1;
          result.Count      := Count;
        end
      else
        begin
          result.StartIndex := 0;
          result.EndIndex   := 0;
          result.Count      := 0;
          ChrCnt := 0;
          P := PAnsiChar(S);
          O := P;
          while (P^ <> #$00) and (result.EndIndex = 0) and ((ChrCnt - Index) < Count) do
            begin
              Inc(ChrCnt);
              if ChrCnt = Index then
                result.StartIndex := P - O + 1;
              if P^ in LeadBytes then
                begin
                  if ChrCnt = (Index + Count - 1) then
                    result.EndIndex := P - O + 2;
                  Inc(P);
                  if P^ = #$00 then
                    Break;
                end
              else if ChrCnt = (Index + Count - 1) then
                result.EndIndex := P - O + 1;
              Inc(P);
            end;
          if (result.EndIndex = 0) and ((ChrCnt - Index) < Count) then
            result.EndIndex := L;
          result.Count := result.EndIndex - result.StartIndex + 1;
        end;
    end;
end;

{$IFDEF UNICODE}
function MecsCharToElementIndexCount(const S: UnicodeString; Index, Count: Integer): TIndexCount;
var
  P, O: PUnicodeChar;
  L, ChrCnt: Integer;
begin
  result.StartIndex := Index;
  result.EndIndex   := Index + Count - 1;
  result.Count      := Count;
  L := System.Length(S);
  if L = 0 then
    Exit;
  result.StartIndex := 0;
  result.EndIndex   := 0;
  result.Count      := 0;
  ChrCnt := 0;
  P := PUnicodeChar(S);
  O := P;
  while (P^ <> #$0000) and (result.EndIndex = 0) and ((ChrCnt - Index) < Count) do
    begin
      Inc(ChrCnt);
      if ChrCnt = Index then
        result.StartIndex := P - O + 1;
      if MecsIsLeadElement(P^) then
        begin
          if ChrCnt = (Index + Count - 1) then
            result.EndIndex := P - O + 2;
          Inc(P);
          if P^ = #$0000 then
            Break;
        end
      else if ChrCnt = (Index + Count - 1) then
        result.EndIndex := P - O + 1;
      Inc(P);
    end;
  if (result.EndIndex = 0) and ((ChrCnt - Index) < Count) then
    result.EndIndex := L;
  result.Count := result.EndIndex - result.StartIndex + 1;
end;
{$ENDIF}

function MecsCharToElementIndexCount(const S: WideString; Index, Count: Integer): TIndexCount;
var
  P, O: PWideChar;
  L, ChrCnt: Integer;
begin
  result.StartIndex := Index;
  result.EndIndex   := Index + Count - 1;
  result.Count      := Count;
  L := System.Length(S);
  if L = 0 then
    Exit;
  result.StartIndex := 0;
  result.EndIndex   := 0;
  result.Count      := 0;
  ChrCnt := 0;
  P := PWideChar(S);
  O := P;
  while (P^ <> #$0000) and (result.EndIndex = 0) and ((ChrCnt - Index) < Count) do
    begin
      Inc(ChrCnt);
      if ChrCnt = Index then
        result.StartIndex := P - O + 1;
      if MecsIsLeadElement(P^) then
        begin
          if ChrCnt = (Index + Count - 1) then
            result.EndIndex := P - O + 2;
          Inc(P);
          if P^ = #$0000 then
            Break;
        end
      else if ChrCnt = (Index + Count - 1) then
        result.EndIndex := P - O + 1;
      Inc(P);
    end;
  if (result.EndIndex = 0) and ((ChrCnt - Index) < Count) then
    result.EndIndex := L;
  result.Count := result.EndIndex - result.StartIndex + 1;
end;


// MecsCharLength
{$IFDEF UNICODE}
function MecsCharLength(const S: RawByteString; Index: Integer): Integer;
{$ELSE}
function MecsCharLength(const S: AnsiString; Index: Integer): Integer;
{$ENDIF}
begin
  result := MecsCharLength(S, Index, MecsGetCodePage(S));
end;

{$IFDEF UNICODE}
function MecsCharLength(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer;
{$ELSE}
function MecsCharLength(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer;
{$ENDIF}
var
  Idx: Integer;
  J: Byte;
begin
  result := 1;
  if S = '' then
    Exit;
  Idx := MecsCharToElementIndex(S, Index, Codepage);
  if Idx = 0 then
    Exit;
  if Codepage = CP_UTF8 then
    begin
      if (Byte(S[Idx]) and $C0) = $C0 then
        begin
          for J:=2 to 6 do
            begin
              if (Byte(S[Idx]) and ($80 shr J)) = 0 then
                begin
                  result := J;
                  Break;
                end;
            end;
        end;
    end
  else
    begin
      if MecsIsLeadElement(S[Idx], Codepage) then
        result := 2;
    end;
end;

{$IFDEF UNICODE}
function MecsCharLength(const S: UnicodeString; Index: Integer): Integer;
begin
  result := 1;
  if S = '' then
    Exit;
  if Index <= 0 then
    Exit;
  if MecsIsLeadElement(S[Index]) then
    result := 2;
end;
{$ENDIF}

function MecsCharLength(const S: WideString; Index: Integer): Integer;
begin
  result := 1;
  if S = '' then
    Exit;
  if Index <= 0 then
    Exit;
  if MecsIsLeadElement(S[Index]) then
    result := 2;
end;

// MecsCharToElementLen
{$IFDEF UNICODE}
function MecsCharToElementLen(const S: RawByteString; MaxLen: Integer): Integer;
{$ELSE}
function MecsCharToElementLen(const S: AnsiString; MaxLen: Integer): Integer;
{$ENDIF}
begin
  result := MecsCharToElementLen(S, MaxLen, MecsGetCodePage(S));
end;

{$IFDEF UNICODE}
function MecsCharToElementLen(const S: RawByteString; MaxLen: Integer; Codepage: DWORD): Integer;
{$ELSE}
function MecsCharToElementLen(const S: AnsiString; MaxLen: Integer; Codepage: DWORD): Integer;
{$ENDIF}
  {$IFDEF UNICODE}
  function _MecsCharToElementLen(const S: RawByteString; MaxLen: Integer; Codepage: DWORD): Integer;
  {$ELSE}
  function _MecsCharToElementLen(const S: AnsiString; MaxLen: Integer; Codepage: DWORD): Integer;
  {$ENDIF}
  var
    P, O: PAnsiChar;
    L, ChrCnt: Integer;
    J: Byte;
    LeadBytes: TLeadBytes;
  begin
    result := 0;
    L := System.Length(S);
    if L = 0 then
      Exit;
    if CodePage = CP_UTF8 then
      begin
        ChrCnt := 0;
        P := PAnsiChar(S);
        O := P;
        while (P^ <> #$00) and (result = 0) do
          begin
            if (Byte(P^) and $80) = $00 then
              begin
                Inc(ChrCnt);
                if ChrCnt = MaxLen then
                  begin
                    result := P - O + 1;
                    Exit;
                  end;
              end
            else if (Byte(P^) and $C0) = $C0 then
              begin
                Inc(ChrCnt);
                if ChrCnt = MaxLen then
                  begin
                    for J:=2 to 6 do
                      begin
                        if (Byte(P^) and ($80 shr J)) = 0 then
                          begin
                            result := P - O + J;
                            Break;
                          end;
                      end;
                    Exit;
                  end;
              end;
            Inc(P);
          end;
      end
    else
      begin
        LeadBytes := MecsGetLeadBytes(CodePage);
        if LeadBytes = [] then
          result := MaxLen
        else
          begin
            ChrCnt := 0;
            P := PAnsiChar(S);
            O := P;
            while (P^ <> #$00) and (result = 0) do
              begin
                Inc(ChrCnt);
                if ChrCnt = MaxLen then
                  result := P - O + 1;
                if P^ in LeadBytes then
                  begin
                    Inc(P);
                    if ChrCnt = MaxLen then
                      begin
                        result := P - O + 1;
                        Break;
                      end;
                    if P^ = #$00 then
                      Break;
                  end;
                Inc(P);
              end;
          end;
      end;
  end;
begin
  result := _MecsCharToElementLen(S, MaxLen, CodePage);
  if result < MaxLen then
    result := System.Length(S);
end;

{$IFDEF UNICODE}
function MecsCharToElementLen(const S: UnicodeString; MaxLen: Integer): Integer;
begin
  result := MecsCharToElementIndex(S, MaxLen + 1) - 1;
  if result < MaxLen then
    result := System.Length(S);
end;
{$ENDIF}

function MecsCharToElementLen(const S: WideString; MaxLen: Integer): Integer;
begin
  result := MecsCharToElementIndex(S, MaxLen + 1) - 1;
  if result < MaxLen then
    result := System.Length(S);
end;

// MecsCopy
{$IFDEF UNICODE}
function MecsCopy(const S: RawByteString; Index, Count: Integer): RawByteString;
{$ELSE}
function MecsCopy(const S: AnsiString; Index, Count: Integer): AnsiString;
{$ENDIF}
begin
  result := MecsCopy(S, Index, Count, MecsGetCodePage(S));
end;

{$IFDEF UNICODE}
function MecsCopy(const S: RawByteString; Index, Count: Integer; Codepage: DWORD): RawByteString;
{$ELSE}
function MecsCopy(const S: AnsiString; Index, Count: Integer; Codepage: DWORD): AnsiString;
{$ENDIF}
var
  AIC: TIndexCount;
begin
  AIC := MecsCharToElementIndexCount(S, Index, Count, CodePage);
  if AIC.StartIndex > 0 then
    result := Copy(S, AIC.StartIndex, AIC.Count)
  else
    result := '';
end;

{$IFDEF UNICODE}
function MecsCopy(const S: UnicodeString; Index, Count: Integer): UnicodeString;
var
  WIC: TIndexCount;
begin
  WIC := MecsCharToElementIndexCount(S, Index, Count);
  if WIC.StartIndex > 0 then
    result := Copy(S, WIC.StartIndex, WIC.Count)
  else
    result := '';
end;
{$ENDIF}

function MecsCopy(const S: WideString; Index, Count: Integer): WideString;
var
  WIC: TIndexCount;
begin
  WIC := MecsCharToElementIndexCount(S, Index, Count);
  if WIC.StartIndex > 0 then
    result := Copy(S, WIC.StartIndex, WIC.Count)
  else
    result := '';
end;

// MecsDelete
{$IFDEF UNICODE}
procedure MecsDelete(var S: RawByteString;  Index, Count: Integer);
{$ELSE}
procedure MecsDelete(var S: AnsiString;  Index, Count: Integer);
{$ENDIF}
begin
  MecsDelete(S, Index, Count, MecsGetCodePage(S));
end;

{$IFDEF UNICODE}
procedure MecsDelete(var S: RawByteString; Index, Count: Integer; Codepage: DWORD);
{$ELSE}
procedure MecsDelete(var S: AnsiString; Index, Count: Integer; Codepage: DWORD);
{$ENDIF}
var
  AIC: TIndexCount;
begin
  AIC := MecsCharToElementIndexCount(S, Index, Count, CodePage);
  if AIC.StartIndex > 0 then
    Delete(S, AIC.StartIndex, AIC.Count);
end;

{$IFDEF UNICODE}
procedure MecsDelete(var S: UnicodeString; Index, Count: Integer);
var
  UIC: TIndexCount;
begin
  UIC := MecsCharToElementIndexCount(S, Index, Count);
  if UIC.StartIndex > 0 then
    Delete(S, UIC.StartIndex, UIC.Count);
end;
{$ENDIF}

procedure MecsDelete(var S: WideString; Index, Count: Integer);
var
  WIC: TIndexCount;
begin
  WIC := MecsCharToElementIndexCount(S, Index, Count);
  if WIC.StartIndex > 0 then
    Delete(S, WIC.StartIndex, WIC.Count);
end;

// MecsElementToCharIndex
{$IFDEF UNICODE}
function MecsElementToCharIndex(const S: RawByteString; Index: Integer): Integer;
{$ELSE}
function MecsElementToCharIndex(const S: AnsiString; Index: Integer): Integer;
{$ENDIF}
begin
  result := MecsElementToCharIndex(S, Index, MecsGetCodePage(S));
end;

{$IFDEF UNICODE}
function MecsElementToCharIndex(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer;
{$ELSE}
function MecsElementToCharIndex(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer;
{$ENDIF}
var
  P, O: PAnsiChar;
  L, ChrCnt: Integer;
  LeadBytes: TLeadBytes;
begin
  result := 0;
  L := System.Length(S);
  if L = 0 then
    Exit;
  result := 0;
  if CodePage = CP_UTF8 then
    begin
      ChrCnt := 0;
      P := PAnsiChar(S);
      O := P;
      while (P^ <> #$00) and (result = 0) do
        begin
          if (Byte(P^) and $80) = $00 then
            Inc(ChrCnt)
          else if (Byte(P^) and $C0) = $C0 then
            Inc(ChrCnt);
          if Index = (P - O + 1) then
            begin
              result := ChrCnt;
              Break;
            end;
          Inc(P);
        end;
    end
  else
    begin
      LeadBytes := MecsGetLeadBytes(CodePage);
      if LeadBytes = [] then
        result := Index
      else
        begin
          ChrCnt := 0;
          P := PAnsiChar(S);
          O := P;
          while (P^ <> #$00) and (result = 0) do
            begin
              Inc(ChrCnt);
              if Index = (P - O + 1) then
                begin
                  result := ChrCnt;
                  Break;
                end;
              if P^ in LeadBytes then
                begin
                  Inc(P);
                  if P^ = #$00 then
                    Break;
                  if Index = (P - O + 1) then
                    begin
                      result := ChrCnt;
                      Break;
                    end;
                end;
              Inc(P);
            end;
        end;
    end;
end;

{$IFDEF UNICODE}
function MecsElementToCharIndex(const S: UnicodeString; Index: Integer): Integer;
var
  P, O: PUnicodeChar;
  L, ChrCnt: Integer;
begin
  result := 0;
  L := System.Length(S);
  if L = 0 then
    Exit;
  ChrCnt := 0;
  P := PUnicodeChar(S);
  O := P;
  while (P^ <> #$0000) and (result = 0) do
    begin
      Inc(ChrCnt);
      if Index = (P - O + 1) then
        begin
          result := ChrCnt;
          Break;
        end;
      if MecsIsLeadElement(P^) then
        begin
          Inc(P);
          if P^ = #$0000 then
            Break;
          if Index = (P - O + 1) then
            begin
              result := ChrCnt;
              Break;
            end;
        end;
      Inc(P);
    end;
end;
{$ENDIF}

function MecsElementToCharIndex(const S: WideString; Index: Integer): Integer;
var
  P, O: PWideChar;
  L, ChrCnt: Integer;
begin
  result := 0;
  L := System.Length(S);
  if L = 0 then
    Exit;
  ChrCnt := 0;
  P := PWideChar(S);
  O := P;
  while (P^ <> #$0000) and (result = 0) do
    begin
      Inc(ChrCnt);
      if Index = (P - O + 1) then
        begin
          result := ChrCnt;
          Break;
        end;
      if MecsIsLeadElement(P^) then
        begin
          Inc(P);
          if P^ = #$0000 then
            Break;
          if Index = (P - O + 1) then
            begin
              result := ChrCnt;
              Break;
            end;
        end;
      Inc(P);
    end;
end;

// MecsElementToCharLen
{$IFDEF UNICODE}
function MecsElementToCharLen(const S: RawByteString; MaxLen: Integer): Integer;
{$ELSE}
function MecsElementToCharLen(const S: AnsiString; MaxLen: Integer): Integer;
{$ENDIF}
begin
  result := MecsElementToCharLen(S, MaxLen, MecsGetCodePage(S));
end;

{$IFDEF UNICODE}
function MecsElementToCharLen(const S: RawByteString; MaxLen: Integer; Codepage: DWORD): Integer;
{$ELSE}
function MecsElementToCharLen(const S: AnsiString; MaxLen: Integer; Codepage: DWORD): Integer;
{$ENDIF}
begin
  result := MecsElementToCharIndex(S, MaxLen, CodePage);
  if result = 0 then
    result := MecsLength(S, CodePage);
end;

{$IFDEF UNICODE}
function MecsElementToCharLen(const S: UnicodeString; MaxLen: Integer): Integer;
begin
  result := MecsElementToCharIndex(S, MaxLen);
  if result = 0 then
    result := MecsLength(S);
end;
{$ENDIF}

function MecsElementToCharLen(const S: WideString; MaxLen: Integer): Integer;
begin
  result := MecsElementToCharIndex(S, MaxLen);
  if result = 0 then
    result := MecsLength(S);
end;

// MecsElementType
{$IFDEF UNICODE}
function MecsElementType(const S: RawByteString; Index: Integer): TElementType;
{$ELSE}
function MecsElementType(const S: AnsiString; Index: Integer): TElementType;
{$ENDIF}
begin
  result := MecsStrElementType(PAnsiChar(S), Index - 1, MecsGetCodePage(S));
end;

{$IFDEF UNICODE}
function MecsElementType(const S: RawByteString; Index: Integer; Codepage: DWORD): TElementType;
{$ELSE}
function MecsElementType(const S: AnsiString; Index: Integer; Codepage: DWORD): TElementType;
{$ENDIF}
begin
  result := MecsStrElementType(PAnsiChar(S), Index - 1, CodePage);
end;

{$IFDEF UNICODE}
function MecsElementType(const S: UnicodeString; Index: Integer): TElementType;
begin
  result := MecsStrElementType(PUnicodeChar(S), Index - 1);
end;
{$ENDIF}

function MecsElementType(const S: WideString; Index: Integer): TElementType;
begin
  result := MecsStrElementType(PWideChar(S), Index - 1);
end;

// MecsInsert
{$IFDEF UNICODE}
procedure MecsInsert(const Source: RawByteString; var S: RawByteString; Index: Integer);
{$ELSE}
procedure MecsInsert(const Source: AnsiString; var S: AnsiString; Index: Integer);
{$ENDIF}
begin
  MecsInsert(Source, S, Index, MecsGetCodePage(S));
end;

{$IFDEF UNICODE}
procedure MecsInsert(const Source: RawByteString; var S: RawByteString; Index: Integer; Codepage: DWORD);
{$ELSE}
procedure MecsInsert(const Source: AnsiString; var S: AnsiString; Index: Integer; Codepage: DWORD);
{$ENDIF}
var
  Idx: Integer;
begin
  Idx := MecsCharToElementIndex(S, Index, CodePage);
  if Idx > 0 then
    Insert(Source, S, Idx)
  else
    S := S + Source;
end;

{$IFDEF UNICODE}
procedure MecsInsert(const Source: UnicodeString; var S: UnicodeString; Index: Integer);
var
  Idx: Integer;
begin
  Idx := MecsCharToElementIndex(S, Index);
  if Idx > 0 then
    Insert(Source, S, Idx)
  else
    S := S + Source;
end;
{$ENDIF}

procedure MecsInsert(const Source: WideString; var S: WideString; Index: Integer);
var
  Idx: Integer;
begin
  Idx := MecsCharToElementIndex(S, Index);
  if Idx > 0 then
    Insert(Source, S, Idx)
  else
    S := S + Source;
end;

// MecsIsFullWidth
{$IFDEF UNICODE}
function MecsIsFullWidth(const S: RawByteString; CharIndex: Integer): Boolean;
{$ELSE}
function MecsIsFullWidth(const S: AnsiString; CharIndex: Integer): Boolean;
{$ENDIF}
begin
  result := MecsIsFullWidth(S, CharIndex, SysLocale.FarEast, MecsGetCodePage(S));
end;

{$IFDEF UNICODE}
function MecsIsFullWidth(const S: RawByteString; CharIndex: Integer; FarEast: Boolean): Boolean;
{$ELSE}
function MecsIsFullWidth(const S: AnsiString; CharIndex: Integer; FarEast: Boolean): Boolean;
{$ENDIF}
begin
  result := MecsIsFullWidth(S, CharIndex, FarEast, MecsGetCodePage(S));
end;

{$IFDEF UNICODE}
function MecsIsFullWidth(const S: RawByteString; CharIndex: Integer; FarEast: Boolean; Codepage: DWORD): Boolean;
{$ELSE}
function MecsIsFullWidth(const S: AnsiString; CharIndex: Integer; FarEast: Boolean; Codepage: DWORD): Boolean;
{$ENDIF}
var
  IC: TIndexCount;
begin
  if CodePage = CP_UTF8 then
    {$IFDEF UNICODE}
    result := MecsIsFullWidth(UTF8ToUnicodeString(S), CharIndex, FarEast)
    {$ELSE}
    result := MecsIsFullWidth(MECSUtils.UTF8ToUTF16(S), CharIndex, FarEast)
    {$ENDIF}
  else
    begin
      IC := MecsCharToElementIndexCount(S, CharIndex, 1, Codepage);
      result := (IC.Count > 1);
    end;
end;

{$IFDEF UNICODE}
function MecsIsFullWidth(const S: UnicodeString; CharIndex: Integer): Boolean;
begin
  result := MecsIsFullWidth(S, CharIndex, SysLocale.FarEast);
end;
{$ENDIF}

{$IFDEF UNICODE}
function MecsIsFullWidth(const S: UnicodeString; CharIndex: Integer; FarEast: Boolean): Boolean;
var
  UCS4: UCS4String;
begin
  result := False;
  UCS4 := UTF16ToUTF32(S);
  if Length(UCS4) < 0 then
    Exit;
  if CharIndex < 1 then
    Exit;
  if CharIndex >= Length(UCS4) then
    Exit;
  case MecsEastAsianWidth(UCS4[CharIndex - 1]) of
    eawWide,
    eawFullwidth:
      result := True;
    eawAmbiguous:
      result := FarEast;
  else
    result := False;
  end;
end;
{$ENDIF}

function MecsIsFullWidth(const S: WideString; CharIndex: Integer): Boolean;
begin
  result := MecsIsFullWidth(S, CharIndex, SysLocale.FarEast);
end;

function MecsIsFullWidth(const S: WideString; CharIndex: Integer; FarEast: Boolean): Boolean;
var
  UCS4: UCS4String;
begin
  result := False;
  UCS4 := UTF16ToUTF32(S);
  if Length(UCS4) < 0 then
    Exit;
  if CharIndex < 1 then
    Exit;
  if CharIndex >= Length(UCS4) then
    Exit;
  case MecsEastAsianWidth(UCS4[CharIndex - 1]) of
    eawWide,
    eawFullwidth:
      result := True;
    eawAmbiguous:
      result := FarEast;
  else
    result := False;
  end;
end;

// MecsIsLeadElement
function MecsIsLeadElement(TestChar: AnsiChar): Boolean;
begin
  { Not recommended }
  result := MecsIsLeadElement(TestChar, DefaultAnsiCodePage);
end;

function MecsIsLeadElement(TestChar: AnsiChar; Codepage: DWORD): Boolean;
var
  LeadBytes: TLeadBytes;
begin
  result := False;
  if TestChar = '' then
    Exit;
  if CodePage = CP_UTF8 then
    result := ((Byte(TestChar) and $C0) = $C0)
  else
    begin
      LeadBytes := MecsGetLeadBytes(CodePage);
      if LeadBytes = [] then
        Exit;
      result := (TestChar in LeadBytes);
    end;
end;

{$IFDEF UNICODE}
function MecsIsLeadElement(TestChar: UnicodeChar): Boolean;
{$ELSE}
function MecsIsLeadElement(TestChar: WideChar): Boolean;
{$ENDIF}
begin
  result := ((WORD(TestChar) shr 8) in [$D8..$DB]);
end;

// MecsIsMECElement
function MecsIsMECElement(TestChar: AnsiChar): Boolean; overload;
begin
  { Not recommended }
  result := MecsIsMECElement(TestChar, DefaultAnsiCodePage);
end;

function MecsIsMECElement(TestChar: AnsiChar; Codepage: DWORD): Boolean; overload;
begin
  result := (MecsElementType(TestChar, 1, Codepage) <> etSingle);
end;

{$IFDEF UNICODE}
function MecsIsMECElement(TestChar: UnicodeChar): Boolean;
{$ELSE}
function MecsIsMECElement(TestChar: WideChar): Boolean;
{$ENDIF}
begin
  result := ((WORD(TestChar) and $D800) = $D800);
end;

// MecsIsTrailElement
function MecsIsTrailElement(TestChar: AnsiChar): Boolean;
begin
  { Not recommended }
  result := MecsIsTrailElement(TestChar, DefaultAnsiCodePage);
end;

function MecsIsTrailElement(TestChar: AnsiChar; Codepage: DWORD): Boolean;
begin
  result := (MecsElementType(TestChar, 1, Codepage) = etTrail);
end;

{$IFDEF UNICODE}
function MecsIsTrailElement(TestChar: UnicodeChar): Boolean;
{$ELSE}
function MecsIsTrailElement(TestChar: WideChar): Boolean;
{$ENDIF}
begin
  result := ((WORD(TestChar) shr 8) in [$DC..$DF]);
end;

// MecsLeftStr
{$IFDEF UNICODE}
function MecsLeftStr(const AText: RawByteString; const ACount: Integer): RawByteString;
{$ELSE}
function MecsLeftStr(const AText: AnsiString; const ACount: Integer): AnsiString;
{$ENDIF}
begin
  result := MecsCopy(AText, 1, ACount, MecsGetCodePage(AText));
end;

{$IFDEF UNICODE}
function MecsLeftStr(const AText: RawByteString; const ACount: Integer; Codepage: DWORD): RawByteString;
{$ELSE}
function MecsLeftStr(const AText: AnsiString; const ACount: Integer; Codepage: DWORD): AnsiString;
{$ENDIF}
begin
  result := MecsCopy(AText, 1, ACount, CodePage);
end;

{$IFDEF UNICODE}
function MecsLeftStr(const AText: UnicodeString; const ACount: Integer): UnicodeString;
begin
  result := MecsCopy(AText, 1, ACount);
end;
{$ENDIF}

function MecsLeftStr(const AText: WideString; const ACount: Integer): WideString;
begin
  result := MecsCopy(AText, 1, ACount);
end;

// MecsLength
{$IFDEF UNICODE}
function MecsLength(const S: RawByteString): Integer;
{$ELSE}
function MecsLength(const S: AnsiString): Integer;
{$ENDIF}
begin
  result := MecsStrLen(PAnsiChar(S), MecsGetCodePage(S));
end;

{$IFDEF UNICODE}
function MecsLength(const S: RawByteString; Codepage: DWORD): Integer;
{$ELSE}
function MecsLength(const S: AnsiString; Codepage: DWORD): Integer;
{$ENDIF}
begin
  result := MecsStrLen(PAnsiChar(S), CodePage);
end;

{$IFDEF UNICODE}
function MecsLength(const S: UnicodeString): Integer;
begin
  result := MecsStrLen(PUnicodeChar(S));
end;
{$ENDIF}

function MecsLength(const S: WideString): Integer;
begin
  result := MecsStrLen(PWideChar(S));
end;

// MecsMidStr
{$IFDEF UNICODE}
function MecsMidStr(const AText: RawByteString; const AStart, ACount: Integer): RawByteString;
{$ELSE}
function MecsMidStr(const AText: AnsiString; const AStart, ACount: Integer): AnsiString;
{$ENDIF}
begin
  result := MecsCopy(AText, AStart, ACount);
end;

{$IFDEF UNICODE}
function MecsMidStr(const AText: RawByteString; const AStart, ACount: Integer; Codepage: DWORD): RawByteString;
{$ELSE}
function MecsMidStr(const AText: AnsiString; const AStart, ACount: Integer; Codepage: DWORD): AnsiString;
{$ENDIF}
begin
  result := MecsCopy(AText, AStart, ACount, Codepage);
end;

{$IFDEF UNICODE}
function MecsMidStr(const AText: UnicodeString; const AStart, ACount: Integer): UnicodeString;
begin
  result := MecsCopy(AText, AStart, ACount);
end;
{$ENDIF}

function MecsMidStr(const AText: WideString; const AStart, ACount: Integer): WideString;
begin
  result := MecsCopy(AText, AStart, ACount);
end;

// MecsNextCharIndex
{$IFDEF UNICODE}
function MecsNextCharIndex(const S: RawByteString; Index: Integer): Integer;
{$ELSE}
function MecsNextCharIndex(const S: AnsiString; Index: Integer): Integer;
{$ENDIF}
begin
  result := MecsNextCharIndex(S, Index, MecsGetCodePage(S));
end;

{$IFDEF UNICODE}
function MecsNextCharIndex(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer;
{$ELSE}
function MecsNextCharIndex(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer;
{$ENDIF}
var
  Idx: Integer;
begin
  Idx := MecsElementToCharIndex(S, Index, CodePage);
  result := MecsCharToElementLen(S, Idx, CodePage) + 1;
end;

{$IFDEF UNICODE}
function MecsNextCharIndex(const S: UnicodeString; Index: Integer): Integer;
var
  Idx: Integer;
begin
  Idx := MecsElementToCharIndex(S, Index);
  result := MecsCharToElementLen(S, Idx) + 1;
end;
{$ENDIF}

function MecsNextCharIndex(const S: WideString; Index: Integer): Integer;
var
  Idx: Integer;
begin
  Idx := MecsElementToCharIndex(S, Index);
  result := MecsCharToElementLen(S, Idx) + 1;
end;

// MecsReverseString
{$IFDEF UNICODE}
function MecsReverseString(const AText: RawByteString): RawByteString;
{$ELSE}
function MecsReverseString(const AText: AnsiString): AnsiString;
{$ENDIF}
begin
  result := MecsReverseString(AText, MecsGetCodePage(AText));
end;

{$IFDEF UNICODE}
function MecsReverseString(const AText: RawByteString; Codepage: DWORD): RawByteString;
{$ELSE}
function MecsReverseString(const AText: AnsiString; Codepage: DWORD): AnsiString;
{$ENDIF}
var
  i: Integer;
begin
  result := '';
  for i:=1 to MecsLength(AText) do
    result := MecsCopy(AText, i, 1) + result;
end;

{$IFDEF UNICODE}
function MecsReverseString(const AText: UnicodeString): UnicodeString;
var
  i: Integer;
begin
  result := '';
  for i:=1 to System.Length(AText) do
    begin
      case MecsElementType(AText, i) of
        etLead:;
        etTrail:
          result := AText[i-1] + AText[i] + result;
      else
        result := AText[i] + result;
      end;
    end;
end;
{$ENDIF}

function MecsReverseString(const AText: WideString): WideString;
var
  i: Integer;
begin
  result := '';
  for i:=1 to System.Length(AText) do
    begin
      case MecsElementType(AText, i) of
        etLead:;
        etTrail:
          result := AText[i-1] + AText[i] + result;
      else
        result := AText[i] + result;
      end;
    end;
end;

// MecsRightStr
{$IFDEF UNICODE}
function MecsRightStr(const AText: RawByteString; const ACount: Integer; Codepage: DWORD): RawByteString;
{$ELSE}
function MecsRightStr(const AText: AnsiString; const ACount: Integer; Codepage: DWORD): AnsiString;
{$ENDIF}
var
  Idx: Integer;
begin
  Idx :=  MecsLength(AText, CodePage);
  if (Idx - ACount + 1) > 0 then
    result := MecsCopy(AText, Idx - ACount + 1, ACount, CodePage)
  else
    result := AText;
end;

{$IFDEF UNICODE}
function MecsRightStr(const AText: RawByteString; const ACount: Integer): RawByteString;
{$ELSE}
function MecsRightStr(const AText: AnsiString; const ACount: Integer): AnsiString;
{$ENDIF}
begin
  result := MecsRightStr(AText, ACount, MecsGetCodePage(AText));
end;

{$IFDEF UNICODE}
function MecsRightStr(const AText: UnicodeString; const ACount: Integer): UnicodeString;
var
  Idx: Integer;
begin
  Idx := MecsLength(AText);
  if (Idx - ACount + 1) > 0 then
    result := MecsCopy(AText, Idx - ACount + 1, ACount)
  else
    result := AText;
end;
{$ENDIF}

function MecsRightStr(const AText: WideString; const ACount: Integer): WideString;
var
  Idx: Integer;
begin
  Idx := MecsLength(AText);
  if (Idx - ACount + 1) > 0 then
    result := MecsCopy(AText, Idx - ACount + 1, ACount)
  else
    result := AText;
end;

// MecsStrCharLength
function MecsStrCharLength(const Str: PAnsiChar): Integer;
begin
  { Not recommended }
  result := MecsStrCharLength(Str, DefaultAnsiCodePage);
end;

function MecsStrCharLength(const Str: PAnsiChar; Codepage: DWORD): Integer;
begin
  if (Str = nil) or (Str = '') then
    result := 0
  else
    result := MecsCharLength(StrPas(Str), 1, Codepage);
end;

{$IFDEF UNICODE}
function MecsStrCharLength(const Str: PUnicodeChar): Integer;
begin
  if (Str = nil) or (Str = '') then
    result := 0
  else
    result := MecsCharLength(Str^, 1);
end;
{$ENDIF}

function MecsStrCharLength(const Str: PWideChar): Integer;
begin
  if (Str = nil) or (Str = '') then
    result := 0
  else
    result := MecsCharLength(Str^, 1);
end;

// MecsStrElementType
function MecsStrElementType(const Str: PAnsiChar; Index: Cardinal): TElementType;
begin
  { Not recommended }
  result := MecsStrElementType(Str, Index, DefaultAnsiCodePage);
end;

function MecsStrElementType(const Str: PAnsiChar; Index: Cardinal; Codepage: DWORD): TElementType;
var
  P: PAnsiChar;
  i: Integer;
  LeadBytes: TLeadBytes;
begin
  result := etSingle;
  if Str = nil then
    Exit;
  if (Index <= 0) or (Index + 1 > SysUtils.StrLen(Str)) then
    Exit;
  P := Str;
  if CodePage = CP_UTF8 then
    begin
      Inc(P, Index);
      if (Byte(P^) and $80) = $00 then
        result := etSingle
      else if (Byte(P^) and $C0) = $C0 then
        result := etLead
      else
        result := etTrail;
    end
  else
    begin
      LeadBytes := MecsGetLeadBytes(CodePage);
      if LeadBytes = [] then
        Exit;
      if (P = nil) or ((P + Index)^ = #$00) then
        Exit;
      if (Index = 0) then
        begin
          if P^ in LeadBytes then
            result := etLead;
        end
      else
        begin
          i := Index - 1;
          while (i >= 0) and ((P + i)^ in LeadBytes) do
            Dec(i);
          if ((Index - Cardinal(i)) mod 2) = 0 then
            result := etTrail
          else if (P + Index)^ in LeadBytes then
            result := etLead;
        end;
    end;
end;

{$IFDEF UNICODE}
function MecsStrElementType(const Str: PUnicodeChar; Index: Cardinal): TElementType;
begin
  result := etSingle;
  if Str = nil then
    Exit;
  if (Index <= 0) or ((Index + 1) > Cardinal(SysUtils.StrLen(PWideChar(Str)))) then
    Exit;
  if MecsIsLeadElement(Str[Index]) then
    result := etLead
  else if MecsIsTrailElement(Str[Index]) then
    result := etTrail;
end;
{$ENDIF}

function MecsStrElementType(const Str: PWideChar; Index: Cardinal): TElementType;
begin
  result := etSingle;
  if Str = nil then
    Exit;
  {$IFDEF UNICODE}
  if (Index <= 0) or ((Index + 1) > Cardinal(SysUtils.StrLen(Str))) then
  {$ELSE}
  if (Index <= 0) or ((Index + 1) > Cardinal(MECSUtils.StrLen(Str))) then
  {$ENDIF}
    Exit;
  if MecsIsLeadElement(Str[Index]) then
    result := etLead
  else if MecsIsTrailElement(Str[Index]) then
    result := etTrail;
end;

// MecsStrLen
function MecsStrLen(const Str: PAnsiChar): Cardinal;
begin
  { Not recommended }
  result := MecsStrLen(Str, DefaultAnsiCodePage);
end;

function MecsStrLen(const Str: PAnsiChar; Codepage: DWORD): Cardinal;
var
  P: PAnsiChar;
  L, ChrCnt: Integer;
  LeadBytes: TLeadBytes;
begin
  result := 0;
  L := SysUtils.StrLen(Str);
  if L = 0 then
    Exit;
  if CodePage = CP_UTF8 then
    begin
      ChrCnt := 0;
      P := Str;
      while P^ <> #$00 do
        begin
          if (Byte(P^) and $80) = $00 then
            Inc(ChrCnt)
          else if (Byte(P^) and $C0) = $C0 then
            Inc(ChrCnt);
          Inc(P);
        end;
    end
  else
    begin
      LeadBytes := MecsGetLeadBytes(CodePage);
      if LeadBytes = [] then
        ChrCnt := L
      else
        begin
          ChrCnt := 0;
          P := Str;
          while P^ <> #$00 do
            begin
              Inc(ChrCnt);
              if P^ in LeadBytes then
                begin
                  Inc(P);
                  if P^ = #$00 then
                    Break;
                end;
              Inc(P);
            end;
        end;
    end;
  result := ChrCnt;
end;

{$IFDEF UNICODE}
function MecsStrLen(const Str: PUnicodeChar): Cardinal;
var
  P: PUnicodeChar;
  ChrCnt: Integer;
begin
  ChrCnt := 0;
  P := Str;
  while P^ <> #$0000 do
    begin
      if MecsIsLeadElement(P^) then
        Inc(ChrCnt)
      else if not MecsIsTrailElement(P^) then
        Inc(ChrCnt);
      Inc(P);
    end;
  result := ChrCnt;
end;
{$ENDIF}

function MecsStrLen(const Str: PWideChar): Cardinal;
var
  P: PWideChar;
  ChrCnt: Integer;
begin
  ChrCnt := 0;
  P := Str;
  while P^ <> #$0000 do
    begin
      if MecsIsLeadElement(P^) then
        Inc(ChrCnt)
      else if not MecsIsTrailElement(P^) then
        Inc(ChrCnt);
      Inc(P);
    end;
  result := ChrCnt;
end;

// MecsStrNextChar
function MecsStrNextChar(const Str: PAnsiChar): PAnsiChar;
begin
  { Not recommended }
  result := MecsStrNextChar(Str, DefaultAnsiCodePage);
end;

function MecsStrNextChar(const Str: PAnsiChar; Codepage: DWORD): PAnsiChar;
begin
  result := str + MecsStrCharLength(str, CodePage);
end;

{$IFDEF UNICODE}
function MecsStrNextChar(const Str: PUnicodeChar): PUnicodeChar;
begin
  result := str + MecsStrCharLength(str);
end;
{$ENDIF}

function MecsStrNextChar(const Str: PWideChar): PWideChar;
begin
  result := str + MecsStrCharLength(str);
end;

// for UnicodeString/WideString (Combining Character Sequence)
// -----------------------------------------------------------------------------

// MecsCCSLength
{$IFDEF UNICODE}
function MecsCCSLength(const S: UnicodeString; Index: Integer): Integer; overload;
var
  IC: TIndexCount;
begin
  IC := MecsCCSToElementIndexCount(S, Index, 1);
  if IC.StartIndex = 0 then
    result := 1
  else
    result := IC.Count;
end;
{$ENDIF}

function MecsCCSLength(const S: WideString; Index: Integer): Integer; overload;
var
  IC: TIndexCount;
begin
  IC := MecsCCSToElementIndexCount(S, Index, 1);
  if IC.StartIndex = 0 then
    result := 1
  else
    result := IC.Count;
end;

// MecsCCSToElementIndex
{$IFDEF UNICODE}
function MecsCCSToElementIndex(const S: UnicodeString; Index: Integer): Integer; overload;
var
  L, ChrCnt: Integer;
  UCS4 : UCS4String;
  i: Integer;
begin
  result := 0;
  L := System.Length(S);
  if L = 0 then
    Exit;
  ChrCnt := 0;
  UCS4 := UTF16toUTF32(S);
  for i:=0 to Length(UCS4) - 1 do
    begin
      if MecsCombiningType(UCS4[i]) = ctBase then
        Inc(ChrCnt);
      if (ChrCnt = Index) then
        begin
          result := MecsCharToElementIndex(S, i + 1);
          break;
        end;
    end;
end;
{$ENDIF}

function MecsCCSToElementIndex(const S: WideString; Index: Integer): Integer; overload;
var
  L, ChrCnt: Integer;
  UCS4 : UCS4String;
  i: Integer;
begin
  result := 0;
  L := System.Length(S);
  if L = 0 then
    Exit;
  ChrCnt := 0;
  UCS4 := UTF16toUTF32(S);
  for i:=0 to Length(UCS4) - 1 do
    begin
      if MecsCombiningType(UCS4[i]) = ctBase then
        Inc(ChrCnt);
      if (ChrCnt = Index) then
        begin
          result := MecsCharToElementIndex(S, i + 1);
          break;
        end;
    end;
end;

// MecsCCSToElementIndexCount
{$IFDEF UNICODE}
function MecsCCSToElementIndexCount(const S: UnicodeString; Index, Count: Integer): TIndexCount; overload;
var
  ChrCnt, L: Integer;
  UCS4 : UCS4String;
  i: Integer;
begin
  result.StartIndex := Index;
  result.EndIndex   := Index + Count - 1;
  result.Count      := Count;
  L := System.Length(S);
  if L = 0 then
    Exit;

  result.StartIndex := 0;
  result.EndIndex   := 0;
  result.Count      := 0;
  ChrCnt := 0;
  UCS4 := UTF16toUTF32(S);
  for i:=0 to Length(UCS4) - 1 do
    begin
      if MecsCombiningType(UCS4[i]) = ctBase then
        Inc(ChrCnt);
      if (ChrCnt = Index) and (result.StartIndex = 0) then
        result.StartIndex := MecsCharToElementIndex(S, i + 1);
      if (ChrCnt = (Index + Count - 1)) and (result.StartIndex > 0) then
        result.EndIndex := MecsCharToElementLen(S, i + 1)
      else if ChrCnt > (Index + Count - 1) then
        break;
    end;
  if (result.EndIndex = 0) and ((ChrCnt - Index) < Count) then
    result.EndIndex := L;
  result.Count := result.EndIndex - result.StartIndex + 1;
end;

{$ENDIF}
function MecsCCSToElementIndexCount(const S: WideString; Index, Count: Integer): TIndexCount; overload;
var
  ChrCnt, L: Integer;
  UCS4 : UCS4String;
  i: Integer;
begin
  result.StartIndex := Index;
  result.EndIndex   := Index + Count - 1;
  result.Count      := Count;
  L := System.Length(S);
  if L = 0 then
    Exit;

  result.StartIndex := 0;
  result.EndIndex   := 0;
  result.Count      := 0;
  ChrCnt := 0;

  UCS4 := UTF16toUTF32(S);
  for i:=0 to Length(UCS4) - 1 do
    begin
      if MecsCombiningType(UCS4[i]) = ctBase then
        Inc(ChrCnt);
      if (ChrCnt = Index) and (result.StartIndex = 0) then
        result.StartIndex := MecsCharToElementIndex(S, i + 1);
      if (ChrCnt = (Index + Count - 1)) and (result.StartIndex > 0) then
        result.EndIndex := MecsCharToElementLen(S, i + 1)
      else if ChrCnt > (Index + Count - 1) then
        break;
    end;
  if (result.EndIndex = 0) and ((ChrCnt - Index) < Count) then
    result.EndIndex := L;
  result.Count := result.EndIndex - result.StartIndex + 1;
end;

// MecsCCSToElementLen
{$IFDEF UNICODE}
function MecsCCSToElementLen(const S: UnicodeString; MaxLen: Integer): Integer; overload;
begin
  result := MecsCCSToElementIndex(S, MaxLen + 1) - 1;
  if result < MaxLen then
    result := System.Length(S);
end;
{$ENDIF}

function MecsCCSToElementLen(const S: WideString; MaxLen: Integer): Integer; overload;
begin
  result := MecsCCSToElementIndex(S, MaxLen + 1) - 1;
  if result < MaxLen then
    result := System.Length(S);
end;

// MecsElementToCCSIndex
{$IFDEF UNICODE}
function MecsElementToCCSIndex(const S: UnicodeString; Index: Integer): Integer; overload;
var
  L, ChrCnt: Integer;
  UCS4: UCS4String;
  i, Idx: Integer;
begin
  result := 0;
  L := System.Length(S);
  if L = 0 then
    Exit;
  Idx := MecsElementToCharIndex(S, Index);
  ChrCnt := 0;
  UCS4 := UTF16ToUTF32(S);
  for i:=0 to Length(UCS4)-1 do
    begin
      if MecsCombiningType(UCS4[i]) = ctBase then
        Inc(ChrCnt);
      if i = (Idx - 1) then
        begin
          result := ChrCnt;
          Break;
        end;
    end;
end;
{$ENDIF}

function MecsElementToCCSIndex(const S: WideString; Index: Integer): Integer; overload;
var
  L, ChrCnt: Integer;
  UCS4: UCS4String;
  i, Idx: Integer;
begin
  result := 0;
  L := System.Length(S);
  if L = 0 then
    Exit;
  Idx := MecsElementToCharIndex(S, Index);
  ChrCnt := 0;
  UCS4 := UTF16ToUTF32(S);
  for i:=0 to Length(UCS4)-1 do
    begin
      if MecsCombiningType(UCS4[i]) = ctBase then
        Inc(ChrCnt);
      if i = (Idx - 1) then
        begin
          result := ChrCnt;
          Break;
        end;
    end;
end;

// MecsElementToCCSLen
{$IFDEF UNICODE}
function MecsElementToCCSLen(const S: UnicodeString; MaxLen: Integer): Integer; overload;
begin
  result := MecsElementToCCSIndex(S, MaxLen);
  if result = 0 then
    result := MecsLengthC(S);
end;
{$ENDIF}

function MecsElementToCCSLen(const S: WideString; MaxLen: Integer): Integer; overload;
begin
  result := MecsElementToCCSIndex(S, MaxLen);
  if result = 0 then
    result := MecsLengthC(S);
end;

// MecsDeleteC
{$IFDEF UNICODE}
procedure MecsDeleteC(var S: UnicodeString; Index, Count: Integer); overload;
var
  AIC: TIndexCount;
begin
  AIC := MecsCCSToElementIndexCount(S, Index, Count);
  if AIC.StartIndex > 0 then
    Delete(S, AIC.StartIndex, AIC.Count);
end;
{$ENDIF}

procedure MecsDeleteC(var S: WideString; Index, Count: Integer); overload;
var
  AIC: TIndexCount;
begin
  AIC := MecsCCSToElementIndexCount(S, Index, Count);
  if AIC.StartIndex > 0 then
    Delete(S, AIC.StartIndex, AIC.Count);
end;

// MecsLengthC
{$IFDEF UNICODE}
function MecsLengthC(const S: UnicodeString): Integer; overload;
var
  Cnt: Integer;
  UCS4 : UCS4String;
  i: Integer;
begin
  Cnt := 0;
  if Length(S) > 0 then
    begin
      UCS4 := UTF16toUTF32(S);
      Cnt := Length(UCS4) - 1;
      for i:=0 to Length(UCS4) - 1 do
        begin
          if MecsCombiningType(UCS4[i]) = ctCombining then
            Dec(Cnt);
        end;
    end;
  result := Cnt;
end;
{$ENDIF}

function MecsLengthC(const S: WideString): Integer; overload;
var
  Cnt: Integer;
  UCS4 : UCS4String;
  i: Integer;
begin
  Cnt := 0;
  if Length(S) > 0 then
    begin
      UCS4 := UTF16toUTF32(S);
      Cnt := Length(UCS4) - 1;
      for i:=0 to Length(UCS4) - 1 do
        begin
          if MecsCombiningType(UCS4[i]) = ctCombining then
            Dec(Cnt);
        end;
    end;
  result := Cnt;
end;

// MecsCopyC
{$IFDEF UNICODE}
function MecsCopyC(const S: UnicodeString; Index, Count: Integer): UnicodeString; overload;
var
  AIC: TIndexCount;
begin
  AIC := MecsCCSToElementIndexCount(S, Index, Count);
  if AIC.StartIndex > 0 then
    result := Copy(S, AIC.StartIndex, AIC.Count)
  else
    result := '';
end;
{$ENDIF}

function MecsCopyC(const S: WideString; Index, Count: Integer): WideString; overload;
var
  AIC: TIndexCount;
begin
  AIC := MecsCCSToElementIndexCount(S, Index, Count);
  if AIC.StartIndex > 0 then
    result := Copy(S, AIC.StartIndex, AIC.Count)
  else
    result := '';
end;

// MecsInsertC
{$IFDEF UNICODE}
procedure MecsInsertC(const Source: UnicodeString; var S: UnicodeString; Index: Integer); overload;
var
  Idx: Integer;
begin
  Idx := MecsCCSToElementIndex(S, Index);
  if Idx > 0 then
    Insert(Source, S, Idx)
  else
    S := S + Source;
end;
{$ENDIF}

procedure MecsInsertC(const Source: WideString; var S: WideString; Index: Integer); overload;
var
  Idx: Integer;
begin
  Idx := MecsCCSToElementIndex(S, Index);
  if Idx > 0 then
    Insert(Source, S, Idx)
  else
    S := S + Source;
end;

// MecsLeftStrC
{$IFDEF UNICODE}
function MecsLeftStrC(const AText: UnicodeString; const ACount: Integer): UnicodeString; overload;
begin
  result := MecsCopyC(AText, 1, ACount);
end;
{$ENDIF}

function MecsLeftStrC(const AText: WideString; const ACount: Integer): WideString; overload;
begin
  result := MecsCopyC(AText, 1, ACount);
end;

// MecsMidStr
{$IFDEF UNICODE}
function MecsMidStrC(const AText: UnicodeString; const AStart, ACount: Integer): UnicodeString; overload;
begin
  result := MecsCopyC(AText, AStart, ACount);
end;
{$ENDIF}

function MecsMidStrC(const AText: WideString; const AStart, ACount: Integer): WideString; overload;
begin
  result := MecsCopyC(AText, AStart, ACount);
end;

// MecsNextCCSIndex
{$IFDEF UNICODE}
function MecsNextCCSIndex(const S: UnicodeString; Index: Integer): Integer; overload;
var
  Idx: Integer;
begin
  Idx := MecsElementToCCSIndex(S, Index);
  result := MecsCCSToElementLen(S, Idx) + 1;
end;
{$ENDIF}

function MecsNextCCSIndex(const S: WideString; Index: Integer): Integer; overload;
var
  Idx: Integer;
begin
  Idx := MecsElementToCCSIndex(S, Index);
  result := MecsCCSToElementLen(S, Idx) + 1;
end;

// MecsReverseStringC
{$IFDEF UNICODE}
function MecsReverseStringC(const AText: UnicodeString): UnicodeString; overload;
var
  i: Integer;
begin
  result := '';
  for i := MECSLengthC(AText) downto 1 do
    result := result + MecsCopyC(AText, i, 1);
end;
{$ENDIF}

function MecsReverseStringC(const AText: WideString): WideString; overload;
var
  i: Integer;
begin
  result := '';
  for i := MecsLengthC(AText) downto 1 do
    result := result + MecsCopyC(AText, i, 1);
end;

// MecsRightStrC
{$IFDEF UNICODE}
function MecsRightStrC(const AText: UnicodeString; const ACount: Integer): UnicodeString; overload;
var
  Idx: Integer;
begin
  Idx := MecsLengthC(AText);
  if (Idx - ACount + 1) > 0 then
    result := MecsCopyC(AText, Idx - ACount + 1, ACount)
  else
    result := AText;
end;
{$ENDIF}

function MecsRightStrC(const AText: WideString; const ACount: Integer): WideString; overload;
var
  Idx: Integer;
begin
  Idx := MecsLengthC(AText);
  if (Idx - ACount + 1) > 0 then
    result := MecsCopyC(AText, Idx - ACount + 1, ACount)
  else
    result := AText;
end;

// MecsStrCCSLength
{$IFDEF UNICODE}
function MecsStrCCSLength(const Str: PUnicodeChar): Integer; overload;
begin
  result := MecsCCSLength(StrPas(PWideChar(Str)), 1);
end;
{$ENDIF}

function MecsStrCCSLength(const Str: PWideChar): Integer; overload;
begin
  result := MecsCCSLength(WideString(Str), 1);
end;

// MecsStrLenC
{$IFDEF UNICODE}
function MecsStrLenC(const Str: PUnicodeChar): Cardinal; overload;
begin
  result := MECSLengthC(StrPas(PWideChar(Str)));
end;
{$ENDIF}

function MecsStrLenC(const Str: PWideChar): Cardinal; overload;
begin
  result := MECSLengthC(WideString(Str));
end;

// MecsStrNextCCS
{$IFDEF UNICODE}
function MecsStrNextCCS(const Str: PUnicodeChar): PUnicodeChar; overload;
begin
  result := str + MecsStrCCSLength(str);
end;
{$ENDIF}

function MecsStrNextCCS(const Str: PWideChar): PWideChar; overload;
begin
  result := str + MecsStrCCSLength(str);
end;

// for Convert
// -----------------------------------------------------------------------------

// IsWin95
function IsWin95: Boolean;
begin
  result := (Win32Platform = VER_PLATFORM_WIN32_WINDOWS) and (Win32MinorVersion = 0);
end;

// IsWin2000
function IsWin2000: Boolean;
begin
  result := (Win32Platform = VER_PLATFORM_WIN32_NT) and (Win32MajorVersion = 5) and (Win32MinorVersion = 0);
end;

// AnsiToUTF16
{$IFDEF UNICODE}
function AnsiToUTF16(const AText: RawByteString): WideString;
{$ELSE}
function AnsiToUTF16(const AText: AnsiString): WideString;
{$ENDIF}
begin
  result := AnsiToUTF16(AText, MecsGetCodePage(AText));
end;

{$IFDEF UNICODE}
function AnsiToUTF16(const AText: RawByteString; Codepage: DWORD): WideString;
{$ELSE}
function AnsiToUTF16(const AText: AnsiString; Codepage: DWORD): WideString;
{$ENDIF}
// MultiByteToWideChar
// http://msdn2.microsoft.com/en-us/library/ms776413(VS.85).aspx
// CP_UTF8(CP_UTF7) is *not* supported in Win95.
var
  BufSize: Integer;
  dwFlags: DWORD;
begin
  result := '';
  if Length(AText) = 0 then
    Exit;
{
  case CodePage of
    CP_UTF7,
    CP_UTF8:
      dwFlags := 0;
  else
    dwFlags := MB_PRECOMPOSED;
  end;
}
  dwFlags := 0;
  if IsWin95 and (dwFlags = 0) then
    ConvertMultiByteToUnicode(Codepage, AText, result)
  else
    begin
      BufSize := MultiByteToWideChar(CodePage, dwFlags, PAnsiChar(AText), Length(AText), nil, 0);
      SetLength(result, BufSize);
      MultiByteToWideChar(CodePage, dwFlags, PANsiChar(AText), Length(AText), PWideChar(result), BufSize);
    end;
end;

// AnsiToUTF32
{$IFDEF UNICODE}
function AnsiToUTF32(const AText: RawByteString): UCS4String;
{$ELSE}
function AnsiToUTF32(const AText: AnsiString): UCS4String;
{$ENDIF}
begin
  result := AnsiToUTF32(AText, MecsGetCodePage(AText));
end;

{$IFDEF UNICODE}
function AnsiToUTF32(const AText: RawByteString; Codepage: DWORD): UCS4String;
{$ELSE}
function AnsiToUTF32(const AText: AnsiString; Codepage: DWORD): UCS4String;
{$ENDIF}
begin
  result := UTF16ToUTF32(AnsiToUTF16(AText, CodePage));
end;

// AnsiToUTF8
{$IFDEF UNICODE}
function AnsiToUTF8(const AText: RawByteString): RawByteString;
{$ELSE}
function AnsiToUTF8(const AText: AnsiString): UTF8String;
{$ENDIF}
begin
  result := AnsiToUTF8(AText, MecsGetCodePage(AText));
end;

{$IFDEF UNICODE}
function AnsiToUTF8(const AText: RawByteString; Codepage: DWORD): RawByteString;
{$ELSE}
function AnsiToUTF8(const AText: AnsiString; Codepage: DWORD): UTF8String;
{$ENDIF}
begin
  if CodePage = CP_UTF8 then
    result := AText
  else
    begin
      if IsWin95 then
        {$IFDEF UNICODE}
        ConvertString(Codepage, CP_UTF8, AText, RawByteString(result))
        {$ELSE}
        ConvertString(Codepage, CP_UTF8, AText, AnsiString(result))
        {$ENDIF}
      else
        result := UTF16ToAnsi(AnsiToUTF16(AText, CodePage), CP_UTF8);
    end;
end;

function ConvertINetString(lpdwMode: LPDWORD; dwSrcEncoding: DWORD; dwDstEncoding: DWORD; lpSrcStr: LPCSTR; lpnSrcSize: PInteger; lpDstStr: Pointer; lpnDstSize: PInteger):Hresult;
var
  FP: TFarProc;
  DLLWnd: THandle;
  // http://msdn2.microsoft.com/en-us/library/aa741106(VS.85).aspx
  // Win95 and IE5.5 or later
  _ConvertINetString: function(lpdwMode: LPDWORD; dwSrcEncoding: DWORD; dwDstEncoding: DWORD; lpSrcStr: LPCSTR; lpnSrcSize: PInteger; lpDstStr: Pointer; lpnDstSize: PInteger):Hresult; stdcall;
begin
  result := S_FALSE;
  {$IFDEF UNICODE}
  DLLWnd := LoadLibraryW('mlang.dll');
  {$ELSE}
  DLLWnd := LoadLibraryA('mlang.dll');
  {$ENDIF}
  try
    FP := GetProcAddress(DLLWnd, 'ConvertINetString');
    if FP <> nil then
      begin
        @_ConvertINetString := FP;
        result := _ConvertINetString(lpdwMode, dwSrcEncoding, dwDstEncoding, lpSrcStr, lpnSrcSize, lpDstStr, lpnDstSize);
      end;
  finally
    if DLLWnd > 0 then
      FreeLibrary(DLLWnd);
  end;
end;

// CodePointToUTF16
function CodePointToUTF16(const UCS4: UCS4Char): WideString;
var
  U: UCS4String;
begin
  SetLength(U, 2);
  U[0] := UCS4;
  U[1] := $00000000;
  result := UTF32ToUTF16(U);
end;

// CodePointToUTF8
{$IFDEF UNICODE}
function CodePointToUTF8(const UCS4: UCS4Char): RawByteString;
{$ELSE}
function CodePointToUTF8(const UCS4: UCS4Char): UTF8String;
{$ENDIF}
var
  U: UCS4String;
begin
  SetLength(U, 2);
  U[0] := UCS4;
  U[1] := $00000000;
  result := UTF32ToUTF8(U);
end;

// ConvertString
{$IFDEF UNICODE}
function ConvertString(SrcCodepage, DstCodepage: DWORD; const SrcStr: RawByteString; var DstStr: RawByteString): Boolean;
{$ELSE}
function ConvertString(SrcCodepage, DstCodepage: DWORD; const SrcStr: AnsiString; var DstStr: AnsiString): Boolean;
{$ENDIF}
var
  dwMode: DWORD;
  SrcSize, DstSize: Integer;
begin
  dwMode := 0;
  SetLength(DstStr, Length(SrcStr) * 3);
  SrcSize := Length(SrcStr);
  DstSize := Length(DstStr);
  result := (ConvertINetString(@dwMode, SrcCodepage, DstCodepage, PAnsiChar(SrcStr), @SrcSize, PAnsiChar(DstStr), @DstSize) = S_OK);
  if result then
    SetLength(DstStr, DstSize)
  else
    DstStr := '';
end;

function ConvertINetUnicodeToMultiByte(lpdwMode: LPDWORD; dwEncoding: DWORD; lpSrcStr: LPCWSTR; lpnWideCharCount: PInteger; lpDstStr: LPSTR; lpnMultiCharCount: PInteger):Hresult;
var
  FP: TFarProc;
  DLLWnd: THandle;
  // http://msdn2.microsoft.com/en-us/library/aa741107(VS.85).aspx
  // Win95 and IE5.5 or later
  _ConvertINetUnicodeToMultiByte: function(lpdwMode: LPDWORD; dwEncoding: DWORD; lpSrcStr: LPCWSTR; lpnWideCharCount: PInteger; lpDstStr: LPSTR; lpnMultiCharCount: PInteger):Hresult; stdcall;
begin
  result := S_FALSE;
  {$IFDEF UNICODE}
  DLLWnd := LoadLibraryW('mlang.dll');
  {$ELSE}
  DLLWnd := LoadLibraryA('mlang.dll');
  {$ENDIF}
  try
    FP := GetProcAddress(DLLWnd, 'ConvertINetUnicodeToMultiByte');
    if FP <> nil then
      begin
        @_ConvertINetUnicodeToMultiByte := FP;
        result := _ConvertINetUnicodeToMultiByte(lpdwMode, dwEncoding, lpSrcStr, lpnWideCharCount, lpDstStr, lpnMultiCharCount);
      end;
  finally
    if DLLWnd > 0 then
      FreeLibrary(DLLWnd);
  end;
end;

// ConvertUnicodeToMultiByte
{$IFDEF UNICODE}
function ConvertUnicodeToMultiByte(DstCodepage: DWORD; const SrcStr: WideString; var DstStr: RawByteString): Boolean;
{$ELSE}
function ConvertUnicodeToMultiByte(DstCodepage: DWORD; const SrcStr: WideString; var DstStr: AnsiString): Boolean;
{$ENDIF}
var
  dwMode: DWORD;
  SrcSize, DstSize: Integer;
begin
  dwMode := 0;
  SetLength(DstStr, Length(SrcStr) * 3);
  SrcSize := Length(SrcStr);
  DstSize := Length(DstStr);
  result := (ConvertINetUnicodeToMultiByte(@dwMode, DstCodepage, PWideChar(SrcStr), @SrcSize, PAnsiChar(DstStr), @DstSize) = S_OK);
  if result then
    SetLength(DstStr, DstSize)
  else
    DstStr := '';
end;

function ConvertINetMultiByteToUnicode(lpdwMode: LPDWORD; dwSrcEncoding: DWORD; lpSrcStr: LPCSTR; lpnMultiCharCount: PInteger; lpDstStr: LPWSTR; lpnWideCharCount: PInteger):Hresult;
var
  FP: TFarProc;
  DLLWnd: THandle;
  // http://msdn2.microsoft.com/en-us/library/aa741105(VS.85).aspx
  // Win95 and IE5.5 or later
  _ConvertINetMultiByteToUnicode: function(lpdwMode: LPDWORD; dwSrcEncoding: DWORD; lpSrcStr: LPCSTR; lpnMultiCharCount: PInteger; lpDstStr: LPWSTR; lpnWideCharCount: PInteger):Hresult; stdcall;
begin
  result := S_FALSE;
  {$IFDEF UNICODE}
  DLLWnd := LoadLibraryW('mlang.dll');
  {$ELSE}
  DLLWnd := LoadLibraryA('mlang.dll');
  {$ENDIF}
  try
    FP := GetProcAddress(DLLWnd, 'ConvertINetMultiByteToUnicode');
    if FP <> nil then
      begin
        @_ConvertINetMultiByteToUnicode := FP;
        result := _ConvertINetMultiByteToUnicode(lpdwMode, dwSrcEncoding, lpSrcStr, lpnMultiCharCount, lpDstStr, lpnWideCharCount);
      end;
  finally
    if DLLWnd > 0 then
      FreeLibrary(DLLWnd);
  end;
end;

// ConvertMultiByteToUnicode
{$IFDEF UNICODE}
function ConvertMultiByteToUnicode(SrcCodepage: DWORD; const SrcStr: RawByteString; var DstStr: WideString): Boolean;
{$ELSE}
function ConvertMultiByteToUnicode(SrcCodepage: DWORD; const SrcStr: AnsiString; var DstStr: WideString): Boolean;
{$ENDIF}
var
  dwMode: DWORD;
  SrcSize, DstSize: Integer;
begin
  dwMode := 0;
  SetLength(DstStr, Length(SrcStr) * 3);
  SrcSize := Length(SrcStr);
  DstSize := Length(DstStr);
  result := (ConvertINetMultiByteToUnicode(@dwMode, SrcCodepage, PAnsiChar(SrcStr), @SrcSize, PWideChar(DstStr), @DstSize) = S_OK);
  if result then
    SetLength(DstStr, DstSize)
  else
    DstStr := '';
end;

// UTF32ToAnsi
{$IFDEF UNICODE}
function UTF32ToAnsi(const UCS4Text: UCS4String): RawByteString;
{$ELSE}
function UTF32ToAnsi(const UCS4Text: UCS4String): AnsiString;
{$ENDIF}
begin
  result := UTF32ToAnsi(UCS4Text, DefaultAnsiCodePage);
end;

{$IFDEF UNICODE}
function UTF32ToAnsi(const UCS4Text: UCS4String; Codepage: DWORD): RawByteString;
{$ELSE}
function UTF32ToAnsi(const UCS4Text: UCS4String; Codepage: DWORD): AnsiString;
{$ENDIF}
begin
  result := UTF16ToAnsi(UTF32ToUTF16(UCS4Text), CodePage);
end;

// UTF32ToUTF16
function UTF32ToUTF16(const UCS4Text: UCS4String): WideString;
var
  Counter,
  i: Integer;
  U: UCS4Char;
  PU, OU: PUCS4Char;
  PW: PWideChar;
begin
  result := '';
  if Length(UCS4Text) = 0 then
    Exit;

  Counter := 0;
  PU := PUCS4Char(UCS4Text);
  OU := PU;
  for i := 0 to Length(UCS4Text) - 1 do
    begin
      if PU^ < $10000 then
        Inc(Counter)
      else
        Inc(Counter, 2);
      Inc(PU);
    end;
  SetLength(result, Counter);

  PU := OU;
  PW := PWideChar(result);
  for i := 0 to Length(UCS4Text) - 1 do
    begin
      if PU^ < $10000 then
        begin
          PW^ := WideChar(PU^);
          Inc(PW);
        end
      else
        begin
          // Surrogate Pair
          U := PU^ - $10000;
          PW^ := WideChar((U shr 10) or $D800);
          Inc(PW);
          PW^ := WideChar((U and $000003FF) or $DC00);
          Inc(PW);
        end;
      Inc(PU);
    end;
end;

// UTF32ToUTF8
{$IFDEF UNICODE}
function UTF32ToUTF8(const UCS4Text: UCS4String): RawByteString;
{$ELSE}
function UTF32ToUTF8(const UCS4Text: UCS4String): UTF8String;
{$ENDIF}
var
  Counter: Integer;
  SrcLen, SrcLen2, i: Integer;
  J, CharLen: Byte;
  U: UCS4Char;
  PU, PO: PUCS4Char;
  PR: PUTF8Char;
begin
  result := '';
  SrcLen := Length(UCS4Text);
  SrcLen2 := SrcLen - 1;
  if SrcLen = 0 then
    Exit;

  Counter := 0;
  PU := PUCS4Char(UCS4Text);
  PO := PU;
  for i:=0 to SrcLen2-1 do
    begin
      U := PU^;
      case U of
        $00..$7F:
          CharLen := 1;
        $80..$7FF:
          CharLen := 2;
        $800..$FFFF:
          CharLen := 3;
        $10000..$1FFFFF:
          CharLen := 4;
        $200000..$3FFFFFF:
          CharLen := 5;
        $4000000..$7FFFFFFF:
          CharLen := 6;
      else
        CharLen := 0;
      end;
      if CharLen > 0 then
        Inc(Counter, CharLen);
      Inc(PU);
    end;
  SetLength(result, Counter);

  PU := PO;
  PR := PUTF8Char(result);
  for i:=0 to SrcLen2-1 do
    begin
      U := PU^;
      case U of
        $00..$7F:
          begin
            PR^ := UTF8Char(Byte(U));
            Inc(PR);
            CharLen := 1;
          end;
        $80..$7FF:
          CharLen := 2;
        $800..$FFFF:
          CharLen := 3;
        $10000..$1FFFFF:
          CharLen := 4;
        $200000..$3FFFFFF:
          CharLen := 5;
        $4000000..$7FFFFFFF:
          CharLen := 6;
      else
        CharLen := 0;
      end;
      if CharLen >= 2 then
        begin
          Inc(PR, CharLen - 1);
          for J := CharLen downto 2 do
            begin
              PR^ := UTF8Char(Byte((U and $3F) or $80));
              U := U shr 6;
              Dec(PR);
            end;
          PR^ := UTF8Char(Byte(U or ($FE shl (7 - CharLen))));
          Inc(PR, CharLen);
        end;
      Inc(PU);
    end;
end;

// UTF16ToAnsi
{$IFDEF UNICODE}
function UTF16ToAnsi(const WText: WideString): RawByteString;
{$ELSE}
function UTF16ToAnsi(const WText: WideString): AnsiString;
{$ENDIF}
begin
  result := UTF16ToAnsi(WText, DefaultAnsiCodePage);
end;

{$IFDEF UNICODE}
function UTF16ToAnsi(const WText: WideString; Codepage: DWORD): RawByteString;
{$ELSE}
function UTF16ToAnsi(const WText: WideString; Codepage: DWORD): AnsiString;
{$ENDIF}
// WideCharToMultiByte
// http://msdn2.microsoft.com/en-us/library/ms776420(VS.85).aspx
// CP_UTF8(CP_UTF7) is *not* supported in Win95.
var
  BufSize: Integer;
  dwFlags: DWORD;
  dCodePage: WORD;
  NeedFix: Boolean;
begin
  {$IFDEF UNICODE}
  dCodePage := StringCodePage(result);
  {$ENDIF}
  result := '';
  if Length(WText) = 0 then
    Exit;

{  case CodePage of
    CP_UTF7,
    CP_UTF8:
      dwFlags := 0;
  else
    dwFlags := WC_COMPOSITECHECK;
  end;
}
  dwFlags := 0;
  if IsWin95 and (dwFlags = 0) then
    ConvertUnicodeToMultiByte(Codepage, WText, result)
  else
    begin
      BufSize := 0;
      NeedFix := False;
      if IsWin2000 then
        case Codepage of
          50220,
          50221,
          50222:
            NeedFix := True;
        end;
      if not NeedFix then
        begin
          BufSize := WideCharToMultiByte(CodePage, dwFlags, PWideChar(WText), -1 , nil, 0, nil, nil);
          NeedFix := (BufSize = 0);
        end;
      if NeedFix then
        begin
          BufSize := Length(WText) * 3;
          SetLength(result, BufSize);
          BufSize := WideCharToMultiByte(CodePage, dwFlags, PWideChar(WText), -1 , PAnsiChar(result), BufSize, nil, nil);
          SetLength(result, BufSize);
        end
      else
        begin
          SetLength(result, BufSize);
          WideCharToMultiByte(CodePage, dwFlags, PWideChar(WText), -1 , PAnsiChar(result), BufSize, nil, nil);
        end;

      {$IFDEF UNICODE}
      case CodePage of
        CP_UTF7,
        CP_UTF8:
          SetCodePage(result, CodePage, False);
      end;
      if CodePage <> dCodePage then
        SetCodePage(result, dCodePage, True);
      {$ENDIF}
    end;
end;

// UTF16ToUTF32
function UTF16ToUTF32(const WText: WideString): UCS4String;
var
  Counter,
  i: Integer;
  PW, OW: PWideChar;
  PR: PUCS4Char;
  SrcLen: Integer;
  SrcLen2: Integer;
begin
  result := nil;
  SrcLen := Length(WText);
  SrcLen2 := SrcLen-1;
  if SrcLen = 0 then
    Exit;

  Counter := 0;
  PW := PWideChar(WText);
  OW := PW;
  for i := 0 to SrcLen2 do
    begin
      if MecsIsLeadElement(PW^) then
        begin
          // Surrogate Pair
          if MecsIsTrailElement((PW + 1)^) then
            Inc(Counter);
        end
      else if not MecsIsTrailElement(PW^) then
        Inc(Counter);
      Inc(PW);
    end;
  SetLength(result, Counter + 1);

  PW := OW;
  PR := PUCS4Char(result);
  for i := 0 to SrcLen2 do
    begin
      if MecsIsLeadElement(PW^) then
        begin
          // Surrogate Pair
          if MecsIsTrailElement((PW + 1)^) then
            begin
              PR^ := ((WORD(PW^)       and $03FF) shl 10) +
                     ((WORD((PW + 1)^) and $03FF) + $10000);
              Inc(PR);
            end;
        end
      else if not MecsIsTrailElement(PW^) then
        begin
          PR^ := UCS4Char(PW^);
          Inc(PR);
        end;
      Inc(PW);
    end;
  PR^ := 0;
end;

// UTF16ToUTF8
{$IFDEF UNICODE}
function UTF16ToUTF8(const WText: WideString): RawByteString;
{$ELSE}
function UTF16ToUTF8(const WText: WideString): UTF8String;
{$ENDIF}
begin
  if IsWin95 then
    {$IFDEF UNICODE}
    ConvertUnicodeToMultiByte(CP_UTF8, WText, RawByteString(result))
    {$ELSE}
    ConvertUnicodeToMultiByte(CP_UTF8, WText, AnsiString(result))
    {$ENDIF}
  else
    result := UTF16ToAnsi(WText, CP_UTF8);
end;

// UTF8ToAnsi
{$IFDEF UNICODE}
function UTF8ToAnsi(const UCF8Text: RawByteString): RawByteString;
{$ELSE}
function UTF8ToAnsi(const UCF8Text: UTF8String): AnsiString;
{$ENDIF}
begin
  result := UTF8ToAnsi(UCF8Text, DefaultAnsiCodePage);
end;

{$IFDEF UNICODE}
function UTF8ToAnsi(const UCF8Text: RawByteString; Codepage: DWORD): RawByteString;
{$ELSE}
function UTF8ToAnsi(const UCF8Text: UTF8String; Codepage: DWORD): AnsiString;
{$ENDIF}
begin
  if CodePage = CP_UTF8 then
    result := UCF8Text
  else
    begin
      if IsWin95 then
        ConvertString(CP_UTF8, Codepage, UCF8Text, result)
      else
        result := UTF16ToAnsi(AnsiToUTF16(UCF8Text, CP_UTF8), CodePage);
    end;
end;

// UTF8ToUTF16
{$IFDEF UNICODE}
function UTF8ToUTF16(const UCF8Text: RawByteString): WideString;
{$ELSE}
function UTF8ToUTF16(const UCF8Text: UTF8String): WideString;
{$ENDIF}
begin
  if IsWin95 then
    ConvertMultiByteToUnicode(CP_UTF8, UCF8Text, result)
  else
    result := AnsiToUTF16(UCF8Text, CP_UTF8);
end;

// UTF8ToUTF32
{$IFDEF UNICODE}
function UTF8ToUTF32(const UCF8Text: RawByteString): UCS4String;
{$ELSE}
function UTF8ToUTF32(const UCF8Text: UTF8String): UCS4String;
{$ENDIF}
var
  P, O: PUTF8Char;
  J, CharLen: Byte;
  Counter, SrcLen: Integer;
  U: UCS4Char;
  PU: PUCS4Char;
begin
  result := nil;
  SrcLen := Length(UCF8Text);
  if SrcLen = 0 then
    Exit;

  Counter := 0;
  P := PUTF8Char(UCF8Text);
  O := P;
  while (P - O) < SrcLen do
    begin
      if (Byte(P^) and $80) = $00 then
        Inc(Counter)
      else if(Byte(P^) and $C0) = $C0 then
        begin
          CharLen := 0;
          for J := 2 to 6 do
            begin
              if (Byte(P^) and ($80 shr J)) = 0 then
                begin
                  CharLen := J;
                  Break;
                end;
            end;
          if (P - O) <= (SrcLen - CharLen) then
            begin
              for J := 2 to CharLen do
                Inc(P);
              Inc(Counter);
            end;
        end;
      Inc(P);
    end;
  SetLength(result, Counter + 1);

  P := O;
  PU := PUCS4Char(result);
  while (P - O) < SrcLen do
    begin
      if (Byte(P^) and $80) = $00 then
        begin
          PU^ := Byte(P^);
          Inc(PU);
        end
      else if(Byte(P^) and $C0) = $C0 then
        begin
          CharLen := 0;
          for J := 2 to 6 do
            begin
              if (Byte(P^) and ($80 shr J)) = 0 then
                begin
                  CharLen := J;
                  Break;
                end;
            end;
          if (P - O) <= (SrcLen - CharLen) then
            begin
              U := Byte((Byte(P^) shl (CharLen + 1)));
              U := U shr (CharLen + 1);
              for J := 2 to CharLen do
                begin
                  Inc(P);
                  U := (U shl 6);
                  U := U or (Byte(P^) and $3F);
                end;
              PU^ := U;
              Inc(PU);
            end;
        end;
      Inc(P);
    end;
  PU^ := $00000000;
end;

initialization
  DefaultAnsiCodePage := GetACP;
  DefaultOEMCodePage  := GetOEMCP;
  DefaultLCID         := GetUserDefaultLCID;
end.


读完这篇文章后,您心情如何?
0
0
0
0
0
0
0
0
本文网址:
上一篇:delph HASH类