UTF 8解码
Always use UTF8 or unicode strings if possible
Java and .NET developers don't have to do anything special, but developers working in natively compiled languages must take a few extra steps to overcome Unicode problems.
Here is a widechar := URLDecodeUTF8(ansichar) conversion function and an example UI application. I took inspiration from Indy's URLDecode function and converted it into a widechar-enabled function. Indy's URLDecode supports only plain strings and strings encoded with legacy charsets.
Here are the full sources and an example UI executable to try it out. I built a DLL because I need to call it from a C++ program. You can copy and paste the code to suit your needs. The test application uses TntUnicode components; see the reference image.
orcoredll.zip sources and exe
dlltest.png reference image
Convert a UTF-8 encoded ANSI string to a WideString:
unit orcoredll_unit1;
interface
// Public functions of this unit. They use the stdcall calling convention
// for better compatibility with callers written in other languages.
// Each function stores a pointer to the converted text in `buf` and
// returns the length of that text in characters.
function WideStringToAnsiString(const wc: PWideChar; var buf: PAnsiChar): integer; stdcall;
function URLDecodeUTF8(const s: PAnsiChar; var buf: PWideChar): integer; stdcall;
implementation
uses SysUtils;
{ Holds the most recent WideStringToAnsiString result. The pointer handed
  back through `buf` refers to this string, so it must outlive the call. }
var
  LastAnsiResult: AnsiString;

{ Convert a null-terminated wide-character string to an ANSI string.

  wc   - null-terminated UTF-16 input; nil is treated as an empty string.
  buf  - receives a pointer to the null-terminated ANSI result.
  Result - length of the ANSI result in bytes (terminator not counted).

  The returned pointer stays valid until the next call to this function.
  Callers must copy the data if they need it longer and must not free it.
  NOTE: the shared result buffer makes this function non-reentrant. }
function WideStringToAnsiString(const wc: PWideChar; var buf: PAnsiChar): integer; stdcall;
begin
  if wc = nil then
    LastAnsiResult := ''
  else
    LastAnsiResult := WideCharToString(wc);
  // Point at the unit-level string, not at a local that is released on exit.
  buf := PAnsiChar(LastAnsiResult);
  Result := Length(LastAnsiResult);
end;
{ Holds the most recent URLDecodeUTF8 result. The pointer handed back
  through `buf` refers to this string, so it must outlive the call. }
var
  LastDecodedWide: WideString;

{ Decode a URL-encoded string whose bytes are UTF-8 into a wide string.

  s    - null-terminated URL-encoded input (nil is treated as empty).
         '+'      decodes to a space
         '%2A'    decodes to '*'
         '%C3%84' decodes to U+00C4 (A with diaeresis)
  buf  - receives a pointer to the null-terminated UTF-16 result.
  Result - length of the result in wide characters.

  Malformed escapes (a '%' not followed by two hex digits) are dropped
  together with the up-to-two characters that follow the '%'. '%00' is
  dropped as well so the result never contains an embedded terminator.

  The returned pointer stays valid until the next call to this function.
  Callers must copy the data if they need it longer and must not free it.
  NOTE: the shared result buffer makes this function non-reentrant. }
function URLDecodeUTF8(const s: PAnsiChar; var buf: PWideChar): integer; stdcall;
const
  HexDigits = ['0'..'9', 'A'..'F', 'a'..'f'];
var
  src: AnsiString;    // URL-encoded input as a Pascal string
  utf8: UTF8String;   // decoded raw UTF-8 bytes
  i, srcLen, outLen, code: Integer;
  c: AnsiChar;
begin
  src := s; // null-terminated str to pascal str
  srcLen := Length(src);
  // Decoding never grows the data, so the input length is an upper bound.
  SetLength(utf8, srcLen);
  outLen := 0;
  i := 1;
  while i <= srcLen do
  begin
    c := src[i];
    if c <> '%' then
    begin
      if c = '+' then
        c := ' ';
      Inc(outLen);
      utf8[outLen] := c;
      Inc(i);
    end
    else
    begin
      // Only accept a complete escape: '%' followed by exactly two hex digits.
      if (i + 2 <= srcLen) and (src[i + 1] in HexDigits) and (src[i + 2] in HexDigits) then
      begin
        code := StrToInt('$' + string(Copy(src, i + 1, 2)));
        if code > 0 then // skip %00, it would terminate the C string early
        begin
          Inc(outLen);
          utf8[outLen] := AnsiChar(code);
        end;
      end;
      Inc(i, 3); // skip '%' and the two escape characters
    end;
  end;
  SetLength(utf8, outLen);
  // Keep the result in the unit-level string, not in a local that is
  // released when the function returns.
  LastDecodedWide := UTF8Decode(utf8); // utf8 string to unicode
  buf := PWideChar(LastDecodedWide);
  Result := Length(LastDecodedWide);
end;
end.
---------------------------------------
以下代码在Delphi 7上调试通过,主要使用了API函数MultiByteToWideChar和WideCharToMultiByte
{ Convert a multi-byte string in the given code page to a WideString
  using the Win32 MultiByteToWideChar call.

  Str      - source bytes, converted up to the first #0.
  CodePage - Windows code page identifier of Str (e.g. 936, 950).
  Returns the converted text without the terminating #0. }
function UnicodeEncode(Str:string;CodePage:integer):WideString;
var
  Capacity, Converted: Integer;
begin
  // One wide char per input byte is the worst case; add one for the #0.
  Capacity := Succ(Length(Str));
  SetLength(Result, Capacity);
  Converted := MultiByteToWideChar(CodePage, 0, PChar(Str), -1,
    PWideChar(Result), Capacity);
  // The API count includes the terminator; trim it off.
  SetLength(Result, Pred(Converted));
end;
{ Convert a WideString to a multi-byte string in the given code page
  using the Win32 WideCharToMultiByte call.

  Str      - UTF-16 source text, converted up to the first #0.
  CodePage - target Windows code page identifier (e.g. 936, 950, 65001).
  Returns the converted bytes without the terminating #0, or an empty
  string when Str is empty or the conversion fails. }
function UnicodeDecode(Str:WideString;CodePage:integer):string;
var
  Needed, Written: Integer;
begin
  Result := '';
  if Str = '' then
    Exit;
  // Ask the API for the required size first. A fixed 2-bytes-per-char
  // guess is too small for code pages such as UTF-8 (65001).
  Needed := WideCharToMultiByte(CodePage, 0, PWideChar(Str), -1, nil, 0, nil, nil);
  if Needed <= 0 then
    Exit;
  SetLength(Result, Needed); // Needed includes the terminating #0
  Written := WideCharToMultiByte(CodePage, 0, PWideChar(Str), -1,
    PChar(Result), Needed, nil, nil);
  if Written > 0 then
    SetLength(Result, Written - 1) // drop the terminating #0
  else
    Result := '';
end;
{ Convert GBK (code page 936) simplified Chinese text to Big5 (code page 950)
  traditional Chinese text.

  The characters are first mapped from simplified to traditional forms with
  LCMapString, then the bytes are re-encoded from code page 936 to 950.
  If the character mapping fails, the text is re-encoded unmapped. }
function Gb2Big5(Str:string):string;
const
  // zh-CN locale. The ANSI LCMapString interprets the bytes in the code page
  // of the locale passed in, so it must be one whose code page is 936.
  LCID_CHINESE_PRC = $0804;
var
  Mapped: string;
  MappedLen: Integer;
begin
  MappedLen := 0;
  if Str <> '' then
  begin
    SetLength(Mapped, Length(Str));
    MappedLen := LCMapString(LCID_CHINESE_PRC, LCMAP_TRADITIONAL_CHINESE,
      PChar(Str), Length(Str),
      PChar(Mapped), Length(Mapped));
  end;
  if MappedLen > 0 then
    SetLength(Mapped, MappedLen)
  else
    Mapped := Str; // mapping failed: do not convert an uninitialised buffer
  Result := UnicodeDecode(UnicodeEncode(Mapped, 936), 950);
end;
{ Convert Big5 (code page 950) traditional Chinese text to GBK (code page 936)
  simplified Chinese text.

  The bytes are first re-encoded from code page 950 to 936, then the
  characters are mapped from traditional to simplified forms with LCMapString.
  If the character mapping fails, the re-encoded text is returned unmapped. }
function Big52Gb(Str:string):string;
const
  // zh-CN locale. The ANSI LCMapString interprets the bytes in the code page
  // of the locale passed in, so it must be one whose code page is 936.
  LCID_CHINESE_PRC = $0804;
var
  Gbk: string;
  MappedLen: Integer;
begin
  Result := '';
  Gbk := UnicodeDecode(UnicodeEncode(Str, 950), 936);
  if Gbk = '' then
    Exit;
  SetLength(Result, Length(Gbk));
  MappedLen := LCMapString(LCID_CHINESE_PRC, LCMAP_SIMPLIFIED_CHINESE,
    PChar(Gbk), Length(Gbk),
    PChar(Result), Length(Result));
  if MappedLen > 0 then
    SetLength(Result, MappedLen)
  else
    Result := Gbk; // mapping failed: do not return an uninitialised buffer
end;
关键使用了UnicodeToUtf8这个函数
{ Encode a WideString as UTF-8 using UnicodeToUtf8.

  WS - UTF-16 source text.
  Returns the UTF-8 bytes, or an empty string when WS is empty or the
  conversion reports no output. }
function Utf8Encode(const WS: WideString): UTF8String;
var
  L: Integer;
  Temp: UTF8String;
begin
  Result := '';
  if WS = '' then Exit;
  // Reserve 3 bytes per UTF-16 code unit.
  // SetLength includes space for null terminator
  SetLength(Temp, Length(WS) * 3);
  L := UnicodeToUtf8(PChar(Temp), Length(Temp)+1, PWideChar(WS), Length(WS));
  if L > 0 then
    SetLength(Temp, L-1) // presumably L counts the terminating #0 - TODO confirm
  else
    Temp := '';
  Result := Temp;
end;
=======================
方法二:
后来想到了用Unicode编码,但该死的Delphi控件竟然不支持Unicode。后来在网上找到了Tnt控件,可以支持Unicode,不过项目已经差不多快好了,要大规模换控件是不可能的,就想到读一下源代码,看看Tnt控件是如何做到的。读完之后一阵绝望:Tnt控件几乎全用W结尾的API,连创建窗体都是用CreateWindowExW,那还有什么好说的呢,自己重做还不如全换Tnt控件。
----------------------
Delphi: 将unicode码转换为汉字
Night @ 2004-12-01 21:12
{ Convert a string of hexadecimal UTF-16 code units to ANSI text
  (for example Chinese characters on a Chinese system).

  SubUnicode - concatenated 4-digit hex code units, high byte first,
               e.g. '4E2D6587'. Digits are parsed by HexToInt.
  Returns the text converted to the system ANSI code page, or an empty
  string when the input length is not a multiple of 4. }
function UnicodeToAnsi(SubUnicode: string):string;
var
  wide: WideString;
  count, i: integer;
begin
  Result := ''; // defined result even for malformed input
  if (Length(SubUnicode) mod 4) <> 0 then
    Exit;
  count := Length(SubUnicode) div 4;
  SetLength(wide, count);
  // Build the UTF-16 text directly. Going through a byte buffer and
  // StrPCopy breaks on code units that contain a zero byte.
  for i := 1 to count do
    wide[i] := WideChar(Word(HexToInt(Copy(SubUnicode, i * 4 - 3, 4))));
  Result := wide; // implicit WideString -> ANSI conversion
end;
{ Parse a hexadecimal string into an unsigned integer.

  hex - hex digits without prefix, upper or lower case.
  Returns the parsed value; an empty string yields 0. Characters that are
  not hex digits count as 0, so the function never raises on bad input. }
function HexToInt(hex:string):cardinal;
const cHex='0123456789ABCDEF';
var mult,digit,loop:integer;
begin
  result:=0;
  mult:=1;
  // Walk from the least significant digit upwards.
  for loop:=length(hex) downto 1 do
  begin
    // UpCase makes 'a'..'f' match the upper-case lookup table.
    digit:=pos(UpCase(hex[loop]),cHex)-1;
    if (digit<0) then digit:=0;
    inc(result,(digit*mult));
    mult:=mult*16;
  end;
end;