字符串处理单元

Tags: /超级猛料/Friends.网友专栏/zswang/函数大全/ Date Created:

(*//

标题:字符串处理单元

说明:针对字符处理的函数进行封装

日期:2004-07-03

设计:Zswang

//*)

//*******Begin 修改日志*******//

//2004-07-03 Zswang No.1 建立

//------------------------------------------------------------------------------

//2004-07-10 Zswang No.1 添加 FileToString()、StringToFile()

//2004-07-19 Zswang No.1 添加 Simplified2Traditional()、Traditional2Simplified()

//------------------------------------------------------------------------------

//2004-08-02 Zswang No.1 添加 SameMark()、MarkPosition()特征码的比较

//*******End 修改日志*******//

unit StringFunctions;

interface

uses Windows, SysUtils, Classes;

function HexToStr( //十六进制字符串处理成字符串

mHex: string //十六进制字符串

): string; //返回处理后的字符串

function StrToHex( //字符串处理成十六进制字符串

mStr: string; //字符串

mSpace: Boolean //是否用空格分开

): string; //返回处理后的十六进制字符串

function StrLeft( //取左边的字符串

mStr: string; //原字符串

mDelimiter: string; //分隔符

mIgnoreCase: Boolean = False //是否忽略大小写

): string; //返回第一个分隔符左边的字符串

function StrRight( //取右边的字符串

mStr: string; //原字符串

mDelimiter: string; //分隔符

mIgnoreCase: Boolean = False //是否忽略大小写

): string; //返回第一个分隔符右边的字符串

function StringInvert( //字符串异或

mStr: string; //源字符串

mKey: string //密钥

): string; //返回异或后的结果

function RandomString( //随机字符串

mChars: string; //字符集合,可以利用重复的字符来控制频率

mLength: Integer //生成字符的长度

): string; //返回长度为mLength的随机字符串

function StringSort( //字符串排序

mStr: string; //源字符串

mDesc: Boolean = True //是否升序排列，否则按降序

): string; //返回排序后的字符串

function BufferToString( //将缓冲处理成字符串

const mBuffer; //缓冲内容

mLen: Integer //缓冲大小

): string; //返回缓冲的字符形式

function StringToDisplay( //字符串处理成语法格式

mString: string //源字符串

): string; //返回字符串的语法格式

function DisplayToString( //还原语法格式表达的字符串

mDisplay: string //字符表达式

): string; //返回字符表达式所表达的字符串

function SubStrCount( //计算子字符串的个数

mStr: string; //源字符串

mSub: string; //子串

mIgnoreCase: Boolean = False //是否忽略大小写

): Integer; //返回子字符串出现的次数

function StringFilter( //字符串过滤

mStr: string; //源字符串

mChars: TSysCharSet //能保留下的字符集合

): string; //返回过滤后的字符串

function Combination( //全组合

mStrings: TStrings; //做输出用的字符列表

mStr: string; //源字符

mLen: Integer //元素个数

): Boolean; //返回处理是否成功

function Collocate( //全排列

mStrings: TStrings; //做输出用的字符列表

mStr: string //源字符串

): Boolean; //返回处理是否成功

function StringToCharSet( //字符串集合

mString: string //源字符串

): TSysCharSet; //返回字符串中包含的集合

function StringShuffle( //随机排列字符串

S: string //源字符串

): string; //返回排列后的字符串

function StrSetExists( //判断子串是否存在，忽略排列的顺序

mStr: string; //源字符串

mSubStr: string //子串

): Boolean; //返回子串是否存在

function StrSetExclude( //将子串排除，忽略排列的顺序

mStr: string; //源字符串

mSubStr: string //子串

): string; //返回排除后的字符串

function StringToFile( //字符串存为文件

mString: string; //源字符串

mFileName: TFileName //文件名

): Boolean; //返回字符串保存到文件是否成功

function FileToString( //文件读为字符串

mFileName: TFileName //文件名

): string; //返回从文件载入字符串

function BinToHex( //二进制转换成十六进制

mBin: string //二进制字符

): string; //返回十六进制字符

function HexToBin( //十六进制转换成二进制

mHex: string //十六进制字符串

): string; //返回二进制字符串

function Simplified2Traditional( //简体汉字转化成繁体汉字

mSimplified: string //简体字符串

): string; //返回繁体字符串 //Win98下无效

function Traditional2Simplified( //繁体汉字转化成简体汉字

mTraditional: string //简体字符串

): string; //返回繁体字符串 //Win98下无效

function SameMark( //比较字符串是否符合特征码

mStr: string; //源字符串

mMark: string //特征码

): Boolean; //返回是否符合特征码

function MarkPosition( //特征码的位置

mStr: string; //源字符串

mMark: string //特征码

): Integer; //返回特征码所在的位置

implementation

function HexToStr( //十六进制字符串处理成字符串

mHex: string //十六进制字符串

): string; //返回处理后的字符串

var

I: Integer;

begin

Result := '';

mHex := StringReplace(mHex, #32, '', [rfReplaceAll]);

for I := 1 to Length(mHex) div 2 do

Result := Result + Chr(StrToIntDef('$' + Copy(mHex, I * 2 - 1, 2), 0));

end; { HexToStr }

function StrToHex( //字符串处理成十六进制字符串

mStr: string; //字符串

mSpace: Boolean //是否用空格分开

): string; //返回处理后的十六进制字符串

const

cSpaceStr: array[Boolean] of string = ('', #32);

var

I: Integer;

begin

Result := '';

for I := 1 to Length(mStr) do

Result := Format('%s%s%.2x', [Result, cSpaceStr[mSpace], Ord(mStr[I])]);

if mSpace then Delete(Result, 1, 1);

end; { StrToHex }

function StrLeft( //取左边的字符串

mStr: string; //原字符串

mDelimiter: string; //分隔符

mIgnoreCase: Boolean = False //是否忽略大小写

): string; //返回第一个分隔符左边的字符串

begin

if mIgnoreCase then

Result := Copy(mStr, 1, Pos(UpperCase(mDelimiter), UpperCase(mStr)) - 1)

else Result := Copy(mStr, 1, Pos(mDelimiter, mStr) - 1);

end; { StrLeft }

function StrRight( //取右边的字符串

mStr: string; //原字符串

mDelimiter: string; //分隔符

mIgnoreCase: Boolean = False //是否忽略大小写

): string; //返回第一个分隔符右边的字符串

begin

if mIgnoreCase then

begin

if Pos(UpperCase(mDelimiter), UpperCase(mStr)) > 0 then

Result := Copy(mStr, Pos(UpperCase(mDelimiter), UpperCase(mStr)) +

Length(mDelimiter), MaxInt)

else Result := '';

end else

begin

if Pos(mDelimiter, mStr) > 0 then

Result := Copy(mStr, Pos(mDelimiter, mStr) + Length(mDelimiter), MaxInt)

else Result := '';

end;

end; { StrRight }

function StringInvert( //字符串异或

mStr: string; //源字符串

mKey: string //密钥

): string; //返回异或后的结果

var

I, J: Integer;

begin

J := 1;

Result := '';

for I := 1 to Length(mStr) do

begin

Result := Result + Char(Ord(mStr[I]) xor Ord(mKey[J]));

if J + 1 <= Length(mKey) then

Inc(J)

else J := 1;

end;

end; { StringEncryptDecrypt }

function RandomString( //随机字符串

mChars: string; //字符集合,可以利用重复的字符来控制频率

mLength: Integer //生成字符的长度

): string; //返回长度为mLength的随机字符串

var

I: Integer;

begin

Result := '';

if mChars = '' then Exit;

for I := 1 to mLength do

Result := Result + mChars[Random(Length(mChars)) + 1];

end; { RandomString }

function StringSort( //字符串排序

mStr: string; //源字符串

mDesc: Boolean = True //是否升序排列，否则按降序

): string; //返回排序后的字符串

var

I, J: Integer;

T: Char;

begin

for I := 1 to Length(mStr) - 1 do

for J := I + 1 to Length(mStr) do

if (mDesc and (mStr[I] > mStr[J])) or

(not mDesc and (mStr[I] < mStr[J])) then

begin

T := mStr[I];

mStr[I] := mStr[J];

mStr[J] := T;

end;

Result := mStr;

end; { StringSort }

function BufferToString( //将缓冲处理成字符串

const mBuffer; //缓冲内容

mLen: Integer //缓冲大小

): string; //返回缓冲的字符形式

begin

SetLength(Result, mLen);

Move(mBuffer, Result[1], mLen);

end; { BufferToString }

function StringToDisplay( //字符串处理成语法格式

mString: string //源字符串

): string; //返回字符串的语法格式

var

I: Integer;

S: string;

begin

Result := '';

S := '';

for I := 1 to Length(mString) do

if mString[I] in [#32..#127] then

S := S + mString[I]

else

begin

if S <> '' then

begin

Result := Result + QuotedStr(S);

S := '';

end;

Result := Result + Format('#$%x', [Ord(mString[I])]);

end;

if S <> '' then Result := Result + QuotedStr(S);

end; { StringToDisplay }

function DisplayToString( //还原语法格式表达的字符串

mDisplay: string //字符表达式

): string; //返回字符表达式所表达的字符串

var

I: Integer;

S: string;

B: Boolean;

begin

Result := '';

B := False;

mDisplay := mDisplay;

for I := 1 to Length(mDisplay) do

if B then

case mDisplay[I] of

'''':

begin

if S <> '' then

Result := Result + StringReplace(S, '''''', '''', [rfReplaceAll]);

if Copy(mDisplay, I + 1, 1) = '''' then

Result := Result + '''';

S := '';

B := False;

end;

else S := S + mDisplay[I];

end

else

case mDisplay[I] of

'#', '''':

begin

if S <> '' then Result := Result + Chr(StrToIntDef(S, 0));

S := '';

B := mDisplay[I] = '''';

end;

'$', '0'..'9', 'a'..'f', 'A'..'F': S := S + mDisplay[I];

end;

if not B and (S <> '') then

Result := Result + Chr(StrToIntDef(S, 0));

end; { DisplayToString }

function SubStrCount( //计算子字符串的个数

mStr: string; //源字符串

mSub: string; //子串

mIgnoreCase: Boolean = False //是否忽略大小写

): Integer; //返回子字符串出现的次数

var

vReplaceFlags: TReplaceFlags;

begin

vReplaceFlags := [rfReplaceAll];

if mIgnoreCase then Include(vReplaceFlags, rfIgnoreCase);

Result := (Length(mStr) - Length(

StringReplace(mStr, mSub, '', vReplaceFlags))) div Length(mSub);

end; { SubStrCount }

function StringFilter( //字符串过滤

mStr: string; //源字符串

mChars: TSysCharSet //能保留下的字符集合

): string; //返回过滤后的字符串

var

I: Integer;

begin

Result := '';

for I := 1 to Length(mStr) do

if mStr[I] in mChars then

Result := Result + mStr[I];

end; { StringFilter }

function Combination( //全组合

mStrings: TStrings; //做输出用的字符列表

mStr: string; //源字符

mLen: Integer //元素个数

): Boolean; //返回处理是否成功

procedure pCombination( //全组合子过程

mLeft: string; //排列到左边的字符

mRight: string //排列到右边的字符

);

var

I: Integer;

begin

if Length(mLeft) >= mLen then

mStrings.Add(mLeft)

else for I := 1 to Length(mRight) do

pCombination(mLeft + Copy(mRight, I, 1), Copy(mRight, I + 1, MaxInt));

end;

begin

Result := False;

if not Assigned(mStrings) then Exit;

mStrings.BeginUpdate;

try

mStrings.Clear;

pCombination('', mStr);

finally

mStrings.EndUpdate;

end;

Result := True;

end; { Combination }

function Collocate( //全排列

mStrings: TStrings; //做输出用的字符列表

mStr: string //源字符串

): Boolean; //返回处理是否成功

procedure pCollocate( //全排列子过程

mLeft: string; //排列到左边的字符

mRight: string //排列到右边的字符

);

var

I, L: Integer;

begin

L := Length(mLeft);

if L = 0 then

mStrings.Add(mRight)

else

for I := 1 to L do

pCollocate(

Copy(mLeft, 1, I - 1) + Copy(mLeft, I + 1, MaxInt),

mRight + mLeft[I]

);

end;

begin

Result := False;

if not Assigned(mStrings) then Exit;

mStrings.BeginUpdate;

try

mStrings.Clear;

pCollocate(mStr, '');

finally

mStrings.EndUpdate;

end;

Result := True;

end; { Collocate }

function StringToCharSet( //字符串集合

mString: string //源字符串

): TSysCharSet; //返回字符串中包含的集合

var

I: Integer;

begin

Result := [];

for I := 1 to Length(mString) do Include(Result, mString[I]);

end; { StringToCharSet }

function StringShuffle( //随机排列字符串

S: string //源字符串

): string; //返回排列后的字符串

var

I: Integer;

begin

Result := '';

while S <> '' do

begin

I := Random(Length(S)) + 1;

Result := Result + S[I];

Delete(S, I, 1);

end;

end; { StringShuffle }

function StrSetExists( //判断子串是否存在，忽略排列的顺序

mStr: string; //源字符串

mSubStr: string //子串

): Boolean; //返回子串是否存在

//'aa','aca'=True

//'ab','acb'=True

//'aa','acb'=False

var

I, J: Integer;

begin

Result := False;

for I := 1 to Length(mSubStr) do begin

J := Pos(mSubStr[I], mStr);

if J <= 0 then Exit;

Delete(mStr, J, 1);

end;

Result := True;

end; { StrSetExists }

function StrSetExclude( //将子串排除，忽略排列的顺序

mStr: string; //源字符串

mSubStr: string //子串

): string; //返回排除后的字符串

var

I, J: Integer;

begin

Result := mStr;

for I := 1 to Length(mSubStr) do begin

J := Pos(mSubStr[I], Result);

if J <= 0 then Continue;

Delete(Result, J, 1);

end;

end; { StrSetExclude }

function StringToFile( //字符串存为文件

mString: string; //源字符串

mFileName: TFileName //文件名

): Boolean; //返回字符串保存到文件是否成功

begin

Result := True;

try

if mString <> '' then

with TFileStream.Create(mFileName, fmCreate) do try

Write(mString[1], Length(mString));

finally

Free;

end;

except

Result := False;

end;

end; { StringToFile }

function FileToString( //文件读为字符串

mFileName: TFileName //文件名

): string; //返回从文件载入字符串

begin

Result := '';

try

if FileExists(mFileName) then

with TFileStream.Create(mFileName, fmOpenRead) do try

SetLength(Result, Size);

Read(Result[1], Size);

finally

Free;

end;

except

Result := '';

end;

end; { FileToString }

const

cHexBinStrings: array[0..15] of string = //十六进制和二进制对照表

(

'0000', '0001', '0010', '0011',

'0100', '0101', '0110', '0111',

'1000', '1001', '1010', '1011',

'1100', '1101', '1110', '1111'

);

function BinToHex( //二进制转换成十六进制

mBin: string //二进制字符

): string; //返回十六进制字符

var

I, L: Integer;

S: string;

begin

Result := '';

mBin := StringFilter(mBin, ['0', '1']);

if mBin = '' then Exit;

mBin := '000' + mBin;

L := Length(mBin);

while L >= 4 do

begin

S := Copy(mBin, L - 3, MaxInt);

Delete(mBin, L - 3, MaxInt);

for I := Low(cHexBinStrings) to High(cHexBinStrings) do

if S = cHexBinStrings[I] then

begin

Result := IntToHex(I, 0) + Result;

Break;

end;

L := Length(mBin);

end;

end; { BinToHex }

function HexToBin( //十六进制转换成二进制

mHex: string //十六进制字符串

): string; //返回二进制字符串

var

I: Integer;

begin

Result := '';

mHex := StringFilter(mHex, ['0'..'9', 'a'..'f', 'A'..'F']);

for I := 1 to Length(mHex) do

Result := Result + cHexBinStrings[StrToIntDef('$' + mHex[I], 0)];

end; { HexToBin }

function Simplified2Traditional( //简体汉字转化成繁体汉字

mSimplified: string //简体字符串

): string; //返回繁体字符串 //Win98下无效

var

L: Integer;

begin

L := Length(mSimplified);

SetLength(Result, L);

LCMapString(GetUserDefaultLCID,

LCMAP_TRADITIONAL_CHINESE, PChar(mSimplified), L, @Result[1], L);

end; { Simplified2Traditional }

function Traditional2Simplified( //繁体汉字转化成简体汉字

mTraditional: string //简体字符串

): string; //返回繁体字符串 //Win98下无效

var

L: Integer;

begin

L := Length(mTraditional);

SetLength(Result, L);

LCMapString(GetUserDefaultLCID,

LCMAP_SIMPLIFIED_CHINESE, PChar(mTraditional), L, @Result[1], L);

end; { Traditional2Simplified }

function SameMark( //比较字符串是否符合特征码

mStr: string; //源字符串

mMark: string //特征码

): Boolean; //返回是否符合特征码

var

I: Integer;

begin

Result := False;

mMark := StringReplace(mMark, #32, '', [rfReplaceAll]);

if Length(mStr) <> Length(mMark) div 2 then Exit;

I := Pos('%%', mMark);

while I > 0 do

begin

Delete(mMark, I, 2);

Delete(mStr, I div 2 + 1, 1);

I := Pos('%%', mMark);

end;

Result := mStr = HexToStr(mMark);

end; { SameMark }

function MarkPosition( //特征码的位置

mStr: string; //源字符串

mMark: string //特征码

): Integer; //返回特征码所在的位置

var

I, J, L: Integer;

vLongMarks: string;

vLongPos: Integer;

begin

mMark := StringReplace(mMark, #32, '', [rfReplaceAll]);

L := Length(mMark) div 2;

if Pos('%%', mMark) <= 0 then //如果没有掩码

begin

Result := Pos(HexToStr(mMark), mStr);

Exit;

end;

///////Begin 扫描最长的一个特征码

Result := 0;

J := 0;

vLongPos := 0;

vLongMarks := '';

with TStringList.Create do try

Delimiter := '%';

DelimitedText := StringReplace(mMark, '%%', '%', [rfReplaceAll]);

for I := 0 to Count - 1 do

begin

if Length(vLongMarks) < Length(Strings[I]) div 2 then

begin

vLongMarks := HexToStr(Strings[I]);

vLongPos := J;

end;

Inc(J, Length(Strings[I]) div 2 + 1);

end;

finally

Free;

end;

///////End 扫描最长的一个特征码

///////Begin 用最长的特征码来比较

I := Pos(vLongMarks, mStr);

while I - vLongPos > 0 do

begin

if SameMark(Copy(mStr, I - vLongPos, L), mMark) then

begin

Result := I - vLongPos;

Break;

end;

Delete(mStr, 1, I - vLongPos + L);

I := Pos(vLongMarks, mStr);

end;

///////End 用最长的特征码来比较

end; { MarkPosition }

end.