如何提取RTF中的图片
来自:lanyaoshi, 时间:2006-6-1 21:37:00, ID:3459681
已知道RTF格式中的图片文件是以metafile格式存在的,以{\pict\wmetafile8\picw423\pich688\picwgoal240\pichgoal390开始,然后是metafile的HEX字符串,但不知道如何将这一大段HEX字符串保存为BMP文件或metafile格式文件呢?
例如下面是一段包含图片的RTF文件的内容:
{\rtf1\ansi\ansicpg936\deff0{\fonttbl{\f0\fnil\fcharset134 \'cb\'ce\'cc\'e5;}}
\viewkind4\uc1\pard\lang2052\f0\fs18{\pict\wmetafile8\picw423\pich688\picwgoal240\pichgoal390
010009000003160300000000f102000000000400000003010800050000000b0200000000050000
000c021b001100030000001e000400000007010400f1020000410b2000cc001a00100000000000
1a0010000000000028000000100000001a00000001000800000000000000000000000000000000
00000000000000000000000000ffffff00fbfcfd00ced2ec00a4aee80095a3f100c7cbe300eff0
f900e4e5eb006472d1005c68c7009aa0cf00949acd004c59c300273fde00465bd800bdc1df00e1
e2e700848bc200fdfdfe007f86c300122be000051bd700a5a9cb00f4f4f5007a8ae2003b46ac00
0f1fac00dbdce400354be0006b72ab00b9bedb000c25dd000112b900868aaf008898e100182cca
00777ca800ebedf3004f569700f8f9fb005d71de000c21c800636aa5005065db00091dbf002c35
9000fbfbfc00424a94007282de00424a8d0008128d000210a800a5acda00c8cad8004954bb001a
27a400dee1ea00303ba900daddee003f4fc300f7f7f900202da900d1d3e400d6d9e90000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000101010101010101010101010101010101010101
010101010101010101010101010101010101010101010101010101010101010101010101010101
010101010101010101010101010101010101010101010101010101010101010101010101010101
010101013d3f10402801010101010101010126222e3e0a090d1713010101010101081a343a3b01
133b3c2b3d010101012f3721381101010101391b25130101013516212201010101011329333601
0113091633080101010101011934250101282c2a3001010101010101312d32130128292a2b0101
01010101022c2d2e2f0113232425010101010101260e2127010101101d1e0101010101011f2021
2201010118191a1301010101130d161b1c01010101110f12130101011415161701010101010108
090a0b0c0d0e0f1001010101010101010203040504060701010101010101010101010101010101
010101010101010101010101010101010101010101010101010101010101010101010101010101
010101010101010101010101010101010101010101010101010101010101010101010101010101
010101010101040000002701ffff030000000000
}
\par }
解答:
Copyright (C) Kingron, 2006
实际上很简单:RTF中所有的二进制数据都以十六进制(HEX)文本的形式保存,所以只要把HEX文本还原成二进制即可。也就是说,每两个十六进制字符对应一个字节,依次转换并写入文件,得到的就是原始数据。对于WMF图片,RTF中保存的数据不含22字节的可放置(placeable)文件头,因此下面的代码在写文件时先补上这个文件头。
例如,把上面的十六进制字符串转换成图片文件的代码如下:
{ Constants and header record used when reading/writing WMF files. }
const
{ Expected value of TMetafileHeader.Key; WMFToRTFPict rejects files whose Key differs. }
WMFKey = Integer($9AC6CDD7);
{ Not referenced by the code visible in this file. }
WMFWord = $CDD7;
{ Number of 0.01 mm units in one inch; used to scale between RTF picture sizes and header units. }
HundredthMMPerInch = 2540;
type
{ Header written in front of the metafile data by RTFPictToWMF and read back by
  WMFToRTFPict. Declared packed so it can be read/written as one binary block. }
TMetafileHeader = packed record
{ Must equal WMFKey. }
Key: Longint;
{ Left at zero by RTFPictToWMF. }
Handle: SmallInt;
{ Picture bounds, expressed in units of 1/Inch. }
Box: TSmallRect;
{ Units per inch for Box; RTFPictToWMF writes 96. }
Inch: Word;
{ Left at zero by RTFPictToWMF. }
Reserved: Longint;
{ XOR of all preceding 16-bit words, see ComputeAldusChecksum. }
CheckSum: Word;
end;
function ComputeAldusChecksum(var WMF: TMetafileHeader): Word;
{
  Computes the header checksum: the XOR of every 16-bit word of the header
  that precedes the CheckSum field itself.
}
type
  TWordSeq = array[0..(MaxInt div SizeOf(Word)) - 1] of Word;
  PWordSeq = ^TWordSeq;
var
  Words: PWordSeq;
  WordCount, Idx: Integer;
begin
  Words := @WMF;
  { Number of whole words between the start of the record and CheckSum. }
  WordCount := (Longint(@WMF.CheckSum) - Longint(@WMF)) div SizeOf(Word);
  Result := 0;
  for Idx := 0 to WordCount - 1 do
    Result := Result xor Words^[Idx];
end;
procedure RTFPictToWMF(const WMFFilename, AnsiStorString: string; Width, Height: Integer);
{
  Converts the hex text of an RTF \pict\wmetafile group into a WMF file.

  WMFFilename    - name of the WMF file to create (overwritten if it exists).
  AnsiStorString - the hex digits taken from the RTF picture group. Line
                   breaks, spaces and any other non-hex characters are ignored.
  Width, Height  - the N values of \picwN and \pichN from the RTF group
                   (treated as 0.01 mm units when scaling to the header box).

  The file is written as a TMetafileHeader followed by the decoded bytes.
  Raises a stream exception if the file cannot be created or fully written.
}
var
  PureText: string;
  buf: PChar;
  Len: Integer;
  wmf: TMetafileHeader;
  fs: TFileStream;

  { Returns S with everything except hexadecimal digits removed. }
  function KeepHexDigits(const S: string): string;
  var
    I, N: Integer;
  begin
    SetLength(Result, Length(S));
    N := 0;
    for I := 1 to Length(S) do
      if S[I] in ['0'..'9', 'a'..'f', 'A'..'F'] then
      begin
        Inc(N);
        Result[N] := S[I];
      end;
    SetLength(Result, N);
  end;

begin
  { Stripping only CR/LF pairs is not enough: a lone LF or a blank would make
    HexToBin stop early and leave the rest of the buffer uninitialised. }
  PureText := KeepHexDigits(AnsiStorString);
  { Two hex digits per output byte. }
  Len := Length(PureText) shr 1;

  { Build the file header. }
  FillChar(wmf, SizeOf(wmf), 0);
  wmf.Key := WMFKey;
  wmf.Inch := 96; { WMF defaults to 96 units per inch }
  wmf.Box.Right := MulDiv(Width, wmf.Inch, HundredthMMPerInch);
  wmf.Box.Bottom := MulDiv(Height, wmf.Inch, HundredthMMPerInch);
  wmf.CheckSum := ComputeAldusChecksum(wmf);

  GetMem(buf, Len);
  try
    { HexToBin returns the number of bytes actually converted; write only those. }
    Len := HexToBin(PChar(PureText), buf, Len);
    fs := TFileStream.Create(WMFFilename, fmCreate);
    try
      { WriteBuffer raises on a short write instead of silently truncating. }
      fs.WriteBuffer(wmf, SizeOf(wmf));
      if Len > 0 then
        fs.WriteBuffer(buf^, Len);
    finally
      fs.Free;
    end;
  finally
    { Released even when the file cannot be created. }
    FreeMem(buf);
  end;
end;
function WMFToRTFPict(const WMFFilename: string): string;
{
  Reads a WMF file that starts with a TMetafileHeader and returns an RTF
  \pict\wmetafile8 group containing the metafile data as hex text.

  The header box is converted to two sizes:
    \picw / \pich         - extent in 0.01 mm units
    \picwgoal / \pichgoal - desired display size in twips (1/1440 inch)

  Raises Exception if the file is shorter than the header, the header key
  is not WMFKey, or the header's units-per-inch value is zero.
}
const
  TwipsPerInch = 1440;
var
  ms: TMemoryStream;
  wh: TMetafileHeader;
  buf: string;
  DataLen, BoxW, BoxH: Integer;
begin
  ms := TMemoryStream.Create;
  try
    ms.LoadFromFile(WMFFilename);
    if ms.Read(wh, SizeOf(wh)) <> SizeOf(wh) then
      raise Exception.Create('Error read file');
    if wh.Key <> WMFKey then
      raise Exception.Create('Invalid WMF file');
    { Inch is the divisor of every size conversion below. }
    if wh.Inch = 0 then
      raise Exception.Create('Invalid WMF file');

    { Everything after the header is the metafile data stored in the RTF. }
    DataLen := ms.Size - SizeOf(wh);
    SetLength(buf, DataLen shl 1);
    BinToHex(PChar(ms.Memory) + SizeOf(wh), PChar(buf), DataLen);

    BoxW := wh.Box.Right - wh.Box.Left;
    BoxH := wh.Box.Bottom - wh.Box.Top;
    { The goal sizes must be in twips; emitting 0.01 mm values there makes
      readers display the picture about 1.76 times too large. }
    Result := Format('{\pict\wmetafile8\picw%d\pich%d\picwgoal%d\pichgoal%d %s }',
      [MulDiv(BoxW, HundredthMMPerInch, wh.Inch),
       MulDiv(BoxH, HundredthMMPerInch, wh.Inch),
       MulDiv(BoxW, TwipsPerInch, wh.Inch),
       MulDiv(BoxH, TwipsPerInch, wh.Inch),
       buf]);
  finally
    ms.Free;
  end;
end;
procedure RTFPictToJPG(const JPGFilename, RTFAnsiStorString: string);
{
  Decodes the hex text of an RTF \jpegblip picture group and writes the
  resulting bytes to a JPEG file.

  JPGFilename       - name of the file to create (overwritten if it exists).
  RTFAnsiStorString - the hex digits taken from the RTF picture group. Line
                      breaks, spaces and other non-hex characters are ignored.

  Raises a stream exception if the file cannot be created or fully written.
}
var
  fs: TFileStream;
  PureText: string;
  buf: PChar;
  Len: Integer;

  { Returns S with everything except hexadecimal digits removed. }
  function KeepHexDigits(const S: string): string;
  var
    I, N: Integer;
  begin
    SetLength(Result, Length(S));
    N := 0;
    for I := 1 to Length(S) do
      if S[I] in ['0'..'9', 'a'..'f', 'A'..'F'] then
      begin
        Inc(N);
        Result[N] := S[I];
      end;
    SetLength(Result, N);
  end;

begin
  { A lone LF or a blank would make HexToBin stop early, so drop every
    non-hex character rather than only CR/LF pairs. }
  PureText := KeepHexDigits(RTFAnsiStorString);
  { Two hex digits per output byte. }
  Len := Length(PureText) shr 1;

  GetMem(buf, Len);
  try
    { Write only the bytes HexToBin really produced. }
    Len := HexToBin(PChar(PureText), buf, Len);
    fs := TFileStream.Create(JPGFilename, fmCreate);
    try
      if Len > 0 then
        fs.WriteBuffer(buf^, Len);
    finally
      fs.Free;
    end;
  finally
    { Released on every path, including a failed file creation. }
    FreeMem(buf);
  end;
end;
function GetJPEGSize(FileName: string; var Width, Height: WORD): Boolean;
{
  Reads the pixel dimensions of a JPEG file.

  FileName      - file to inspect.
  Width, Height - receive the image size in pixels; left untouched when the
                  function returns False.
  Result        - True when the file starts with FF D8, ends with FF D9 and a
                  frame (SOFn) segment with a complete size field was found.

  Raises a stream exception if the file cannot be opened.
}
const
  JPEG_FLAG_BEGIN = $D8FF; { bytes FF D8 read as a little-endian word }
  JPEG_FLAG_END = $D9FF;   { bytes FF D9 read as a little-endian word }
type
  { Start of a frame segment's payload; sizes are stored big-endian. }
  TFrameInfo = packed record
    Precision: Byte;
    HeightHi, HeightLo: Byte;
    WidthHi, WidthLo: Byte;
  end;
var
  FS: TFileStream;
  Flag1, Flag2: WORD;
  Frame: TFrameInfo;

  { True for any start-of-frame marker: FF C0..CF except C4, C8 and CC.
    Accepting only FF C0 misses progressive and other non-baseline files. }
  function IsFrameMarker(Flag: WORD): Boolean;
  begin
    Result := (Lo(Flag) = $FF) and
      (Hi(Flag) in [$C0..$C3, $C5..$C7, $C9..$CB, $CD..$CF]);
  end;

  { Walks the segment chain until a frame segment header has been read.
    Returns False when the file ends or the chain is malformed. }
  function SeekForFrame: Boolean;
  type
    TSegHeader = packed record
      Flag: WORD;
      LenHi, LenLo: Byte;
    end;
  var
    Seg: TSegHeader;
    SegLen: Integer;
  begin
    Result := False;
    while FS.Read(Seg, SizeOf(Seg)) = SizeOf(Seg) do
    begin
      if IsFrameMarker(Seg.Flag) then
      begin
        Result := True;
        Exit;
      end;
      SegLen := MakeWord(Seg.LenLo, Seg.LenHi);
      { Every marker starts with FF and the length counts its own 2 bytes. }
      if (Lo(Seg.Flag) <> $FF) or (SegLen < 2) then
        Exit;
      FS.Position := FS.Position + SegLen - 2;
    end;
  end;

begin
  Result := False;
  FS := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    { Too short to hold both the start and the end marker. }
    if FS.Size < 4 then
      Exit;
    { A JPEG file must begin with FF D8 and end with FF D9. }
    if FS.Read(Flag1, SizeOf(Flag1)) <> SizeOf(Flag1) then
      Exit;
    FS.Position := FS.Size - 2;
    if FS.Read(Flag2, SizeOf(Flag2)) <> SizeOf(Flag2) then
      Exit;
    if (Flag1 <> JPEG_FLAG_BEGIN) or (Flag2 <> JPEG_FLAG_END) then
      Exit;

    { Skip the start marker and look for the frame segment. }
    FS.Position := 2;
    if not SeekForFrame then
      Exit;
    { Precision byte, then height and width, high byte first. }
    if FS.Read(Frame, SizeOf(Frame)) <> SizeOf(Frame) then
      Exit;
    Height := MakeWord(Frame.HeightLo, Frame.HeightHi);
    Width := MakeWord(Frame.WidthLo, Frame.WidthHi);
    Result := True;
  finally
    FS.Free;
  end;
end;
function JPGToRTFPict(const JPGFilename: string): string;
{
  Converts a JPEG file into an RTF \shppict picture group with the file's
  bytes embedded as hex text.

  The \picw / \pich values are the pixel size reported by GetJPEGSize,
  scaled from 96 pixels per inch to 0.01 mm units.

  Raises Exception when GetJPEGSize does not accept the file.
}
var
  ms: TMemoryStream;
  PixW, PixH: Word;
  PicW, PicH: Integer;
  DataLen: Integer;
  Hex: string;
begin
  { Without this check the sizes below would be uninitialised. }
  if not GetJPEGSize(JPGFilename, PixW, PixH) then
    raise Exception.Create('Invalid JPEG file');
  { Keep the scaled sizes in Integers: they exceed the Word range for images
    larger than about 2476 pixels. }
  PicW := MulDiv(PixW, HundredthMMPerInch, 96);
  PicH := MulDiv(PixH, HundredthMMPerInch, 96);

  ms := TMemoryStream.Create;
  try
    ms.LoadFromFile(JPGFilename);
    DataLen := ms.Size;
    SetLength(Hex, DataLen shl 1);
    BinToHex(PChar(ms.Memory), PChar(Hex), DataLen);
  finally
    ms.Free;
  end;

  { Three groups are opened, so three must be closed. }
  Result := '{{\*\shppict{\pict\picscalex100\picscaley100\'
    + Format('picw%d\pich%d\jpegblip ', [PicW, PicH])
    + Hex + '}}}';
end;
procedure TForm1.Button1Click(Sender: TObject);
{
  Demo handler: loads a WMF file and shows its RTF picture group in Memo1.
  The reverse conversion is kept below, commented out.
}
var
  RtfPicture: string;
begin
  // RTFPictToWMF('C:\o.wmf', Memo1.Text, 423, 688);
  RtfPicture := WMFToRTFPict('C:\o.wmf');
  Memo1.Text := RtfPicture;
end;
参考资料:
《RTF文件格式》:
http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dnrtfspec/html/rtfspec.asp
http://www.biblioscape.com/rtf15_spec.htm#Heading49
《WMF文件格式》: http://www.fltvu.com/jiaocheng/chenxu1/FORMAT/windows/wmf.htm
Delphi源代码
其中Memo1的Text就是那些十六进制字符,注意不要包含RTF的格式控制数据(如 {\pict\wmetafile8… 等)。用上面的代码转换得到的图片(原文此处附有图片)与RTF中显示的完全一样!