unit PatchGenerator; { VPatch 2 - Patch Generator ========================== (c) 2002-2003 Van de Sande Productions This unit contains the 'core' functionality of VPatch. TPatchGenerator can load/create/save .PAT files and supports CreatePatch(Old, New) to generate new patches. The only configurable parameter is StartBlockSize. Though I cleaned up the code a little bit, there is very little documentation. That's why I will briefly explain the general workings of the current VPatch algoritm. There is a source file, which is divided into blocks of BlockSize. Block 1 spans bytes 0-15, block 2 16-31, etc if blocksize = 16. For every block, a checksum is calculated and then the block is inserted into a binary search tree, which is sorted on this checksum. Now, the target file (new version) is traversed linearly. For a block at a certain position, the checksum is calculated. Then, a lookup is performed in the binary search tree to find all blocks in the source file which match this checksum. For every occurence, it is checked how many consecutive bytes match with this block (note: since the checksum is not unique, this can be 0 as well, but since all occurences are checked, the largest match is selected). Note also that this match length is not limited to BlockSize but can be larger as well; everything beyond the block end is checked as well (the block is merely used as a starting position for checking the match). For those biggest block matches between source/target files, a copy instruction will be generated in the patch file. For 'inbetween' or unmatchable blocks, the data of the new file is placed in the patch file. This involves some housekeeping, which is what most of the other code does. What's new ---------- 2.1 20031219 Koen Added error checking to CreatePatch, returns negative numbers when there are errors. 2.0 20030811 Koen Initial documentation } interface uses Classes, Sysutils, TreeCode, VDSP_CRC; type TStatusNotifyEvent = procedure(S: String; Current, Total, Savings: Integer) of object; TDebugNotifyEvent = procedure(S: String) of object; PDataBlock = ^TDataBlock; TDataBlock = record SourceOffset: Integer; TargetOffset: Integer; Size: Integer; Next: PDataBlock; end; //internal structure for FindBlock TBlock = record Offset: Integer; Size: Integer; end; TPatchGenerator = class private noPat: Integer; PRay: Array of TDataBlock; NRay: Array of TDataBlock; FPatchData: TMemoryStream; FStartBlockSize: Integer; //initial block size FBlockDivider: Integer; //... block size is divided by this FMinimumBlockSize: Integer;//until this minimum is reached FStepSize: Integer; //input: ASubBlock, which is a pointer to the start of the block to look //for in ABlock. The entire ABlock is searched. The function returns the //offset of the block, when it is found. The ASize parameter contains the //size of this block procedure ShowDebug(S: String); function FindBlock(ASubBlock, ABlock, ABlockTree: Pointer; var ASubBlockStart: Integer; ASubBlockSize, ABlockSize, AMatchSize, ABlockTreeNodeCount: Integer; var ASize: Integer): Integer; procedure FindBlockSize(ASubBlock, ABlock: Pointer; ASubBlockSize, ABlockSize: Integer; var ASubStart, AStart, AFoundSize: Integer); function WritePatchToStream(Target: Pointer; SourceCRC, TargetCRC: Integer): Integer; procedure RemoveExistingPatch(ACRC: Integer); public constructor Create; destructor Destroy; override; procedure Clear; function CreatePatch(SourceFileName, TargetFileName: String): Integer; property StartBlockSize: Integer read FStartBlockSize write FStartBlockSize; property BlockDivider: Integer read FBlockDivider write FBlockDivider; property MinimumBlockSize: Integer read FMinimumBlockSize write FMinimumBlockSize; property StepSize: Integer read FStepSize write FStepSize; function Size: Integer; procedure WriteToFile(AFileName: String); procedure WriteToStream(AStream: TStream); procedure LoadFromFile(AFileName: String); end; const BUF_BLOCK_SIZE = 4096; INIT_BLOCK_COUNT=10000; var DebugEvent: TDebugNotifyEvent = nil; implementation { TPatchGenerator } procedure TPatchGenerator.Clear; begin FPatchData.Clear; end; constructor TPatchGenerator.Create; begin inherited; FPatchData:=TMemoryStream.Create; end; function TPatchGenerator.CreatePatch(SourceFileName, TargetFileName: String): Integer; var fsSource, fsTarget: TFileStream; fm: TMemoryStream; Source, Target: Pointer; SourceSize, TargetSize: Integer; SourceCRC, TargetCRC: Integer; SourceTree: Pointer; SourceTreeNodeCount: Cardinal; cBlockSize: Integer; o,i,lastO: Integer; Start,Siz,BetweenSiz: Integer; retTO: Integer; noN: Integer; begin fsSource:=TFileStream.Create(SourceFileName,fmOpenRead+fmShareDenyNone); fsTarget:=TFileStream.Create(TargetFileName,fmOpenRead+fmShareDenyNone); fm:=TMemoryStream.Create; SetLength(PRay,INIT_BLOCK_COUNT); SetLength(NRay,INIT_BLOCK_COUNT); //Load those files into memory! SourceSize:=fsSource.Size; try GetMem(Source,SourceSize); except on EOutOfMemory do begin Result:=-2; // not enough memory for source file Exit; end; end; fm.CopyFrom(fsSource,SourceSize); Move(fm.Memory^,Source^,SourceSize); SourceCRC:=FileCRC(fsSource); fsSource.Free; fm.Clear; TargetSize:=fsTarget.Size; try GetMem(Target,TargetSize); except on EOutOfMemory do begin FreeMem(Source,SourceSize); Result:=-3; // not enough memory for target file Exit; end; end; fm.CopyFrom(fsTarget,TargetSize); Move(fm.Memory^,Target^,TargetSize); TargetCRC:=FileCRC(fsTarget); fsTarget.Free; fm.Free; if(SourceCRC = TargetCRC) then begin FreeMem(Source,SourceSize); FreeMem(Target,TargetSize); Result:=-1; Exit; end; PRay[0].TargetOffset:=0; PRay[0].SourceOffset:=0; PRay[0].Size:=0; noPat:=1; //termination block PRay[noPat].SourceOffset:=0; PRay[noPat].TargetOffset:=TargetSize; PRay[noPat].Size:=0; //we only have one pass in this mode // StartBlockSize:=16; MinimumBlockSize:=StartBlockSize; StepSize:=1; BlockDivider:=2; //because we are dividing first inside. cBlockSize:=StartBlockSize*BlockDivider; SourceTree:=nil; SourceTreeNodeCount:=BuildTree(Source,SourceTree,SourceSize,cBlockSize div BlockDivider); SortTree(SourceTree,SourceTreeNodeCount); //now, we must do the above again - with a smaller block size repeat if cBlockSize<=MinimumBlockSize then break; cBlockSize:=cBlockSize div BlockDivider; noN:=0; for i:=1 to noPat do begin //calculate location of the inbetween parts Start:=PRay[i-1].TargetOffset+PRay[i-1].Size; BetweenSiz:=PRay[i].TargetOffset-Start; NRay[noN].SourceOffset:=PRay[i-1].SourceOffset; NRay[noN].TargetOffset:=PRay[i-1].TargetOffset; NRay[noN].Size:=PRay[i-1].Size; Inc(noN); if BetweenSiz>0 then begin o:=Start; repeat //ShowDebug(PChar('DoFind '+IntToStr(o))); LastO:=o; retTO:=FindBlock(Target,Source,SourceTree,o,TargetSize,SourceSize,cBlockSize,SourceTreeNodeCount,Siz); if not (Siz=0) then ShowDebug(IntToStr(LastO)+' -> Source='+IntToStr(retTO)+' Target='+IntToStr(o)+' Size='+IntToStr(Siz)); if Siz=0 then begin o:=LastO+StepSize; end else begin //we have found a block, let's add it! NRay[noN].SourceOffset:=retTO; NRay[noN].TargetOffset:=o; NRay[noN].Size:=Siz; Inc(noN); if noN>=Length(NRay) then begin SetLength(NRay,Length(NRay)*2); SetLength(PRay,Length(PRay)*2); end; Inc(o,Siz); end; //check to see if we're not inside another one. Siz:=NRay[noN].TargetOffset-NRay[noN-1].TargetOffset-NRay[noN-1].Size; If Siz<0 then begin //that's impossible! (overlapping should be eliminated) NRay[noN].TargetOffset:=NRay[noN].TargetOffset-Siz; NRay[noN].Size:=NRay[noN].Size+Siz; NRay[noN].SourceOffset:=NRay[noN].SourceOffset-Siz; end; until o>Start+BetweenSiz; end; end; //I think the last termination block isn't copied: do so now. NRay[noN].SourceOffset:=PRay[noPat].SourceOffset; NRay[noN].TargetOffset:=PRay[noPat].TargetOffset; NRay[noN].Size:=PRay[noPat].Size; //copy back into PRay for i:=0 to noN do begin PRay[i].SourceOffset:=NRay[i].SourceOffset; PRay[i].TargetOffset:=NRay[i].TargetOffset; PRay[i].Size:=NRay[i].Size; end; noPat:=noN; until false; //writing is next! ShowDebug('Writing patch'); Result:=WritePatchToStream(Target, SourceCRC, TargetCRC); ClearTree(SourceTree,SourceTreeNodeCount); FreeMem(Source,SourceSize); FreeMem(Target,TargetSize); ShowDebug('Done'); end; destructor TPatchGenerator.Destroy; begin FPatchData.Free; inherited; end; function TPatchGenerator.FindBlock(ASubBlock, ABlock, ABlockTree: Pointer; var ASubBlockStart: Integer; ASubBlockSize, ABlockSize, AMatchSize, ABlockTreeNodeCount: Integer; var ASize: Integer): Integer; //This procedure locates location of a block in the target file //Then, it calls FindBlockSize to determine size of this block var MatchSize, FoundSize: Integer; q,r,i: Integer; FoundCache_SubOffset, FoundCache_Size, FoundCache_Offset: Integer; Checksum: Cardinal; PFound: PTreeNode; FoundCount: Integer; begin //if we find nothing... FoundCache_Size:=0; FoundCache_Offset:=0; FoundCache_SubOffset:=ASubBlockStart; FindBlock:=0; ASize:=0; MatchSize:=AMatchSize; //we can only find MatchSize sized blocks in the tree! if MatchSize > ASubBlockSize - ASubBlockStart then Exit; if MatchSize = 0 then Exit; Checksum:=0; calculateChecksum(ASubBlock,ASubBlockStart,MatchSize,Checksum); PFound:=TreeFind(Checksum,ABlockTree,ABlockTreeNodeCount,FoundCount); for i:=0 to Pred(FoundCount) do begin FoundSize:=MatchSize; //q = offset in Block q:=PFound^.Offset; //r = offset in SubBlock r:=ASubBlockStart; FindBlockSize(ASubBlock, ABlock, ASubBlockSize, ABlockSize, r, q, FoundSize); if FoundSize>FoundCache_Size then begin FoundCache_SubOffset:=r; FoundCache_Offset:=q; FoundCache_Size:=FoundSize; end; ShowDebug(' Block Size Start='+IntToStr(r)+' tarStart='+IntToStr(q)+' Size='+IntToStr(FoundSize)); PFound:=PTreeNode(Integer(PFound)+SizeOf(TTreeNode)); end; FindBlock:=FoundCache_Offset; ASize:=FoundCache_Size; ASubBlockStart:=FoundCache_SubOffset; end; procedure TPatchGenerator.FindBlockSize(ASubBlock, ABlock: Pointer; ASubBlockSize, ABlockSize: Integer; var ASubStart,AStart,AFoundSize: Integer); var FoundSize: Integer; a,c,d,i: Integer; f1p,f2p,f1Size,f2Size: Integer; beforeSize: Integer; CurBufSize: Integer; begin //OK, now let's go... //Trace after -> how long does this go on? f1p:=Integer(ASubBlock)+ASubStart; f2p:=Integer(ABlock)+AStart; f1Size:=ASubBlockSize-ASubStart; f2Size:=ABlockSize-AStart; FoundSize:=0; CurBufSize := BUF_BLOCK_SIZE; //size of the block we're checking while not (CurBufSize = 0) do begin //we need equal bytes from both... so if one of them EOF, it's the end. if FoundSize+CurBufSize>f1Size then CurBufSize:=f1Size - FoundSize; if FoundSize+CurBufSize>f2Size then CurBufSize:=f2Size - FoundSize; if CompareMem(Pointer(f1p),Pointer(f2p),CurBufSize) then begin Inc(FoundSize,CurBufSize); Inc(f1p,CurBufSize); Inc(f2p,CurBufSize); end else begin CurBufSize:=CurBufSize div 2; end; end; if FoundSize = 0 then begin AFoundSize:=0; Exit; end; //Trace before -> how much bytes are still the same before the block? //First, read 1 block from source and 1 block from target, start from back to compare how much they differ //just take BUF_BLOCK_SIZE as maximum size for the block before - that's surely //big enough! beforeSize:=BUF_BLOCK_SIZE; a:=ASubStart-beforeSize; if a<0 then begin a:=0; beforeSize:=ASubStart; end; //b is the current before block size c:=AStart-beforeSize; if c<0 then begin c:=0; beforeSize:=AStart; a:=ASubStart-beforeSize; end; //a=Offset in source //b=Size of beforeblock //c=offset in target d:=0; for i:=beforeSize-1 downto 0 do begin //if not (f1^[a+i]=f2^[c+i]) then begin if not (PByte(Integer(ASubBlock)+a+i)^=PByte(Integer(ABlock)+c+i)^) then begin //d=how many bytes before are the same? Break; end; Inc(d); end; Inc(FoundSize,d); Dec(ASubStart,d); Dec(AStart,d); AFoundSize:=FoundSize; end; function TPatchGenerator.Size: Integer; begin Result:=FPatchData.Size; end; procedure TPatchGenerator.ShowDebug(S: String); begin if Assigned(DebugEvent) then DebugEvent(S); end; function TPatchGenerator.WritePatchToStream(Target: Pointer; SourceCRC, TargetCRC: Integer): Integer; var HeadID: Array[0..3] of Char; NoBlocks, NoBlocksOffset, BodySize, BodySizeOffset: Integer; b: Byte; w: Word; i, j: Integer; l: LongWord; Start, Siz: Integer; PTarget: Pointer; begin RemoveExistingPatch(SourceCRC); with FPatchData do begin Seek(0,soFromEnd); if Size = 0 then begin HeadID:='VPAT'; Write(HeadID,SizeOf(HeadID)); l:=0; Write(l,SizeOf(l)); //NoFiles end; l:=0; NoBlocksOffset:=Position; Write(l,SizeOf(l)); //should become NoBlocks later Write(SourceCRC,SizeOf(SourceCRC)); //source CRC Write(TargetCRC,SizeOf(TargetCRC)); //target CRC BodySizeOffset:=Position; Write(l,SizeOf(l)); //should become BodySize (of this patch) NoBlocks:=0; BodySize:=0; //Write the patch... for i:=0 to noPat - 1 do begin //write char 1 - integer/copysource //write char 2 - long/copysource //write char 5 - integer/insidepatch //write char 6 - long/insidepatch Start:=PRay[i].TargetOffset+PRay[i].Size; Siz:= PRay[i+1].TargetOffset-Start; If Siz<0 then begin //that's impossible! (overlapping should be eliminated) PRay[i+1].TargetOffset:=PRay[i+1].TargetOffset-Siz; PRay[i+1].Size:=PRay[i+1].Size+Siz; PRay[i+1].SourceOffset:=PRay[i+1].SourceOffset-Siz; Siz:=0; end; if not (PRay[i].Size=0) then begin if (PRay[i].Size<=255) then begin b:=1; Write(b,SizeOf(b)); b:=PRay[i].Size; Write(b,SizeOf(b)); Inc(BodySize,2); end else if PRay[i].Size<=65535 then begin b:=2; Write(b,SizeOf(b)); w:=PRay[i].Size; Write(w,SizeOf(w)); Inc(BodySize,3); end else begin b:=3; Write(b,SizeOf(b)); Write(PRay[i].Size,SizeOf(Integer)); Inc(BodySize,5); end; Write(PRay[i].SourceOffset,SizeOf(Integer)); Inc(BodySize,SizeOf(Integer)); Inc(NoBlocks); end; //Now write the writeblock If Not (Siz = 0) Then begin if Siz<=255 then begin b:=5; Write(b,SizeOf(b)); b:=Siz; Write(b,SizeOf(b)); Inc(BodySize,2); end else if Siz<=65535 then begin b:=6; Write(b,1); w:=Siz; Write(w,2); Inc(BodySize,3); end else begin b:=7; Write(b,1); Write(Siz,4); Inc(BodySize,5); end; PTarget:=Pointer(Integer(Target)+Start); j:=Start; repeat //read if (j+4096>Start+Siz) then begin Write(PTarget^,Start+Siz-j); break; end; Write(PTarget^,4096); Inc(j,4096); PTarget:=Pointer(Integer(PTarget)+4096); until false; Inc(BodySize,Siz); Inc(NoBlocks); end; end; Seek(NoBlocksOffset,soFromBeginning); Write(NoBlocks,SizeOf(NoBlocks)); Seek(BodySizeOffset,soFromBeginning); Write(BodySize,SizeOf(BodySize)); ShowDebug('Patch body size: '+IntToStr(BodySize)); ShowDebug('Total patch size:'+IntToStr(Size)); //now increase file count Seek(4,soFromBeginning); Read(i,SizeOf(i)); Inc(i); Seek(4,soFromBeginning); Write(i,SizeOf(i)); Seek(0,soFromEnd); Result:=BodySize; end; end; procedure TPatchGenerator.WriteToFile(AFileName: String); var fs: TFileStream; begin fs:=TFileStream.Create(AFileName,fmCreate); FPatchData.Seek(0,soFromBeginning); fs.CopyFrom(FPatchData,FPatchData.Size); fs.Free; end; procedure TPatchGenerator.LoadFromFile(AFileName: String); var fs: TFileStream; begin fs:=TFileStream.Create(AFileName,fmOpenRead); FPatchData.Clear; FPatchData.CopyFrom(fs,fs.Size); fs.Free; end; procedure TPatchGenerator.RemoveExistingPatch(ACRC: Integer); var HeadID: Array[0..3] of Char; NoFiles, i, j, SourceCRC, MSize: Integer; StartPos: Integer; ms: TMemoryStream; begin with FPatchData do begin if Size = 0 then Exit; Seek(0,soFromBeginning); Read(HeadID,SizeOf(HeadID)); if HeadID = 'VPAT' then begin Read(NoFiles,SizeOf(NoFiles)); for i:=0 to Pred(NoFiles) do begin if Position >= Size then Break; StartPos:=Position; Read(j,SizeOf(j)); //NoBlocks Read(SourceCRC,SizeOf(SourceCRC)); //SourceCRC Read(j,SizeOf(j)); //TargetCRC Read(j,SizeOf(j)); //BodySize Seek(j,soFromCurrent); if SourceCRC = ACRC then begin ms:=TMemoryStream.Create; MSize:=Size-Position; if MSize > 0 then ms.CopyFrom(FPatchData,MSize); ms.Seek(0, soFromBeginning); FPatchData.Seek(StartPos,soFromBeginning); FPatchData.SetSize(Size - j - SizeOf(Integer) * 4); FPatchData.CopyFrom(ms,ms.Size); ms.Free; Dec(NoFiles); Seek(4,soFromBeginning); Write(NoFiles,SizeOf(NoFiles)); Break; end; end; end; end; end; procedure TPatchGenerator.WriteToStream(AStream: TStream); begin FPatchData.Seek(0,soFromBeginning); AStream.CopyFrom(FPatchData,FPatchData.Size); end; end.