NSIS/Contrib/VPatch/Source/GenPat/PatchGenerator.cpp
//---------------------------------------------------------------------------
// PatchGenerator.cpp
//---------------------------------------------------------------------------
// -=* VPatch *=-
//---------------------------------------------------------------------------
// Copyright (C) 2001-2005 Koen van de Sande / Van de Sande Productions
//---------------------------------------------------------------------------
// Website: http://www.tibed.net/vpatch
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgment in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
//
// Unicode support by Jim Park -- 08/29/2007
#include "tchar.h"
#include "PatchGenerator.h"
#include <algorithm>
#define TARGET_BUFFER_SIZE 65536
#define TARGET_LOOKAHEAD_SIZE 4096
#define MAX_BLOCK_SIZE 16384
#define DEFAULT_MAX_MATCHES 500
PatchGenerator::PatchGenerator(bistream& source, TFileOffset sourceSize, bistream& target, TFileOffset targetSize, bostream& patch) :
    source(source), sourceSize(sourceSize), target(target), targetSize(targetSize), patch(patch),
    targetCData(NULL), targetCDataBaseOffset(0), targetCDataSize(0), blockSize(64), maxMatches(DEFAULT_MAX_MATCHES), beVerbose(false) {
  targetCData = new unsigned char[TARGET_BUFFER_SIZE];
}

PatchGenerator::~PatchGenerator() {
  if(targetCData != NULL) delete[] targetCData;
}
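
// execute() walks through the target file and collects, in order, every block
// that can also be found in the source file.  A ChunkedFile built over the
// source provides checksum lookups for blockSize-sized chunks; candidate
// matches found via findBlock() are appended to sameBlocks.  A window of
// TARGET_BUFFER_SIZE bytes of the target (targetCData) is kept in memory and
// reloaded as the scan advances.  Regions of the target that end up between
// matches are the parts the patch cannot copy from the source.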
void PatchGenerator::execute(vector<SameBlock*>& sameBlocks) {
  ChunkedFile* sourceTree = new ChunkedFile(source,sourceSize,blockSize);

  // sameBlocks: this vector will store blocks that have been found to be the same
  // between files
  // the vector needs an 'empty' first block so checking for overlap with the
  // 'previous' block never fails
  SameBlock* firstBlock = new SameBlock;
  firstBlock->sourceOffset = 0;
  firstBlock->targetOffset = 0;
  firstBlock->size = 0;
  sameBlocks.push_back(firstBlock);

  targetCDataBaseOffset = 0;
  targetCDataSize = 0;
  bool firstRun = true;

  // currentOffset is in the target file
  for(TFileOffset currentOffset = 0; currentOffset < targetSize; ) {
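    // the cached window can be reused as long as currentOffset lies inside it
    // and at least TARGET_LOOKAHEAD_SIZE bytes of lookahead remain; the first
    // iteration always loads the cache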
    bool reloadTargetCData = true;
    if((currentOffset >= targetCDataBaseOffset) &&
       (currentOffset + TARGET_LOOKAHEAD_SIZE < targetCDataBaseOffset + TARGET_BUFFER_SIZE)) {
      if(firstRun) {
        firstRun = false;
      } else {
        reloadTargetCData = false;
      }
    }
    if(reloadTargetCData) {
      // at least support looking back blockSize, if possible (findBlock relies on this!)
      targetCDataBaseOffset = currentOffset - blockSize;
      // handle start of file correctly
      if(currentOffset < blockSize) targetCDataBaseOffset = 0;
      targetCDataSize = TARGET_BUFFER_SIZE;
      // check if this does not extend beyond EOF
      if(targetCDataBaseOffset + targetCDataSize > targetSize) {
        targetCDataSize = targetSize - targetCDataBaseOffset;
      }

      // we need to update the memory cache of target
      tout << _T("[CacheReload] File position = ") << static_cast<unsigned int>(targetCDataBaseOffset) << _T("\n");
      target.seekg(targetCDataBaseOffset,ios::beg);
      target.read(reinterpret_cast<char*>(targetCData),targetCDataSize);
    }

    //tout << currentOffset << _T(" ");
    SameBlock* currentSameBlock = findBlock(sourceTree,currentOffset);
    if(currentSameBlock) {
      // we have a match
      SameBlock* previousBlock = sameBlocks.back();
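      // backward extension in improveSameBlockMatch() can make the new match
      // start before the end of the previous one, so the front of the new
      // match may have to be trimmed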
      if(previousBlock->targetOffset + previousBlock->size > currentSameBlock->targetOffset) {
        // there is overlap, resolve it
        TFileOffset difference = previousBlock->targetOffset + previousBlock->size - currentSameBlock->targetOffset;
        currentSameBlock->sourceOffset += difference;
        currentSameBlock->targetOffset += difference;
        currentSameBlock->size -= difference;
      }
      sameBlocks.push_back(currentSameBlock);

      // debug info
      if(beVerbose) {
        tout << _T("Block found: ") << static_cast<unsigned int>(currentSameBlock->targetOffset)
             << _T(" ") << static_cast<unsigned int>(currentSameBlock->size)
             << _T(" (source offset=") << static_cast<unsigned int>(currentSameBlock->sourceOffset) << _T(")\n");
      }
      currentOffset = currentSameBlock->targetOffset + currentSameBlock->size;
    } else {
      // no match, advance one byte
      currentOffset++;
    }
  }

  // add a block at the end, again to prevent bounds checking hassle
  SameBlock* lastBlock = new SameBlock;
  lastBlock->sourceOffset = 0;
  lastBlock->targetOffset = targetSize;
  lastBlock->size = 0;
  sameBlocks.push_back(lastBlock);

  delete sourceTree;
}
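
// findBlock() checksums the blockSize bytes of the target starting at
// targetFileStartOffset (taken from the targetCData cache) and looks them up
// in the source chunk tree.  Every source chunk with the same checksum is a
// candidate; each candidate is extended with improveSameBlockMatch() and the
// longest resulting match is returned, or NULL if nothing usable was found.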
SameBlock* PatchGenerator::findBlock(ChunkedFile* sourceTree,
                                     TFileOffset targetFileStartOffset) {
  if(targetSize - targetFileStartOffset < blockSize) return NULL;

  TFileOffset preDataSize = targetFileStartOffset - targetCDataBaseOffset;
  //unsigned char* p = &(targetCData[preDataSize]);
  // SameBlock* currentSameBlock = findBlock(sourceTree,p,preDataSize,
  //                                         targetCDataSize - preDataSize,
  //                                         currentOffset);
  //unsigned char* targetData,      // target data contains a memory part of target stream
  //TFileOffset targetDataPreSize,  // this specifies how many bytes we can access in front (required to be at least blockSize)
  //TFileOffset targetDataSize,     // this specifies how many bytes we can access in the pointer (must be at least blockSize)

  // read the current data part into memory
  TChunkChecksum checksum;
  sourceTree->calculateChecksum(&(targetCData[preDataSize]),blockSize,checksum);
  TFileOffset foundIndex;   // location into sourceTree chunks array of found chunk
  if(sourceTree->search(checksum,&foundIndex)) {
    // we found something
    SameBlock* bestMatch = new SameBlock;
    bestMatch->sourceOffset = sourceTree->chunks[foundIndex].offset;
    bestMatch->targetOffset = targetFileStartOffset;
    bestMatch->size = 0;    // default to 0, because they can all be mismatches as well

    // increase match size if possible, also check if it is a match at all
    int matchCount = 0;
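    // walk forward over consecutive chunks that still carry this checksum;
    // each one is a candidate match, and the scan is capped at maxMatches
    // candidates (0 means unlimited)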
    while((sourceTree->chunks[foundIndex].checksum == checksum) && ((maxMatches == 0) || (matchCount < maxMatches))) {
      // check if this one is better than the current match
      SameBlock match;
      match.sourceOffset = sourceTree->chunks[foundIndex].offset;
      match.targetOffset = targetFileStartOffset;
      match.size = 0;   // default to 0, could be a mismatch with the same key
      improveSameBlockMatch(match,bestMatch->size);
      if(match.size > bestMatch->size) {
        *bestMatch = match;
      }
      foundIndex++;
      matchCount++;
    }

    if(beVerbose) {
      if(maxMatches != 0) {
        if(matchCount == maxMatches) {
          tout << _T("[FindBlock] Abort due to >") << static_cast<unsigned int>(maxMatches)
               << _T(" matches; file position = ") << static_cast<unsigned int>(targetFileStartOffset) << _T("\n");
        }
      }
    }

    if(bestMatch->size == 0) {
      // no usable match: release the candidate before reporting failure
      delete bestMatch;
      return NULL;
    } else {
      return bestMatch;
    }
  } else {
    return NULL;
  }
}

#define COMPARISON_SIZE 2048
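
// improveSameBlockMatch() first grows the match forwards, comparing the source
// and target streams COMPARISON_SIZE bytes at a time until they differ or one
// of the files ends.  Matches shorter than blockSize are discarded.  If the
// forward match could still beat currentBest, it is then grown backwards,
// byte by byte, using the cached target window and up to blockSize bytes read
// from just before the match in the source.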
void PatchGenerator::improveSameBlockMatch(SameBlock& match, TFileOffset currentBest) {
  // we should now try to make the match longer by reading big chunks of the
  // files to come
  source.seekg(match.sourceOffset + match.size,ios::beg);
  target.seekg(match.targetOffset + match.size,ios::beg);
  while(true) {
    unsigned char sourceData[COMPARISON_SIZE];
    unsigned char targetData[COMPARISON_SIZE];

    TFileOffset startTarget = match.targetOffset + match.size;
    TFileOffset startSource = match.sourceOffset + match.size;
    TFileOffset checkSize = COMPARISON_SIZE;
    if(checkSize > targetSize - startTarget) checkSize = targetSize - startTarget;
    if(checkSize > sourceSize - startSource) checkSize = sourceSize - startSource;
    if(checkSize == 0) break;

    source.read(reinterpret_cast<char*>(sourceData),checkSize);
    target.read(reinterpret_cast<char*>(targetData),checkSize);

    TFileOffset i = 0;
    // test the bound before dereferencing so a fully matching buffer does not
    // read one byte past the end of sourceData/targetData
    while((i < checkSize) && (sourceData[i] == targetData[i])) {
      match.size++;
      i++;
    }
    // check if we stopped because we had a mismatch
    if(i < checkSize) break;
  }
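
  // fewer than blockSize matching bytes means the checksum hit was not a
  // full-block match, so reject it; longer matches may still be extended
  // backwards below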
  if(match.size < blockSize) {
    match.size = 0;
  } else {
    // try to improve before match if this is useful
    if(match.size + blockSize <= currentBest) return;
    // do not do if there is no more data in the target...
    if(match.targetOffset == 0) return;

    // we know it is stored in the cache... so we just need the source one
    unsigned char sourceData[MAX_BLOCK_SIZE];
    TFileOffset startSource = match.sourceOffset - blockSize;
    TFileOffset checkSize = blockSize;
    if(checkSize > match.sourceOffset) {
      checkSize = match.sourceOffset;
      startSource = 0;
    }
    if(checkSize == 0) return;

    source.seekg(startSource,ios::beg);
    source.read(reinterpret_cast<char*>(sourceData),checkSize);

    checkSize--;
    while(sourceData[checkSize] == targetCData[match.targetOffset - targetCDataBaseOffset - 1]) {
      match.targetOffset--;
      match.sourceOffset--;
      match.size++;
      checkSize--;
      if(checkSize == 0) break;
      if(match.targetOffset == 0) break;
    }
  }
}