Nie wiem czy to się nadaje na forum :) ale klepię sobie program do kompresji danych i mam problem bo źle dekompresuje :p

Oto kod:

/* 
 * chunker.cpp -- a fast experimental compression program
 * 
 * Copyright (C) 2013 Piotr Tarsa ( http://github.com/tarsa )
 *
 *  This software is provided 'as-is', without any express or implied
 *  warranty.  In no event will the author be held liable for any damages
 *  arising from the use of this software.
 *
 *  Permission is granted to anyone to use this software for any purpose,
 *  including commercial applications, and to alter it and redistribute it
 *  freely, subject to the following restrictions:
 *
 *  1. The origin of this software must not be misrepresented; you must not
 *     claim that you wrote the original software. If you use this software
 *     in a product, an acknowledgment in the product documentation would be
 *     appreciated but is not required.
 *  2. Altered source versions must be plainly marked as such, and must not be
 *     misrepresented as being the original software.
 *  3. This notice may not be removed or altered from any source distribution.
 * 
 */

#include <algorithm>

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#define MAX_BLOCK_SIZE (1 << 20)

#if MAX_BLOCK_SIZE % 4 != 0
#error Max block size must contain integral number of chunks
#endif

#define FAST_MODE_MARKER 1
#define GOOD_MODE_MARKER 2
#define TAG_SIZE_FAST 2
#define TAG_SIZE_GOOD 3
#define CHUNKS_PER_SIGNATURE_FAST (32 / TAG_SIZE_FAST)
#define CHUNKS_PER_SIGNATURE_GOOD (32 / TAG_SIZE_GOOD)

#define restrict __restrict__

typedef uint32_t chunk_t;

typedef struct {
    chunk_t dictionary[1 << 16][2];
    chunk_t predictions[1 << 16];
    chunk_t smallDictionary[1 << 10];
    uint32_t lastHash;
#ifndef NDEBUG
    bool initialized;
#endif
} workset_t;

int err(char const * const message) {
    fputs(message, stderr);
    fputs("\n", stderr);
    return EXIT_FAILURE;
}

size_t fgetd(uint32_t * const value) {
    return fread_unlocked(value, sizeof (uint32_t), 1, stdin);
}

size_t fputd(uint32_t const value) {
    return fwrite_unlocked(&value, sizeof (uint32_t), 1, stdout);
}

size_t loadBlock(uint8_t * const buffer, size_t const toLoad) {
    size_t bufferLoaded = 0;
    while (bufferLoaded < toLoad && feof_unlocked(stdin) == 0
            && ferror_unlocked(stdin) == 0) {
        size_t const loaded = fread_unlocked(buffer + bufferLoaded,
                sizeof (uint8_t), toLoad - bufferLoaded, stdin);
        bufferLoaded += loaded;
    }
    return bufferLoaded;
}

uint32_t hashF(uint32_t const value) {
    return value * 123456791u >> 16;
}

uint32_t reduceHash(uint32_t const bigHash) {
    return bigHash >> 6;
}

int32_t compressBlockFast(chunk_t const * const inputChunksBuffer,
        int32_t const inputChunksNumber, chunk_t * const signaturesBuffer,
        uint8_t * const codeStreamBuffer, workset_t * const workset) {
    int32_t codeStreamIndex = 0;
    int32_t chunkNumber = 0;
    int32_t signatureNumber = 0;
    while (chunkNumber < inputChunksNumber) {
        chunk_t signature = 0;
        int32_t lastChunkInSlice = std::min(inputChunksNumber,
                chunkNumber + CHUNKS_PER_SIGNATURE_FAST);
        while (chunkNumber < lastChunkInSlice) {
            chunk_t const chunk = inputChunksBuffer[chunkNumber++];
            uint32_t const hash = hashF(chunk);
            chunk_t * const prediction = workset->predictions
                    + workset->lastHash;
            uint32_t selector = 3;
            if (*prediction != chunk) {
                chunk_t * const found = workset->dictionary[hash];
                int8_t const equal_0 = found[0] == chunk;
                int8_t const equal_1 = found[1] == chunk;
                int8_t const equal_any = equal_0 || equal_1;
                selector = equal_0 * 2 + equal_1;
                found[1] = found[equal_0];
                found[0] = chunk;
                uint32_t const code = equal_any ? hash : chunk;
                *(uint32_t *) (codeStreamBuffer + codeStreamIndex) = code;
                codeStreamIndex += equal_any ? 2 : 4;
                *prediction = chunk;
            }
            workset->lastHash = hash;
            signature <<= TAG_SIZE_FAST;
            signature |= selector;
        }
        if (chunkNumber == inputChunksNumber) {
            int32_t const tagsInSignature = chunkNumber
                    % CHUNKS_PER_SIGNATURE_FAST;
            if (tagsInSignature != 0) {
                for (int32_t i = tagsInSignature; i < CHUNKS_PER_SIGNATURE_FAST;
                        i++) {
                    signature <<= TAG_SIZE_FAST;
                }
            }
        }
        signaturesBuffer[signatureNumber++] = signature;
    }
    assert(chunkNumber == inputChunksNumber);
    assert(signatureNumber == (inputChunksNumber + CHUNKS_PER_SIGNATURE_FAST
            - 1) / CHUNKS_PER_SIGNATURE_FAST);
    assert(codeStreamIndex >= 0);
    assert(codeStreamIndex <= inputChunksNumber * sizeof (chunk_t));
    return codeStreamIndex;
}

int32_t compressBlockGood(chunk_t const * const inputChunksBuffer,
        int32_t const inputChunksNumber, chunk_t * const signaturesBuffer,
        uint8_t * const codeStreamBuffer, workset_t * const workset) {
    int32_t codeStreamIndex = 0;
    int32_t chunkNumber = 0;
    int32_t signatureNumber = 0;
    while (chunkNumber < inputChunksNumber) {
        chunk_t signature = 0;
        int32_t lastChunkInSlice = std::min(inputChunksNumber,
                chunkNumber + CHUNKS_PER_SIGNATURE_GOOD);
        while (chunkNumber < lastChunkInSlice) {
            chunk_t const chunk = inputChunksBuffer[chunkNumber++];
            uint32_t const hash = hashF(chunk);
            chunk_t * const prediction = workset->predictions
                    + workset->lastHash;
            uint32_t selector = 7;
            if (*prediction != chunk) {
                uint32_t const hashSmall = reduceHash(hash);
                chunk_t * const foundSmall = workset->smallDictionary
                        + hashSmall;
                int8_t const equalSmall = *foundSmall == chunk;
                if (equalSmall) {
                    selector = hashSmall >> 8;
                    uint8_t const code = hashSmall;
                    codeStreamBuffer[codeStreamIndex++] = code;
                } else {
                    chunk_t * const found = workset->dictionary[hash];
                    int8_t const equal_0 = found[0] == chunk;
                    int8_t const equal_1 = found[1] == chunk;
                    int8_t const equal_any = equal_0 || equal_1;
                    *foundSmall = equal_0 ? chunk : *foundSmall;
                    selector = 4 + equal_0 * 2 + equal_1;
                    found[1] = found[equal_0];
                    found[0] = chunk;
                    uint32_t const code = equal_any ? hash : chunk;
                    *(uint32_t *) (codeStreamBuffer + codeStreamIndex) = code;
                    codeStreamIndex += equal_any ? 2 : 4;
                }
                *prediction = chunk;
            }
            workset->lastHash = hash;
            signature <<= TAG_SIZE_GOOD;
            signature |= selector;
        }
        if (chunkNumber == inputChunksNumber) {
            int32_t const tagsInSignature = chunkNumber
                    % CHUNKS_PER_SIGNATURE_GOOD;
            if (tagsInSignature != 0) {
                for (int32_t i = tagsInSignature; i < CHUNKS_PER_SIGNATURE_GOOD;
                        i++) {
                    signature <<= TAG_SIZE_GOOD;
                }
            }
        }
        signaturesBuffer[signatureNumber++] = signature;
    }
    assert(chunkNumber == inputChunksNumber);
    assert(signatureNumber == (inputChunksNumber + CHUNKS_PER_SIGNATURE_GOOD
            - 1) / CHUNKS_PER_SIGNATURE_GOOD);
    assert(codeStreamIndex >= 0);
    assert(codeStreamIndex <= inputChunksNumber * sizeof (chunk_t));
    return codeStreamIndex;
}

int compress(bool const fastMode) {
    fputc_unlocked(fastMode ? FAST_MODE_MARKER : GOOD_MODE_MARKER, stdout);

    uint8_t * const inputBuffer = new uint8_t[MAX_BLOCK_SIZE
            + sizeof (chunk_t)];
    chunk_t * inputChunksBuffers[3] = {nullptr, nullptr, nullptr};
    chunk_t * signaturesBuffers[3] = {nullptr, nullptr, nullptr};
    uint8_t * codeStreamBuffers[3] = {nullptr, nullptr, nullptr};
    workset_t * worksets[3] = {nullptr, nullptr, nullptr};
    uint32_t inputChunksQuantities[3];
    uint32_t signaturesQuantities[3];
    uint32_t codeStreamsLengths[3];
    inputChunksBuffers[0] = (chunk_t *) inputBuffer;

    bool firstBlock = true;
    bool fullBlock;
    do {
        size_t const inputBufferLoaded = loadBlock(inputBuffer, MAX_BLOCK_SIZE);
        fullBlock = inputBufferLoaded == MAX_BLOCK_SIZE;
        bool const onlyBlock = firstBlock && !fullBlock;

        int32_t const iterations = fastMode ? 2 : 3;
        for (int32_t iteration = 1; iteration <= iterations; iteration++) {
            if (iteration == 1) {
                inputChunksQuantities[0] = (inputBufferLoaded + sizeof (chunk_t)
                        - 1) / sizeof (chunk_t);
            } else {
                uint32_t const chunksPerSignature = (iteration != 2 || fastMode)
                        ? CHUNKS_PER_SIGNATURE_FAST : CHUNKS_PER_SIGNATURE_GOOD;
                inputChunksQuantities[iteration - 1] =
                        (inputChunksQuantities[iteration - 2]
                        + chunksPerSignature - 1) / chunksPerSignature;
            }
            {
                uint32_t const chunksPerSignature = (iteration != 1 || fastMode)
                        ? CHUNKS_PER_SIGNATURE_FAST : CHUNKS_PER_SIGNATURE_GOOD;
                signaturesQuantities[iteration - 1] =
                        (inputChunksQuantities[iteration - 1]
                        + chunksPerSignature - 1) / chunksPerSignature;
            }

            if (firstBlock) {
                if (iteration > 1) {
                    inputChunksBuffers[iteration - 1] =
                            signaturesBuffers[iteration - 2];
                }
                signaturesBuffers[iteration - 1] =
                        new chunk_t[signaturesQuantities[iteration - 1]];
                codeStreamBuffers[iteration - 1] =
                        new uint8_t[(inputChunksQuantities[iteration - 1] + 1)
                        * sizeof (chunk_t)];
                if (onlyBlock && iteration > 1) {
                    worksets[iteration - 1] = worksets[iteration - 2];
                } else {
                    worksets[iteration - 1] = new workset_t;
                }
                workset_t * const workset = worksets[iteration - 1];
                memset(workset->dictionary, 0, sizeof (workset->dictionary));
                memset(workset->predictions, 0, sizeof (workset->predictions));
                memset(workset->smallDictionary, 0,
                        sizeof (workset->smallDictionary));
                workset->lastHash = 0;
#ifndef NDEBUG
                workset->initialized = true;
#endif
            }

#ifndef NDEBUG
            assert(worksets[iteration - 1]->initialized);
#endif
            codeStreamsLengths[iteration - 1] = (iteration != 1 || fastMode)
                    ? compressBlockFast(inputChunksBuffers[iteration - 1],
                    inputChunksQuantities[iteration - 1],
                    signaturesBuffers[iteration - 1],
                    codeStreamBuffers[iteration - 1], worksets[iteration - 1])
                    : compressBlockGood(inputChunksBuffers[iteration - 1],
                    inputChunksQuantities[iteration - 1],
                    signaturesBuffers[iteration - 1],
                    codeStreamBuffers[iteration - 1], worksets[iteration - 1]);

        }

        int32_t bestNumberOfIterations = 0;
        {
            uint32_t bestTotalSize = inputChunksQuantities[0]
                    * sizeof (chunk_t);
            for (int32_t iteration = 1; iteration <= iterations; iteration++) {
                uint32_t totalSize = signaturesQuantities[iteration - 1]
                        * sizeof (chunk_t);
                for (int32_t iteration2 = 1; iteration2 <= iteration;
                        iteration2++) {
                    totalSize += codeStreamsLengths[iteration2 - 1];
                }
                if (totalSize < bestTotalSize) {
                    bestNumberOfIterations = iteration;
                    bestTotalSize = totalSize;
                }
            }
        }

        fputc_unlocked(bestNumberOfIterations, stdout);
        fputd(inputChunksQuantities[0]);

        if (bestNumberOfIterations == 0) {
            fwrite_unlocked(inputChunksBuffers[0], sizeof (chunk_t),
                    inputChunksQuantities[0], stdout);
        } else {
            fwrite_unlocked(signaturesBuffers[bestNumberOfIterations - 1],
                    sizeof (chunk_t), signaturesQuantities[
                    bestNumberOfIterations - 1], stdout);
            for (int32_t iteration = bestNumberOfIterations; iteration >= 1;
                    iteration--) {
                fputd(codeStreamsLengths[iteration - 1]);
                fwrite_unlocked(codeStreamBuffers[iteration - 1],
                        sizeof (uint8_t), codeStreamsLengths[iteration - 1],
                        stdout);
            }
        }

        if (!fullBlock) {
            size_t const remainingsLength = inputBufferLoaded
                    % sizeof (chunk_t);
            fputc_unlocked(remainingsLength, stdout);
            fwrite_unlocked(inputBuffer + inputBufferLoaded - remainingsLength,
                    sizeof (uint8_t), remainingsLength, stdout);
        }

        firstBlock = false;
    } while (fullBlock);

    return EXIT_SUCCESS;
}

int32_t decompressBlockFast(chunk_t const * const signaturesBuffer,
        uint8_t const * const codeStreamBuffer, workset_t * const workset,
        chunk_t * const outputChunksBuffer, int32_t const outputChunksNumber) {
    int32_t codeStreamIndex = 0;
    int32_t chunkNumber = 0;
    int32_t signatureNumber = 0;
    while (chunkNumber < outputChunksNumber) {
        chunk_t signature = signaturesBuffer[signatureNumber++];
        int32_t lastChunkInSlice = std::min(outputChunksNumber,
                chunkNumber + CHUNKS_PER_SIGNATURE_FAST);
        while (chunkNumber < lastChunkInSlice) {
            uint32_t const selector = signature >> 30;
            if (selector == 3) {
                chunk_t const chunk = workset->predictions[workset->lastHash];
                outputChunksBuffer[chunkNumber++] = chunk;
                workset->lastHash = hashF(chunk);
            } else if (selector) {
                uint32_t const hash = *(uint16_t*) (codeStreamBuffer
                        + codeStreamIndex);
                codeStreamIndex += 2;
                chunk_t * const found = workset->dictionary[hash];
                chunk_t const chunk = found[selector & 1];
                uint8_t const equal_0 = selector >> 1;
                found[1] = found[equal_0];
                found[0] = chunk;
                outputChunksBuffer[chunkNumber++] = chunk;
                workset->predictions[workset->lastHash] = chunk;
                workset->lastHash = hash;
            } else {
                chunk_t const chunk = *(chunk_t*) (codeStreamBuffer
                        + codeStreamIndex);
                codeStreamIndex += 4;
                outputChunksBuffer[chunkNumber++] = chunk;
                uint32_t const hash = hashF(chunk);
                chunk_t * const found = workset->dictionary[hash];
                found[1] = found[0];
                found[0] = chunk;
                workset->predictions[workset->lastHash] = chunk;
                workset->lastHash = hash;
            }
            signature <<= 2;
        }
    }
    assert(chunkNumber == outputChunksNumber);
    assert(signatureNumber == (outputChunksNumber + CHUNKS_PER_SIGNATURE_FAST
            - 1) / CHUNKS_PER_SIGNATURE_FAST);
    assert(codeStreamIndex >= 0);
    assert(codeStreamIndex <= outputChunksNumber * sizeof (chunk_t));
    return codeStreamIndex;
}

int32_t decompressBlockGood(chunk_t const * const signaturesBuffer,
        uint8_t const * const codeStreamBuffer, workset_t * const workset,
        chunk_t * const outputChunksBuffer, int32_t const outputChunksNumber) {
    int32_t codeStreamIndex = 0;
    int32_t chunkNumber = 0;
    int32_t signatureNumber = 0;
    while (chunkNumber < outputChunksNumber) {
        chunk_t signature = signaturesBuffer[signatureNumber++] << 2;
        int32_t lastChunkInSlice = std::min(outputChunksNumber,
                chunkNumber + CHUNKS_PER_SIGNATURE_GOOD);
        while (chunkNumber < lastChunkInSlice) {
            uint32_t const selector = signature >> 29;
            if (selector == 7) {
                chunk_t const chunk = workset->predictions[workset->lastHash];
                outputChunksBuffer[chunkNumber++] = chunk;
                workset->lastHash = hashF(chunk);
            } else if (selector < 4) {
                uint32_t const hashSmall = (selector << 8)
                        | codeStreamBuffer[codeStreamIndex];
                codeStreamIndex++;
                chunk_t const chunk = workset->smallDictionary[hashSmall];
                outputChunksBuffer[chunkNumber++] = chunk;
                workset->predictions[workset->lastHash] = chunk;
                workset->lastHash = hashF(chunk);
            } else if (selector & 3) {
                uint32_t const hash = *(uint16_t*) (codeStreamBuffer
                        + codeStreamIndex);
                codeStreamIndex += 2;
                chunk_t * const found = workset->dictionary[hash];
                chunk_t const chunk = found[selector & 1];
                uint8_t const equal_0 = !(selector & 1);
                found[1] = found[equal_0];
                found[0] = chunk;
                uint32_t const hashSmall = reduceHash(hash);
                workset->smallDictionary[hashSmall] = equal_0 ? chunk
                        : workset->smallDictionary[hashSmall];
                outputChunksBuffer[chunkNumber++] = chunk;
                workset->predictions[workset->lastHash] = chunk;
                workset->lastHash = hash;
            } else {
                chunk_t const chunk = *(chunk_t*) (codeStreamBuffer
                        + codeStreamIndex);
                codeStreamIndex += 4;
                outputChunksBuffer[chunkNumber++] = chunk;
                uint32_t const hash = hashF(chunk);
                chunk_t * const found = workset->dictionary[hash];
                found[1] = found[0];
                found[0] = chunk;
                workset->predictions[workset->lastHash] = chunk;
                workset->lastHash = hash;
            }
            signature <<= 3;
        }
    }
    assert(chunkNumber == outputChunksNumber);
    assert(signatureNumber == (outputChunksNumber + CHUNKS_PER_SIGNATURE_GOOD
            - 1) / CHUNKS_PER_SIGNATURE_GOOD);
    assert(codeStreamIndex >= 0);
    assert(codeStreamIndex <= outputChunksNumber * sizeof (chunk_t));
    return codeStreamIndex;
}

int decompress() {
    int32_t const modeMarker = fgetc_unlocked(stdin);
    if (modeMarker != FAST_MODE_MARKER && modeMarker != GOOD_MODE_MARKER) {
        return err("Invalid mode marker.");
    }
    bool const fastMode = modeMarker == FAST_MODE_MARKER;

    uint8_t * const outputBuffer = new uint8_t[MAX_BLOCK_SIZE
            + sizeof (chunk_t)];
    chunk_t * outputChunksBuffers[3] = {nullptr, nullptr, nullptr};
    chunk_t * signaturesBuffers[3] = {nullptr, nullptr, nullptr};
    uint8_t * codeStreamBuffers[3] = {nullptr, nullptr, nullptr};
    workset_t * worksets[3] = {nullptr, nullptr, nullptr};
    uint32_t outputChunksQuantities[3];
    uint32_t signaturesQuantities[3];
    uint32_t codeStreamsLengths[3];
    outputChunksBuffers[0] = (chunk_t *) outputBuffer;

    bool firstBlock = true;
    bool fullBlock;
    do {
        int32_t const maxIterations = fastMode ? 2 : 3;
        int32_t const iterations = fgetc_unlocked(stdin);
        assert(iterations >= 0);
        assert(iterations <= maxIterations);
        //fprintf(stderr, "%d\n", iterations);
        fgetd(outputChunksQuantities + 0);
        fullBlock = outputChunksQuantities[0] * sizeof (chunk_t)
                == MAX_BLOCK_SIZE;
        bool const onlyBlock = firstBlock && !fullBlock;

        if (!fullBlock && iterations == 0) {
            loadBlock((uint8_t*) outputChunksBuffers[0],
                    outputChunksQuantities[0] * sizeof (chunk_t));
        } else {
            for (int32_t iteration = 1; iteration <= maxIterations;
                    iteration++) {
                if (iteration > 1) {
                    uint32_t const chunksPerSignature = (iteration != 2
                            || fastMode) ? CHUNKS_PER_SIGNATURE_FAST
                            : CHUNKS_PER_SIGNATURE_GOOD;
                    outputChunksQuantities[iteration - 1] =
                            (outputChunksQuantities[iteration - 2]
                            + chunksPerSignature - 1) / chunksPerSignature;
                }
                {
                    uint32_t const chunksPerSignature = (iteration != 1
                            || fastMode) ? CHUNKS_PER_SIGNATURE_FAST
                            : CHUNKS_PER_SIGNATURE_GOOD;
                    signaturesQuantities[iteration - 1] =
                            (outputChunksQuantities[iteration - 1]
                            + chunksPerSignature - 1) / chunksPerSignature;
                }
            }

            if (firstBlock) {
                int32_t const allocatedIterations = fullBlock
                        ? maxIterations : iterations;
                for (int32_t iteration = 1; iteration <= allocatedIterations;
                        iteration++) {
                    if (iteration > 1) {
                        outputChunksBuffers[iteration - 1] =
                                signaturesBuffers[iteration - 2];
                    }
                    signaturesBuffers[iteration - 1] =
                            new chunk_t[signaturesQuantities[iteration - 1]];
                    codeStreamBuffers[iteration - 1] =
                            new uint8_t[(outputChunksQuantities[iteration - 1]
                            + 1) * sizeof (chunk_t)];
                    if (onlyBlock && iteration > 1) {
                        worksets[iteration - 1] = worksets[iteration - 2];
                    } else {
                        worksets[iteration - 1] = new workset_t;
                    }
#ifndef NDEBUG
                    worksets[iteration - 1]->initialized = false;
#endif
                }
            }

            if (iterations == 0) {
                fread_unlocked(outputChunksBuffers[0], sizeof (chunk_t),
                        outputChunksQuantities[0], stdin);
            } else {
                for (int32_t iteration = iterations; iteration >= 1;
                        iteration--) {
                    if (iteration == iterations) {
                        fread_unlocked(signaturesBuffers[iteration - 1],
                                sizeof (chunk_t),
                                signaturesQuantities[iteration - 1], stdin);
                    }
                    fgetd(codeStreamsLengths + iteration - 1);
                    fread_unlocked(codeStreamBuffers[iteration - 1],
                            sizeof (uint8_t), codeStreamsLengths[iteration - 1],
                            stdin);
                    if (firstBlock) {
                        workset_t * const workset = worksets[iteration - 1];
                        memset(workset->dictionary, 0,
                                sizeof (workset->dictionary));
                        memset(workset->predictions, 0,
                                sizeof (workset->predictions));
                        memset(workset->smallDictionary, 0,
                                sizeof (workset->smallDictionary));
                        workset->lastHash = 0;
#ifndef NDEBUG
                        workset->initialized = true;
#endif
                    }
                    int32_t decodedCodeStreamLength;
#ifndef NDEBUG
                    assert(worksets[iteration - 1]->initialized);
#endif
                    if (iteration != 1 || fastMode) {
                        decodedCodeStreamLength = decompressBlockFast(
                                signaturesBuffers[iteration - 1],
                                codeStreamBuffers[iteration - 1],
                                worksets[iteration - 1],
                                outputChunksBuffers[iteration - 1],
                                outputChunksQuantities[iteration - 1]);
                    } else {
                        decodedCodeStreamLength = decompressBlockGood(
                                signaturesBuffers[iteration - 1],
                                codeStreamBuffers[iteration - 1],
                                worksets[iteration - 1],
                                outputChunksBuffers[iteration - 1],
                                outputChunksQuantities[iteration - 1]);
                    }
                    if (decodedCodeStreamLength
                            != codeStreamsLengths[iteration - 1]) {
                        fprintf(stderr, "%d %d\n", decodedCodeStreamLength,
                                codeStreamsLengths[iteration - 1]);
                    }
                    assert(decodedCodeStreamLength
                            == codeStreamsLengths[iteration - 1]);
                }
            }

            if (fullBlock) {
                for (int32_t iteration = iterations + 1; iteration
                        <= maxIterations; iteration++) {
                    if (firstBlock) {
                        workset_t * const workset = worksets[iteration - 1];
                        memset(workset->dictionary, 0,
                                sizeof (workset->dictionary));
                        memset(workset->predictions, 0,
                                sizeof (workset->predictions));
                        memset(workset->smallDictionary, 0,
                                sizeof (workset->smallDictionary));
                        workset->lastHash = 0;
#ifndef NDEBUG
                        workset->initialized = true;
#endif
                    }
#ifndef NDEBUG
                    assert(worksets[iteration - 1]->initialized);
#endif
                    if (iteration != 1 || fastMode) {
                        compressBlockFast(outputChunksBuffers[iteration - 1],
                                outputChunksQuantities[iteration - 1],
                                signaturesBuffers[iteration - 1],
                                codeStreamBuffers[iteration - 1],
                                worksets[iteration - 1]);
                    } else {
                        compressBlockGood(outputChunksBuffers[iteration - 1],
                                outputChunksQuantities[iteration - 1],
                                signaturesBuffers[iteration - 1],
                                codeStreamBuffers[iteration - 1],
                                worksets[iteration - 1]);
                    }
                }
            }
        }

        fwrite_unlocked(outputChunksBuffers[0], sizeof (chunk_t),
                outputChunksQuantities[0], stdout);

        if (!fullBlock) {
            size_t const remainingsLength = fgetc(stdin);
            uint8_t smallBuffer[4];
            fread_unlocked(smallBuffer, sizeof (uint8_t), remainingsLength,
                    stdin);
            fwrite_unlocked(smallBuffer, sizeof (uint8_t), remainingsLength,
                    stdout);
        }

        firstBlock = false;
    } while (fullBlock);

    return EXIT_SUCCESS;
}

int main(int const argc, char const * const * const argv) {
    err("Chunker -- fast compression utility");
    err("Author: Piotr Tarsa ( http://github.com/tarsa )");
    err("Release date: January, 2013");
    err("");

    if (argc != 2 || (strcmp(argv[1], "cf") != 0 && strcmp(argv[1], "cx") != 0
            && strcmp(argv[1], "d") != 0)) {
        err("Usage: <program name> [cf|cx|d] < input > output");
        return EXIT_FAILURE;
    }

    int result;
    if (strcmp(argv[1], "d") == 0) {
        result = decompress();
    } else {
        result = compress(strcmp(argv[1], "cf") == 0);
    }
    err("Done.");
    return result;
}

Szkopuł w tym, że jak wyłączę "rekurencyjną" kompresję sygnatur, podmieniając w metodzie compress linijkę:
int32_t const iterations = fastMode ? 2 : 3;
na:
int32_t const iterations = fastMode ? 1 : 1;
To wtedy hula.

Próbowałem działać Valgrindem i nie wykrył żadnych problemów. Dodałem trochę assertów i puściłem - one też generalnie nie pomogły wiele, bo dostaję tylko że (przy próbie na pliku enwik9 stąd: http://mattmahoney.net/dc/textdata.html ):
chunker: chunker.cpp:588: int decompress(): Assertion decodedCodeStreamLength == codeStreamsLengths[iteration - 1]' failed.`

Kod mam zamiar opublikować (na GitHubie). Jak ktoś znajdzie krytycznego buga to w nagrodę dam mu creditsa, tzn napiszę w komentarzu u góry kodu, że znalazł buga :)

Kompresja działa w taki sposób, że strumień jest dzielony na bloki. Dla każdego bloku po kompresji powstają dwa bloki wyjściowe: blok sygnatur i blok innych danych (w kodzie nazwany codeStream). Jeśli iterations jest większe od 1 to blok sygnatur jest kompresowany rekurencyjnie. I właśnie ta kompresja rekurencyjna nie działa dobrze.

Jak algorytm działa na niskim poziomie? Dane są dzielone na 4-bajtowe kawałki, następnie hashowane. Jest jeden lub dwa słowniki wielopoziomowe (tzn dla danego hasha jest cały zbiór elementów w słowniku) oraz tablica predykcji kawałków (która podaje po prostu następny kawałek z poprzedniego wystąpienia aktualnie przerabianego kawałka). Sygnatura składa się z tagów, gdzie każdy tag odpowiada jednemu kawałkowi i opisuje czy kawałek został znaleziony w słowniku (a jeśli tak to w którym słowniku i którym elemencie w zbiorze), tabeli predykcji albo nigdzie (brak kompresji). Do bloku codeStream idą zarówno dane nieskompresowane jak i hashe (małe i duże).

Może jest tu jakiś trywialny błąd którego nie zauważyłem. Albo może ktoś ma pomysł jak by go szybko znaleźć?