Ostatnie wpisy

#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <list>
#include <vector>

//-------------------------------------------------------------------
char const * const inFilename = "xor2.txt";

//-------------------------------------------------------------------
namespace is_good_char_details
{
    unsigned char const polskie_iso_8859_2 [18] = { 0xa1,0xc6,0xca,0xa3,0xd1,0xd3,0xa6,0xac,0xaf,0xb1,0xe6,0xea,0xb3,0xf1,0xf3,0xb6,0xbc,0xbf, };
    unsigned char const polskie_win_1250   [18] = { 0xa5,0xc6,0xca,0xa3,0xd1,0xd3,0x8c,0x8f,0xaf,0xb9,0xe6,0xea,0xb3,0xf1,0xf3,0x9c,0x9f,0xbf, };
    unsigned char const polskie_utf8_hi    [ 2] = { 0xc4, 0xc5 };
    unsigned char const polskie_utf8_lo    [18] = { 0x84,0x86,0x98,0x81,0x83,0x93,0x9a,0xb9,0xbb,0x85,0x87,0x99,0x82,0x84,0xb3,0x9b,0xba,0xbc, };
    unsigned char const polskie_utf16_hi   [ 2] = { 0x00, 0x01 };
    unsigned char const polskie_utf16_lo   [18] = { 0x04,0x06,0x18,0x41,0x43,0xd3,0x5a,0x79,0x7b,0x05,0x07,0x19,0x42,0x44,0xf3,0x5b,0x7a,0x7c, };

    unsigned char goodchar[256];

    void init()
    {
        // assume nothing:
        // any reasonable charset is possible
        for(int i=0;i<256;i++) goodchar[i]= isprint(i) || isspace(i);
        for(int i=0;i<18;i++) goodchar[polskie_win_1250[i]]=1;
        for(int i=0;i<18;i++) goodchar[polskie_iso_8859_2[i]]=1;
        for(int i=0;i<18;i++) goodchar[polskie_utf8_lo[i]]=1;
        for(int i=0;i<2;i++) goodchar[polskie_utf8_hi[i]]=1;
        for(int i=0;i<18;i++) goodchar[polskie_utf16_lo[i]]=1;
        for(int i=0;i<2;i++) goodchar[polskie_utf16_hi[i]]=1;
    }
}

bool is_good_char(unsigned char c) { return is_good_char_details::goodchar[c]; }

//-------------------------------------------------------------------
typedef std::basic_ifstream<unsigned char> u_ifstream;

typedef std::vector<unsigned char> byteary;
typedef byteary::const_iterator byteary_cit;

typedef std::pair<size_t, size_t> range;
typedef std::list<range> ranges;

typedef std::list<unsigned char> bytelist;
typedef bytelist::const_iterator bytelist_cit;

typedef std::vector<bytelist> bytelists;
typedef bytelists::const_iterator bytelists_cit;

//-------------------------------------------------------------------
// checks whever all bytes included in range could possibly be a characters
// END must satisfy conventional lastpos+1 assumption
bool checkAllGood(byteary_cit begin, byteary_cit end)
{
    while(begin != end)
        if(!is_good_char(*begin++))
            return false;
    return true;
}

// checks if the list of possible bytes at each key's position does
// actually form at least one possible key
bool checkKeyPossible(bytelists const & key)
{
    for(bytelists_cit it = key.begin(); it != key.end(); ++it)
        if(!it->size())
            // if for ANY key index there is NO possible bytes
            // - no such key can exist
            return false;
    return true;
}

// returns set of l-r both inclusive ranges of the input
// that does not contain any 0x0D0A sequences
// END must satisfy conventional lastpos+1 assumption
ranges findSafeRanges(byteary_cit begin, byteary_cit end)
{
    ranges result;

    byteary_cit range_start = begin;
    for(byteary_cit it = begin; it != end && it+1 != end; ++it)
        if(*it == 0x0d && *(it+1) == 0x0a)
        {
            result.push_back(std::make_pair(range_start - begin, (it - begin) - 1)); // -1 makes the range right-inclusive
            it += 2; // skip the sequence
            range_start = it; // next range may start after the 0x0D0A pair just been found

            --it; // dirty fix to account the autoincrement from the loop
        }

    if(range_start != end) // was there an open range at the end of the input?
        result.push_back(std::make_pair(range_start - begin, end - begin));

    return result;
}

// removes from given set of byte ranges all those,
// which length is smaller than the specified
void trimSmallRanges(ranges & rngs, size_t minlength)
{
    ranges::const_iterator it = rngs.begin();
    while(it != rngs.end())
        if(it->second - it->first < minlength)
            rngs.erase(it++);
        else
            ++it;
}

// for a range of encrypted input bytes, assuming some length of the encrypted key,
// test every byte of the input and check what could be the value of corresponding key byte
// (what key-value could had been used to generate such encoded-byte)
// returns a set (keylength long) of lists of values, each single list in set for each key position
// resulting lists are sorted in ascending order
bytelists findKeyPossb(byteary_cit i_begin, range const & range, size_t keyLength)
{
    // how many blocks of length equal to the keyLength would fit into
    // the given range?
    const size_t blockCount = (range.second - range.first + 1) / keyLength;

    // mind, that this function is NOT optimized in ANY way and it ONLY does scan whole-fitting subblocks
    // any remaining bytes after the last whole subblock are not checked for the sake of simplicity

    bytelists possibleKeyBytes(keyLength); // prepare keyLength lists

    for(size_t keyIdx=0; keyIdx < keyLength; ++keyIdx) // for each key position == equal to in-subblock position
        for(int possibleByte = 0; possibleByte < 256 /*byte max*/; ++possibleByte) // for each possible 'value'
        {
            bool thisByteSeemsOk = true;
            unsigned char psbByte = (unsigned char)possibleByte;
            for(size_t blockNumber = 0; thisByteSeemsOk && blockNumber < blockCount; ++blockNumber) // for each sub-block
            {
                size_t idx = keyIdx + blockNumber*keyLength;
                unsigned char c = *(i_begin+idx) ^ psbByte; // extract input byte, try decrypting with 'value'
                bool test = !!is_good_char(c); // if such key 'value' for this position and in this block
                thisByteSeemsOk &= test; // is INVALID - does NOT produce a valid clear text, FAIL
            }
            if(thisByteSeemsOk) // if this key-value has NOT FAILED for ANY blocks, store it as possible key value!
                possibleKeyBytes[keyIdx].push_back(psbByte);
        }

    return possibleKeyBytes;
}

// assuming the lists elements are ascdending, produces intersection of the two lists;
// resulting list consists only of values contained in both given lists
bytelist intersect(bytelist const & target, bytelist const & mask)
{
    bytelist result;

    bytelist_cit it1 = target.begin(), it2 = mask.begin();
    while(it1 != target.end() && it2 != mask.end())
    {
        unsigned char v1 = *it1, v2 = *it2;
        if(v1 > v2) ++it2;
        else if(v1==v2){result.push_back(v1);++it1;++it2;}
        else ++it1;
    }

    return result;
}

// takes two sets of key's possible values and intersects them, keyindex-vise;
// produces a set that will contain only possibilities common to both the left and right sets;
// for each index of the key, the result is list of common possible values at that index;
// function assumes that the SECOND set of possibilities has its indexes positively and circularily offsetted modulo keylength;
// that is,
//   for left set {ABCDE}, right set {01234} and offset 0, the function will generate: {A&0, B&1, C&2, D&3, E&4}
//   for left set {ABCDE}, right set {01234} and offset 2, the function will generate: {A&2, B&3, C&4, D&0, E&1}
//   where A/B/C/D/E/0/1/2/3/4 denote some bytelist contained in the sets
//   where & denotes a bytelist intersection
// slightly optimized: if it detects that any intersection produces empty result, stops and returns an empty response
// example:
//   left set: {[3,6], [1,2], [5,4]}, right set: {[4,6], [3], [1,7]}, offset: 1 --> result: {[3], [1], [4]}
//   left set: {[3,6], [1,2], [5,7]}, right set: {[4,6], [3], [1,7]}, offset: 1 --> result: {}
bytelists crossPossbKeys(bytelists const & keyA, bytelists const & keyB, size_t crossoffset)
{
    bytelists result;

    for(size_t idx = 0; idx < keyA.size(); ++idx)
    {
        bytelist cross = intersect(keyA[idx], keyB[(idx + crossoffset) % keyB.size()]);
        if(!cross.size()) { result.clear(); break; }// detect and mark failures early
        result.push_back(cross);
    }

    if(!result.size())
        result.push_back(bytelist());

    return result;
}

// playground
int main()
{
    using namespace std;

    is_good_char_details::init(); // assumes nothing: any reasonable (printable, polish) charset is possible

    byteary fileBytes;
    // read the input (encrypted) file
    {
        u_ifstream f(inFilename, ifstream::ate | ifstream::binary);
        fileBytes.resize(f.tellg());
        f.seekg(0);
        f.read( &fileBytes[0], fileBytes.size());
    }

    // determine parts of the input that are safe for analysis
    ranges rngs = findSafeRanges(fileBytes.begin(), fileBytes.end());
    
    // remove those shorter than a doubled key length, as they are harder to analyze
    // actually the file is quite long, so maybe even three or four lengths;
    // assume the key was 256 bytes long
    trimSmallRanges(rngs, 256*3);
    
    // FOR THE XOR2.TXT, only 6 ranges are longer-equal to that threshold

    ranges::const_iterator it = rngs.begin();
    bytelists a = findKeyPossb(fileBytes.begin(), *it++, 256); // now, playing with those ranges A-F,
    bytelists b = findKeyPossb(fileBytes.begin(), *it++, 256); // calculate what could have been
    bytelists c = findKeyPossb(fileBytes.begin(), *it++, 256); // the key values used to generate them
    bytelists d = findKeyPossb(fileBytes.begin(), *it++, 256); // from the cleartext (characterized by 'is_good_char')
    bytelists e = findKeyPossb(fileBytes.begin(), *it++, 256); // encrypted by a simple XOR (with key of 256 bytes)
    bytelists f = findKeyPossb(fileBytes.begin(), *it++, 256); // 
    
    if(!checkKeyPossible(a) || !checkKeyPossible(b) || !checkKeyPossible(c) || !checkKeyPossible(d) || !checkKeyPossible(e) || !checkKeyPossible(f))
    {
        // if ANY of the blocks happened to be non-generatible
        cout << "FAIL" << endl; // oh well.. not a XOR? wrong key length? wrong charset in is_good_char?
        return -1;
    }

    // FOR XOR2.TXT
    // it happens that:
    {
        bool ab = a == b; // true
        bool ac = a == c; // true
        bool ad = a == d; // FALSE
        bool ae = a == e; // FALSE
        bool af = a == f; // true
        bool de = d == e; // TRUE
    }
    // ranges A,B,C,F and D,E generate completely the same key possibilities
    // thats a surprise, not even an offset between them?

    // anyways, no point in intersecting possibilities that are equal
    // from the original 6, we are left with 2

    // remaining two might be offsetted in respect to each other
    // check what offsets are possible;
    // that is, assuming which offsets, an intersection of these possibilities
    // still leaves some valid key possibility
    for(size_t off = 0; off<256; ++off)
        if(checkKeyPossible(crossPossbKeys(a, d, off)))
            cout << off << endl;

    // and it turns out that the only possible offset is .. NO offset again
    
    // At this point I began to wonder, whever there is some subtle error
    // in the analysis. I find it quite hard to believe that all those 6 ranges
    // are perfectly aligned with each other. Unless the crypter was written/bugged
    // to do it that way, it is very improbable.

    // ok, going further:
    bytelists x = crossPossbKeys(a, d, 0x00 /*the only possible offset between them*/);

    {
        bool xa = x == a; // false
        bool xd = x == d; // true
    }
    // and it turns out that the possibilities of range A are a superset of those from D
    // so, possibilities from A may be dropped out of analysis

    // at this point you may for example write out the D, etc
    // but I don't see any point in that - it is QUITE WIDE for ALL KEY INDEXES anyways.

    int wtf = 8; wtf /= (wtf - 8); // a breakpoint! :)

    // - END --------------------------------------------------------------------------------------

    // for the result to be actualy usable, you would have to use some additional knowledge
    // about the cleartext or the cipher or the key. for example, you could trim the is_good_char
    // to accept fewer characters as valid. you could dig through more ranges that were found.
    // mind that the original INPUT FILE contained MANY MORE RANGES that were 'easily' analyzable,
    // but I have IGNORED all those smaller than 3*keylength completely intentionally.
    // AT LEAST ALL THOSE >=1xKEYLENGHT ARE USABLE in a similar ways.
    // I have NOT checked them, because I THINK that they are short, and will produce so WIDE key
    // possibilities, that it will not invalidate any further possible keys.
    // I may be wrong, weren't I so lazy, I'd check check them against all possible cross-offsets.

    // also, the code is severely underoptimized. I tried to keep it straightforward both because
    // I'm lazy, and because I wanted to show the very basics. It was meant to be an introductory
    // example, not a NSA-grade distributed breaker:) I think it should be written all the way
    // differently, but it was written as a test answering "is it really simple XOR?",
    // so it is AS IS.

    // have fun, if any
    // q.

    cin.sync();
    cin.get();
}
		
  1. Nie posiadasz obsługi JavaScript. Aby potwierdzić, że nie jesteś botem, wpisz tutaj wartość GJCKC
4programmers.net