Ostatnie wpisy

#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <list>
#include <vector>

//-------------------------------------------------------------------
char const * const inFilename = "xor2.txt";

//-------------------------------------------------------------------
namespace is_good_char_details
{
	unsigned char const polskie_iso_8859_2 [18] = { 0xa1,0xc6,0xca,0xa3,0xd1,0xd3,0xa6,0xac,0xaf,0xb1,0xe6,0xea,0xb3,0xf1,0xf3,0xb6,0xbc,0xbf, };
	unsigned char const polskie_win_1250   [18] = { 0xa5,0xc6,0xca,0xa3,0xd1,0xd3,0x8c,0x8f,0xaf,0xb9,0xe6,0xea,0xb3,0xf1,0xf3,0x9c,0x9f,0xbf, };
	unsigned char const polskie_utf8_hi    [ 2] = { 0xc4, 0xc5 };
	unsigned char const polskie_utf8_lo    [18] = { 0x84,0x86,0x98,0x81,0x83,0x93,0x9a,0xb9,0xbb,0x85,0x87,0x99,0x82,0x84,0xb3,0x9b,0xba,0xbc, };
	unsigned char const polskie_utf16_hi   [ 2] = { 0x00, 0x01 };
	unsigned char const polskie_utf16_lo   [18] = { 0x04,0x06,0x18,0x41,0x43,0xd3,0x5a,0x79,0x7b,0x05,0x07,0x19,0x42,0x44,0xf3,0x5b,0x7a,0x7c, };

	unsigned char goodchar[256];

	void init()
	{
		// assume nothing:
		// any reasonable charset is possible
		for(int i=0;i<256;i++) goodchar[i]= isprint(i) || isspace(i);
		for(int i=0;i<18;i++) goodchar[polskie_win_1250[i]]=1;
		for(int i=0;i<18;i++) goodchar[polskie_iso_8859_2[i]]=1;
		for(int i=0;i<18;i++) goodchar[polskie_utf8_lo[i]]=1;
		for(int i=0;i<2;i++) goodchar[polskie_utf8_hi[i]]=1;
		for(int i=0;i<18;i++) goodchar[polskie_utf16_lo[i]]=1;
		for(int i=0;i<2;i++) goodchar[polskie_utf16_hi[i]]=1;
	}
}

bool is_good_char(unsigned char c) { return is_good_char_details::goodchar[c]; }

//-------------------------------------------------------------------
typedef std::basic_ifstream<unsigned char> u_ifstream;

typedef std::vector<unsigned char> byteary;
typedef byteary::const_iterator byteary_cit;

typedef std::pair<size_t, size_t> range;
typedef std::list<range> ranges;

typedef std::list<unsigned char> bytelist;
typedef bytelist::const_iterator bytelist_cit;

typedef std::vector<bytelist> bytelists;
typedef bytelists::const_iterator bytelists_cit;

//-------------------------------------------------------------------
// checks whever all bytes included in range could possibly be a characters
// END must satisfy conventional lastpos+1 assumption
bool checkAllGood(byteary_cit begin, byteary_cit end)
{
	while(begin != end)
		if(!is_good_char(*begin++))
			return false;
	return true;
}

// checks if the list of possible bytes at each key's position does
// actually form at least one possible key
bool checkKeyPossible(bytelists const & key)
{
	for(bytelists_cit it = key.begin(); it != key.end(); ++it)
		if(!it->size())
			// if for ANY key index there is NO possible bytes
			// - no such key can exist
			return false;
	return true;
}

// returns set of l-r both inclusive ranges of the input
// that does not contain any 0x0D0A sequences
// END must satisfy conventional lastpos+1 assumption
ranges findSafeRanges(byteary_cit begin, byteary_cit end)
{
	ranges result;

	byteary_cit range_start = begin;
	for(byteary_cit it = begin; it != end && it+1 != end; ++it)
		if(*it == 0x0d && *(it+1) == 0x0a)
		{
			result.push_back(std::make_pair(range_start - begin, (it - begin) - 1)); // -1 makes the range right-inclusive
			it += 2; // skip the sequence
			range_start = it; // next range may start after the 0x0D0A pair just been found

			--it; // dirty fix to account the autoincrement from the loop
		}

	if(range_start != end) // was there an open range at the end of the input?
		result.push_back(std::make_pair(range_start - begin, end - begin));

	return result;
}

// removes from given set of byte ranges all those,
// which length is smaller than the specified
void trimSmallRanges(ranges & rngs, size_t minlength)
{
	ranges::const_iterator it = rngs.begin();
	while(it != rngs.end())
		if(it->second - it->first < minlength)
			rngs.erase(it++);
		else
			++it;
}

// for a range of encrypted input bytes, assuming some length of the encrypted key,
// test every byte of the input and check what could be the value of corresponding key byte
// (what key-value could had been used to generate such encoded-byte)
// returns a set (keylength long) of lists of values, each single list in set for each key position
// resulting lists are sorted in ascending order
bytelists findKeyPossb(byteary_cit i_begin, range const & range, size_t keyLength)
{
	// how many blocks of length equal to the keyLength would fit into
	// the given range?
	const size_t blockCount = (range.second - range.first + 1) / keyLength;

	// mind, that this function is NOT optimized in ANY way and it ONLY does scan whole-fitting subblocks
	// any remaining bytes after the last whole subblock are not checked for the sake of simplicity

	bytelists possibleKeyBytes(keyLength); // prepare keyLength lists

	for(size_t keyIdx=0; keyIdx < keyLength; ++keyIdx) // for each key position == equal to in-subblock position
		for(int possibleByte = 0; possibleByte < 256 /*byte max*/; ++possibleByte) // for each possible 'value'
		{
			bool thisByteSeemsOk = true;
			unsigned char psbByte = (unsigned char)possibleByte;
			for(size_t blockNumber = 0; thisByteSeemsOk && blockNumber < blockCount; ++blockNumber) // for each sub-block
			{
				size_t idx = keyIdx + blockNumber*keyLength;
				unsigned char c = *(i_begin+idx) ^ psbByte; // extract input byte, try decrypting with 'value'
				bool test = !!is_good_char(c); // if such key 'value' for this position and in this block
				thisByteSeemsOk &= test; // is INVALID - does NOT produce a valid clear text, FAIL
			}
			if(thisByteSeemsOk) // if this key-value has NOT FAILED for ANY blocks, store it as possible key value!
				possibleKeyBytes[keyIdx].push_back(psbByte);
		}

	return possibleKeyBytes;
}

// assuming the lists elements are ascdending, produces intersection of the two lists;
// resulting list consists only of values contained in both given lists
bytelist intersect(bytelist const & target, bytelist const & mask)
{
	bytelist result;

	bytelist_cit it1 = target.begin(), it2 = mask.begin();
	while(it1 != target.end() && it2 != mask.end())
	{
		unsigned char v1 = *it1, v2 = *it2;
		if(v1 > v2) ++it2;
		else if(v1==v2){result.push_back(v1);++it1;++it2;}
		else ++it1;
	}

	return result;
}

// takes two sets of key's possible values and intersects them, keyindex-vise;
// produces a set that will contain only possibilities common to both the left and right sets;
// for each index of the key, the result is list of common possible values at that index;
// function assumes that the SECOND set of possibilities has its indexes positively and circularily offsetted modulo keylength;
// that is,
//   for left set {ABCDE}, right set {01234} and offset 0, the function will generate: {A&0, B&1, C&2, D&3, E&4}
//   for left set {ABCDE}, right set {01234} and offset 2, the function will generate: {A&2, B&3, C&4, D&0, E&1}
//   where A/B/C/D/E/0/1/2/3/4 denote some bytelist contained in the sets
//   where & denotes a bytelist intersection
// slightly optimized: if it detects that any intersection produces empty result, stops and returns an empty response
// example:
//   left set: {[3,6], [1,2], [5,4]}, right set: {[4,6], [3], [1,7]}, offset: 1 --> result: {[3], [1], [4]}
//   left set: {[3,6], [1,2], [5,7]}, right set: {[4,6], [3], [1,7]}, offset: 1 --> result: {}
bytelists crossPossbKeys(bytelists const & keyA, bytelists const & keyB, size_t crossoffset)
{
	bytelists result;

	for(size_t idx = 0; idx < keyA.size(); ++idx)
	{
		bytelist cross = intersect(keyA[idx], keyB[(idx + crossoffset) % keyB.size()]);
		if(!cross.size()) { result.clear(); break; }// detect and mark failures early
		result.push_back(cross);
	}

	if(!result.size())
		result.push_back(bytelist());

	return result;
}

// playground
int main()
{
    using namespace std;

	is_good_char_details::init(); // assumes nothing: any reasonable (printable, polish) charset is possible

    byteary fileBytes;
	// read the input (encrypted) file
    {
        u_ifstream f(inFilename, ifstream::ate | ifstream::binary);
        fileBytes.resize(f.tellg());
        f.seekg(0);
        f.read( &fileBytes[0], fileBytes.size());
    }

	// determine parts of the input that are safe for analysis
	ranges rngs = findSafeRanges(fileBytes.begin(), fileBytes.end());
	
	// remove those shorter than a doubled key length, as they are harder to analyze
	// actually the file is quite long, so maybe even three or four lengths;
	// assume the key was 256 bytes long
	trimSmallRanges(rngs, 256*3);
	
	// FOR THE XOR2.TXT, only 6 ranges are longer-equal to that threshold

	ranges::const_iterator it = rngs.begin();
	bytelists a = findKeyPossb(fileBytes.begin(), *it++, 256); // now, playing with those ranges A-F,
	bytelists b = findKeyPossb(fileBytes.begin(), *it++, 256); // calculate what could have been
	bytelists c = findKeyPossb(fileBytes.begin(), *it++, 256); // the key values used to generate them
	bytelists d = findKeyPossb(fileBytes.begin(), *it++, 256); // from the cleartext (characterized by 'is_good_char')
	bytelists e = findKeyPossb(fileBytes.begin(), *it++, 256); // encrypted by a simple XOR (with key of 256 bytes)
	bytelists f = findKeyPossb(fileBytes.begin(), *it++, 256); // 
	
	if(!checkKeyPossible(a) || !checkKeyPossible(b) || !checkKeyPossible(c) || !checkKeyPossible(d) || !checkKeyPossible(e) || !checkKeyPossible(f))
	{
		// if ANY of the blocks happened to be non-generatible
		cout << "FAIL" << endl; // oh well.. not a XOR? wrong key length? wrong charset in is_good_char?
		return -1;
	}

	// FOR XOR2.TXT
	// it happens that:
	{
		bool ab = a == b; // true
		bool ac = a == c; // true
		bool ad = a == d; // FALSE
		bool ae = a == e; // FALSE
		bool af = a == f; // true
		bool de = d == e; // TRUE
	}
	// ranges A,B,C,F and D,E generate completely the same key possibilities
	// thats a surprise, not even an offset between them?

	// anyways, no point in intersecting possibilities that are equal
	// from the original 6, we are left with 2

	// remaining two might be offsetted in respect to each other
	// check what offsets are possible;
	// that is, assuming which offsets, an intersection of these possibilities
	// still leaves some valid key possibility
	for(size_t off = 0; off<256; ++off)
		if(checkKeyPossible(crossPossbKeys(a, d, off)))
			cout << off << endl;

	// and it turns out that the only possible offset is .. NO offset again
	
	// At this point I began to wonder, whever there is some subtle error
	// in the analysis. I find it quite hard to believe that all those 6 ranges
	// are perfectly aligned with each other. Unless the crypter was written/bugged
	// to do it that way, it is very improbable.

	// ok, going further:
	bytelists x = crossPossbKeys(a, d, 0x00 /*the only possible offset between them*/);

	{
		bool xa = x == a; // false
		bool xd = x == d; // true
	}
	// and it turns out that the possibilities of range A are a superset of those from D
	// so, possibilities from A may be dropped out of analysis

	// at this point you may for example write out the D, etc
	// but I don't see any point in that - it is QUITE WIDE for ALL KEY INDEXES anyways.

	int wtf = 8; wtf /= (wtf - 8); // a breakpoint! :)

	// - END --------------------------------------------------------------------------------------

	// for the result to be actualy usable, you would have to use some additional knowledge
	// about the cleartext or the cipher or the key. for example, you could trim the is_good_char
	// to accept fewer characters as valid. you could dig through more ranges that were found.
	// mind that the original INPUT FILE contained MANY MORE RANGES that were 'easily' analyzable,
	// but I have IGNORED all those smaller than 3*keylength completely intentionally.
	// AT LEAST ALL THOSE >=1xKEYLENGHT ARE USABLE in a similar ways.
	// I have NOT checked them, because I THINK that they are short, and will produce so WIDE key
	// possibilities, that it will not invalidate any further possible keys.
	// I may be wrong, weren't I so lazy, I'd check check them against all possible cross-offsets.

	// also, the code is severely underoptimized. I tried to keep it straightforward both because
	// I'm lazy, and because I wanted to show the very basics. It was meant to be an introductory
	// example, not a NSA-grade distributed breaker:) I think it should be written all the way
	// differently, but it was written as a test answering "is it really simple XOR?",
	// so it is AS IS.

	// have fun, if any
	// q.

	cin.sync();
    cin.get();
}
		
  1. Nie posiadasz obsługi JavaScript. Aby potwierdzić, że nie jesteś botem, wpisz tutaj wartość NIGXV
4programmers.net