// http://www.team5150.com/~andrew/blog/2007/03/breaking_superfasthash.html #include #include #define SEED ( 0 ) /* A copy of SuperFastHash to ensure we're actually making duplicates */ #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) || defined(_MSC_VER) || defined (__BORLANDC__) #define get16bits(d) ( *( (const unsigned short *)( d ) ) ) #endif #if !defined(get16bits) #define get16bits(d) ((((const unsigned char *)d)[1] << 8)\ +((const unsigned char *)d)[0]) #endif unsigned int SuperFastHash( const char *data, size_t size ) { if ( !size || !data ) return ( 0 ); unsigned int hash = (unsigned int )size + SEED, tmp; size_t remainder = ( size & 3 ); size >>= ( 2 ); for ( ; size > 0; size-- ) { hash += get16bits( data ); tmp = ( ( get16bits( data + 2 ) << 11 ) ^ hash ); hash = ( ( hash << 16 ) ^ tmp ); data += ( 2 * sizeof( unsigned short ) ); hash += ( hash >> 11 ); } switch ( remainder ) { case 3: { hash += ( get16bits ( data ) ); hash ^= ( hash << 16 ); hash ^= ( data[ sizeof( unsigned short ) ] << 18 ); hash += ( hash >> 11 ); } break; case 2: { hash += ( get16bits ( data ) ); hash ^= ( hash << 11 ); hash += ( hash >> 17 ); } break; case 1: { hash += ( *data ); hash ^= ( hash << 10 ); hash += ( hash >> 1 ); } break; } hash ^= ( hash << 3 ); hash += ( hash >> 5 ); hash ^= ( hash << 4 ); hash += ( hash >> 17 ); hash ^= ( hash << 25 ); hash += ( hash >> 6 ); return ( hash ); } /* easy way to limit duplicates to certain characters / classes of characters */ class Charset { public: Charset( ) { init(); } Charset &add_symbols() { add_allowed_charset( "!@#$%^&*()_+-=[]\\{}|;',./:\"<>?~`" ); return ( *this ); } Charset &add_numeric() { add_allowed_charset( "0123456789" ); return ( *this ); } Charset &add_lowercase() { add_allowed_charset( "abcdefghijklmnopqrstuvwxyz" ); return ( *this ); } Charset &add_uppercase() { add_allowed_charset( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" ); return ( *this ); } void add_all() { for ( int c = 0; c < 256; ++c ) { mAllowed[ c ] = true; mCharset[ c ] = c; } mCharacters = ( 256 ); } Charset &add_extended() { for ( int c = 128; c < 256; ++c ) { if ( !mAllowed[ c ] ) { mAllowed[ c ] = ( true ); mCharset[ mCharacters++ ] = ( c ); } } return ( *this ); } Charset &add_allowed_charset( const char *charset ) { while ( *charset ) { unsigned char c = ( (unsigned char )*charset++ ); if ( !mAllowed[ c ] ) { mAllowed[ c ] = ( true ); mCharset[ mCharacters++ ] = ( c ); } } return ( *this ); } void emit() const { printf( "%d allowed characters: ", mCharacters ); for ( int i = 0; i < mCharacters; ++i ) printf( "%c", mCharset[ i ] ); printf( "\n" ); } Charset &init( ) { memset( mAllowed, 0, sizeof( mAllowed ) ); mCharacters = ( 0 ); return ( *this ); } int count( ) const { return ( mCharacters ); } bool allowed( unsigned char c ) const { return ( mAllowed[ c ] ); } const unsigned char &operator[] ( unsigned char i ) const { return ( mCharset[ i ] ); } private: bool mAllowed[ 256 ]; unsigned char mCharset[ 256 ]; int mCharacters; }; /* attack state */ class Hsieh_Attack { public: Hsieh_Attack( ) {} void solve( const char *data, unsigned int len ) { mDuplicates = ( 0 ); // get round constants and key len if ( !init( data, len ) ) return; memset( mWorkingKey, 0, sizeof( mWorkingKey ) ); mCurrentKeyPos = ( &mWorkingKey[0] ); mKeyMax = ( mWorkingKey + mKeyLen ); mRoundIndex = ( 0 ); solve_round1( SEED + mKeyLen ); // initial hash value is length of string mCharset.emit( ); printf( "source hash %08x\n", SuperFastHash( data, len ) ); printf( "source string: \"%s\"\n", data ); printf( "duplicate hashes: %d\n", duplicates() ); } void solve( const char *data ) { solve( data, (unsigned int )strlen( data ) ); } unsigned int duplicates() const { return ( mDuplicates > 0 ) ? ( mDuplicates - 1 ) : 0; } Charset &charset() { return( mCharset ); } private: bool init( const char *data, unsigned int key_len ); void solve_round1( unsigned int initial_hash ); void solve_round2( unsigned int hash ); private: Charset mCharset; unsigned int mKeyLen; unsigned char mWorkingKey[ 1024 ], *mCurrentKeyPos, *mKeyMax; unsigned int mRoundIndex, mRound2_Step1[ 128 ], mRound2_Step2[ 128 ]; unsigned int mDuplicates; }; bool Hsieh_Attack::init( const char *data, unsigned int key_len ) { mKeyLen = ( key_len ); if ( mKeyLen % 8 != 0 ) { printf( "source string length needs to be a multiple of 8!\n" ); return ( false ); } // initial hash value is length of string unsigned int hash = ( SEED + mKeyLen ), tmp; // set up the round constants for ( unsigned int i = 0; i < ( mKeyLen >> 3 ); ++i ) { hash += get16bits( data ); tmp = ( ( get16bits( data + 2 ) << 11 ) ^ hash ); hash = ( ( hash << 16 ) ^ tmp ); hash += ( hash >> 11 ); data += 4; hash += get16bits( data ); tmp = ( ( get16bits( data + 2 ) << 11 ) ^ hash ); mRound2_Step1[ i ] = ( hash & 0x0000ffff); mRound2_Step2[ i ] = ( tmp ); hash = ( ( hash << 16 ) ^ tmp ); hash += ( hash >> 11 ); data += 4; } return ( true ); } void Hsieh_Attack::solve_round1( unsigned int initial_hash ) { // the round2_step1 values we care about need to be masked off unsigned char r21_masked[ 2 ] = { ( mRound2_Step1[ mRoundIndex ] & 0x00ff ), ( mRound2_Step1[ mRoundIndex ] & 0xff00 ) >> 8, }; const unsigned int count = ( mCharset.count() ); for ( unsigned int i = 0; i < count; ++i ) { for ( unsigned int j = 0; j < count; ++j ) { for ( unsigned int k = 0; k < count; ++k ) { for ( unsigned int l = 0; l < count; ++l ) { mCurrentKeyPos[ 0 ] = ( mCharset[ i ] ); mCurrentKeyPos[ 1 ] = ( mCharset[ j ] ); mCurrentKeyPos[ 2 ] = ( mCharset[ k ] ); mCurrentKeyPos[ 3 ] = ( mCharset[ l ] ); // walk through the first round unsigned int hash = ( initial_hash ), tmp; hash += get16bits( mCurrentKeyPos ); tmp = ( ( get16bits( mCurrentKeyPos + 2 ) << 11 ) ^ hash ); hash = ( ( hash << 16 ) ^ tmp ); hash += ( hash >> 11 ); // find the two characters that satisfy the next step unsigned char h[ 2 ] = { ( hash & 0x00ff ), ( hash & 0xff00 ) >> 8 }; unsigned char low = ( r21_masked[ 0 ] - h[ 0 ] ); unsigned char high = ( r21_masked[ 1 ] - ( h[ 1 ] + ( ( (unsigned int )h[ 0 ] + (unsigned int )low ) >> 8 ) ) ); if ( mCharset.allowed( low ) && mCharset.allowed( high ) ) { mCurrentKeyPos[ 4 ] = ( low ); mCurrentKeyPos[ 5 ] = ( high ); solve_round2( hash ); } } } } } } void Hsieh_Attack::solve_round2( unsigned int hash ) { hash += get16bits( mCurrentKeyPos + 4 ); // reverse (2:2) unsigned int tmp = ( mRound2_Step2[ mRoundIndex ] ^ hash ) >> 11; // tmp can only affect the lower 16 bits, if something is set up top we can't flip it if ( ( tmp & 0xffff0000 ) != 0 ) return; // deduce final two characters unsigned char low = ( tmp & 0xff ), high = ( tmp >> 8 ); if ( !mCharset.allowed( low ) || !mCharset.allowed( high ) ) return; mCurrentKeyPos[ 6 ] = ( low ); mCurrentKeyPos[ 7 ] = ( high ); // (2:2) tmp = ( ( get16bits( mCurrentKeyPos + 6 ) << 11 ) ^ hash ); hash = ( ( hash << 16 ) ^ tmp ); hash += ( hash >> 11 ); // make sure it matches if ( tmp != mRound2_Step2[ mRoundIndex ] ) return; // generated enough characters? if ( ( mCurrentKeyPos + 8 ) == mKeyMax ) { mDuplicates++; printf( "%s: %08x\n", mWorkingKey, SuperFastHash( (char *)mWorkingKey, mKeyLen ) ); } else { // next group of 8 mRoundIndex += 1; mCurrentKeyPos += 8; solve_round1( hash ); mCurrentKeyPos -= 8; mRoundIndex -= 1; } } int main( int argc, char *argv[] ) { Hsieh_Attack attack; attack.charset() .init() .add_numeric() .add_lowercase() .add_allowed_charset( " $*:" ); attack.solve( "********" ); printf( "\nmy favorite collisions:\n" ); attack.charset() .init() .add_allowed_charset( "0123456 =/<>" ); attack.solve( "10/5 = 2" ); return ( 0 ); }