from IPython.html.widgets import * from IPython.display import display from collections import Counter from itertools import combinations, chain from hashlib import sha384 from time import time from sys import stdout text = TextWidget() display(text) s0 = """/*X It doesn't matter you use vim, emacs, or echo.X Real programmers use one-way hash functions to compile their source code.X This file can be compiled to executable binary by gcc or SHA384.X To get this file, simplyX $ wget https://raw.githubusercontent.com/tjwei/tjw_ipynb/master/a.cX To compile this file, eitherX $ sha384sum a.cX|Xxxd -r -pX>Xa.outX&&Xchmod a+x a.outX orX $ gcc a.cX Then tryX $ ./a.outX to execute the binary.X */X int main(int argc,Xchar **argv)X {X Xreturn 0;X }X X""" L = Counter(s0)['X'] print "L=", L all_comb = chain( *[combinations(range(L), r) for r in range(L+1)] ) iters = 0 for iters, idx in enumerate(all_comb): x=[0]*L for i in idx: x[i]=1 s=s0.replace('X', '%s')%tuple(' '*x[i] for i in range(L)) d = sha384(s).digest() if iters%10000 ==0: text.value="iters={} progress={:5.2f}%".format(iters,iters*100.0/2**L) if d.startswith('\n#') and d.rindex('\n')==0: print x print s.replace(' ', '.') with open('a.c', 'w') as f: f.write(s) break print "done", sum(x) from IPython.parallel import Client rc = Client() view = rc[:] from sys import stdout from collections import Counter from itertools import combinations, chain from more_itertools import chunked from IPython.display import display from IPython.html.widgets import * from time import time %%px from hashlib import sha384, md5, sha1 def check_hash(start): for x in range(start, start+step): s=s0%tuple(' '*((x>>i)&1) for i in range(L)) d = sha384(s).digest() if d.startswith(src) and d.rindex('\n')==first_n: with open('0.c', 'w') as f: f.write(s) return s s0="""#include X/*X echo "Hello world!"X<< } &&XexitX X It doesn't matter you use vim, emacs, or echo.X Real programmers use one-way functions to compile programs.X X $ sh 0.cX $ gcc 0.cX&&X./a.outX $ sha384sum 0.cX|Xxxd -r -pX>Xa.outX&&Xchmod a+x a.outX $ ./a.outX X sha384 was chosen because we can fit an ELF in the digest!(48>45)X X*/X main()X{X Xputs("Hello world!");X } X""" step = 1000 src = '. *\n' L = Counter(s0)['X'] s0 = s0.replace('X', '%s') first_n = src.index('\n') view.push(dict(src=src, step=step, s0=s0, L=L, first_n=first_n)) t0 = time() expected_number = 256**len(src) print "L=", L stdout.flush() text = TextWidget() display(text) for chunk in chunked(xrange(0, 2**L, step), 500): result = view.map_sync(lambda i:check_hash(i), chunk) if any(result): break i = chunk[0] elapsed = (time()-t0)/60 percent = i*100.0/expected_number exp = 0 if i==0 else 100.0*elapsed/percent/60.0/24.0 text.value = "{} {:5.2f}% {:.1f}min EXP={:.2f}days".format(i, percent, elapsed, exp) print "done" %%px from hashlib import sha384, md5, sha1 table = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_." def check_hash(start): for x in range(start, start+step): s=s0%tuple( table[(x>>(6*i))&63] for i in range(L)) d = sha384(s).digest() if d.startswith(src) and d.rindex('\n')==first_n: with open('0.c', 'w') as f: f.write(s) return s s0 = """#define echo main(){puts( #define exit );} echo "Hello world!" exit """ step = 1000 src = 'sh *\n#' L = Counter(s0)['X'] s0 = s0.replace('X', '%s') first_n = src.index('\n') view.push(dict(src=src, step=step, s0=s0, L=L, first_n=first_n)) t0 = time() expected_number = 256**len(src) print "L=", L stdout.flush() text = TextWidget() display(text) for chunk in chunked(xrange(0, 2**(6*L), step), 500): result = view.map_sync(lambda i:check_hash(i), chunk) if any(result): break i = chunk[0] elapsed = (time()-t0)/60 percent = i*100.0/expected_number exp = 0 if i==0 else 100.0*elapsed/percent/60.0/24.0 text.value = "{} {:5.2f}% {:.1f}min EXP={:.2f}days".format(i, percent, elapsed, exp) print "done" # OPENCL VERSION STARTS HERE from collections import Counter s0="""#include X/*X echo "Hello world!"X<< '}'X&&XexitX X It doesn't matter you use vim, emacs, or echo.X Real programmers use one-way functions to compile programs.X X Usage:X $ mkdir testX $ cd testX $ wget https://raw.githubusercontent.com/tjwei/tjw_ipynb/master/0.cX $ sh 0.cX Hello World!X $ gcc 0.cX&&X./a.outX Hello World!X $ shasum -a 384 0.cX|Xxxd -r -pX>Xa.outX&&Xchmod a+x a.outX $ ./a.outX Hello World!X X sha384 was chosenX because we can fit an ELF in the digest!(48>45)X X*/X main()X{X Xputs("Hello world!");X } X""" len(s0), Counter(s0)['X'] from more_itertools import chunked sp = s0.split('X')[:-1] prefix = '. *\n' s0_ord = map(ord, s0.replace('X', ' ')) with open("opencl_realman.h" ,"w") as f: print >>f, "__constant uint8_t SRC[%d]={"%len(s0)+ ",\n".join(str(x)[1:-1] for x in chunked(s0_ord, 20))+"};" pos = 0 print >>f, "#define UPDATE \\" xx = 1 for l in sp: L=len(l)+1 print >>f, "ctx_update(ctx, SRC+%d, (x&0x%xUL) ? %d : %d);\\"%(pos, xx, L, L-1) pos=pos+L xx<<=1 print >>f prefix_hex=prefix_mask=0 for i, c in enumerate(prefix): offset = 56-8*i prefix_hex |= ord(c)<>f, "#define CONDITION (ctx->H[0]&0x%xUL) == 0x%xUL"%(prefix_mask, prefix_hex) with open("opencl_realman.h" ,"r") as f: print f.read() from __future__ import division import numpy as np import pyopencl as cl import pyopencl.array import pyopencl.tools %load_ext pyopencl.ipython_ext ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) device=ctx.devices[0] print device.max_mem_alloc_size print device.max_work_group_size %%cl_kernel -o "-I ." #define _OPENCL_COMPILER #define DEVICE_INFO 2|64|1024 #include "opencl_cryptsha512.h" #include "opencl_realman.h" __constant uint64_t k[] = { 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL, 0x3956c25bf348b538UL, 0x59f111f1b605d019UL, 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL, 0xd807aa98a3030242UL, 0x12835b0145706fbeUL, 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL, 0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL, 0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, 0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL, 0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL, 0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL, 0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, 0x06ca6351e003826fUL, 0x142929670a0e6e70UL, 0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL, 0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, 0x81c2c92e47edaee6UL, 0x92722c851482353bUL, 0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL, 0xd192e819d6ef5218UL, 0xd69906245565a910UL, 0xf40e35855771202aUL, 0x106aa07032bbd1b8UL, 0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL, 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL, 0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL, 0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL, 0x90befffa23631e28UL, 0xa4506cebde82bde9UL, 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL, 0xca273eceea26619cUL, 0xd186b8c721c0c207UL, 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL, 0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, 0x113f9804bef90daeUL, 0x1b710b35131c471bUL, 0x28db77f523047d84UL, 0x32caab7b40c72493UL, 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL, 0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL, }; inline void init_ctx384(__local sha512_ctx * ctx) { ctx->H[0] = 0xcbbb9d5dc1059ed8UL; ctx->H[1] = 0x629a292a367cd507UL; ctx->H[2] = 0x9159015a3070dd17UL; ctx->H[3] = 0x152fecd8f70e5939UL; ctx->H[4] = 0x67332667ffc00b31UL; ctx->H[5] = 0x8eb44a8768581511UL; ctx->H[6] = 0xdb0c2e0d64f98fa7UL; ctx->H[7] = 0x47b5481dbefa4fa4UL; ctx->total = 0; ctx->buflen = 0; } inline void sha512_block(__local sha512_ctx * ctx) { uint64_t a = ctx->H[0]; uint64_t b = ctx->H[1]; uint64_t c = ctx->H[2]; uint64_t d = ctx->H[3]; uint64_t e = ctx->H[4]; uint64_t f = ctx->H[5]; uint64_t g = ctx->H[6]; uint64_t h = ctx->H[7]; uint64_t t1, t2; uint64_t w[16]; #ifdef VECTOR_USAGE ulong16 w_vector; w_vector = vload16(0, ctx->buffer->mem_64); w_vector = SWAP64_V(w_vector); vstore16(w_vector, 0, w); #else #pragma unroll for (int i = 0; i < 16; i++) w[i] = SWAP64(ctx->buffer->mem_64[i]); #endif #pragma unroll for (int i = 0; i < 80; i++) { if (i > 15) { w[i & 15] = sigma1(w[(i - 2) & 15]) + sigma0(w[(i - 15) & 15]) + w[(i - 16) & 15] + w[(i - 7) & 15]; } t1 = k[i] + w[i & 15] + h + Sigma1(e) + Ch(e, f, g); t2 = Maj(a, b, c) + Sigma0(a); h = g; g = f; f = e; e = d + t1; d = c; c = b; b = a; a = t1 + t2; } /* Put checksum in context given as argument. */ ctx->H[0] += a; ctx->H[1] += b; ctx->H[2] += c; ctx->H[3] += d; ctx->H[4] += e; ctx->H[5] += f; ctx->H[6] += g; ctx->H[7] += h; } inline void insert_to_buffer(__local sha512_ctx * ctx, __constant uint8_t * string, const uint32_t len) { __local uint8_t * dest; dest = ctx->buffer->mem_08 + ctx->buflen; for (int i = 0; i < len; i++) PUTCHAR(dest, i, GETCHAR(string, i)); ctx->buflen += len; } inline void ctx_update(__local sha512_ctx * ctx, __constant uint8_t * string, uint32_t len) { ctx->total += len; uint32_t startpos = ctx->buflen; uint32_t offset = 0; while(1){ insert_to_buffer(ctx, string+offset, (len -offset <= 128 - startpos ? len-offset : 128 - startpos)); if(ctx->buflen<128) break; mem_fence(CLK_LOCAL_MEM_FENCE); sha512_block(ctx); offset = offset +128 - startpos; ctx->buflen = 0; startpos=0; } } inline void ctx_append_1(__local sha512_ctx * ctx) { uint32_t length = ctx->buflen; PUTCHAR(ctx->buffer->mem_08, length, 0x80); while (++length & 3) PUTCHAR(ctx->buffer->mem_08, length, 0); if (length & 7) { __local uint32_t * l = (__local uint32_t *) (ctx->buffer->mem_08 + length); *l = 0; length += 4; } __local uint64_t * l = (__local uint64_t *) (ctx->buffer->mem_08 + length); while (length < 128) { *l++ = 0; length += 8; } } inline void ctx_add_length(__local sha512_ctx * ctx) { ctx->buffer->mem_64[15] = SWAP64((uint64_t) (ctx->total * 8)); } inline void finish_ctx(__local sha512_ctx * ctx) { ctx_append_1(ctx); ctx_add_length(ctx); ctx->buflen = 0; } inline void clear_ctx_buffer(__local sha512_ctx * ctx) { #ifdef VECTOR_USAGE ulong16 w_vector = 0; vstore16(w_vector, 0, ctx->buffer->mem_64); #else #pragma unroll for (int i = 0; i < 16; i++) ctx->buffer->mem_64[i] = 0; #endif ctx->buflen = 0; } inline void sha384_digest(__local sha512_ctx * ctx, __global uint8_t *result) { if (ctx->buflen <= 111) { //data+0x80+datasize fits in one 1024bit block finish_ctx(ctx); } else { bool moved = true; if (ctx->buflen < 128) { //data and 0x80 fits in one block ctx_append_1(ctx); moved = false; } sha512_block(ctx); clear_ctx_buffer(ctx); if (moved) //append 1,the rest is already clean PUTCHAR(ctx->buffer->mem_08, 0, 0x80); ctx_add_length(ctx); } sha512_block(ctx); if( CONDITION ) *result=1; else *result=0; /* #pragma unroll for (int i = 0; i < 6; i++) result[i] = SWAP64(ctx->H[i]); */ } inline void sha384crypt(__local sha512_ctx * ctx, __global uint8_t *output, uint64_t x) { init_ctx384(ctx); UPDATE; sha384_digest(ctx, output); } __kernel void kernel_sha384(uint64_t base, __global uint8_t * out_buffer, __local sha512_ctx * ctx_memory) { size_t gid = get_global_id(0); size_t lid = get_local_id(0); uint64_t x = base + gid; sha384crypt(&ctx_memory[lid], &out_buffer[gid], x); } from time import sleep step = 10**7 t0 = time() expected_number = 256**4 L=len(sp) print "L=", L stdout.flush() text = TextWidget() display(text) for i in xrange(0,2**L, step): output=cl.array.zeros(queue, step, dtype=np.uint8) working = cl.LocalMemory(200*128) kernel_sha384(queue, (step,), (128,), np.uint64(i), output.data, working).wait() result = output.get() for j in np.where(result==1)[0]: x =i+j s=s0.replace('X', '%s')%tuple(' '*((x>>i)&1) for i in range(L)) print x,repr(sha384(s).digest()) with open("good/%d.c"%x, "w") as f: f.write(s) stdout.flush() elapsed = (time()-t0)/60 percent = i*100.0/expected_number exp = 0 if i==0 else 100.0*elapsed/percent/60.0 text.value = "{} {:5.2f}% {:.2f}min EXP={:.2f}hrs".format(i, percent, elapsed, exp) sleep(2) print "done"