finding entropy in binary files
Update: added routine to print out hex data for blocks where entropy passes a given threshold. Tidied up code.
Ero Carrera responded yesterday to a request on OpenRCE about using entropy analysis to find RSA keys and other random blocks of data in binaries. Here is a full wrapper for the code he gives. We use matplotlib instead of Mathematica to generate the graph. Also, if you plan to scan files larger than 100k, I’d highly recommend downloading the modified progressBar class included here.
example output:
Target data:
# Test input, three 1024-byte runs: byte values 0-64, then the full 0-255
# range, then 0-64 again (random.randint bounds are inclusive).
data = ''.join (
[chr (random.randint (0, 64)) for x in xrange (1024)] +
[chr (random.randint (0, 255)) for x in xrange (1024)] +
[chr (random.randint (0, 64)) for x in xrange (1024)] )
[==================================100%=====================================]
949 7.00: 1a060113050c2d0d 17302e091d2d0117 →♠☺‼♣♀- ↨0. ↔-☺↨
entropy_graph.py
"""
Entropy scan
H() and entropy_scan() originally by Ero Carrera (blog.dkbza.org)
Modified May 2007 by cyphunk (deadhacker.com)

USAGE:
    cmd [target_path]
"""

# FLAGS:
SHOWPROGRESS = 1      # Show console progress bar?
PRINTONTHRESHOLD = 7  # When a block's entropy is > this threshold, print
                      # its first 16 bytes in both hex and ascii.
                      # Set to 0 to turn off.
ONLYFIRSTBLOCK = 1    # Set to 1 to print only the first block of each run
                      # of consecutive over-threshold windows (windows that
                      # are only offset by 1). Set to 0 to print every
                      # window that goes over the threshold.
BLOCKSIZE = 256       # Size of the blocks scanned.

import math
import random
# The wildcard imports are kept in their original order: a later one can
# shadow names exported by an earlier one.
from pylab import *
import tkFileDialog
from Tkinter import *
from progressBar import *
from binascii import hexlify
# Imported explicitly, and after the wildcard imports, so that `string` and
# `sys` are guaranteed to be the standard-library modules instead of
# depending on whatever the wildcard imports happen to export.
import string
import sys


def H(data):
    """Return the Shannon entropy of `data` in bits per byte.

    `data` is a byte string. The result is 0 for empty input and otherwise
    lies between 0 (every byte identical) and 8 (all 256 byte values
    equally frequent).
    """
    if not data:
        return 0
    size = len(data)
    entropy = 0
    for x in range(256):
        p_x = float(data.count(chr(x))) / size
        if p_x > 0:
            entropy += -p_x * math.log(p_x, 2)
    return entropy


def entropy_scan(data, block_size):
    """Yield H() for every `block_size`-byte window of `data`.

    One value is yielded per offset, in order, so the n-th value is the
    entropy of data[n : n + block_size]. Nothing is yielded when `data` is
    shorter than `block_size`. When SHOWPROGRESS is set, a console progress
    bar is updated as the scan advances.
    """
    # Offsets 0 .. len(data) - block_size (inclusive) each start a full
    # window, hence the "+ 1"; clamp at 0 for inputs shorter than one block.
    num_blocks = max(len(data) - block_size + 1, 0)

    # Only build a bar when there is something to scan: a bar over an empty
    # range has a zero span to divide by.
    progress = None
    if SHOWPROGRESS and num_blocks > 0:
        progress = progressBar(0, num_blocks, 77)

    for offset in range(num_blocks):
        if progress is not None:
            progress(offset + 1)
        yield H(data[offset:offset + block_size])


# Get the target file from the command line, or from a file dialog:
filename = ""
if sys.argv[1:]:
    filename = sys.argv[1]
else:
    root = Tk()
    root.withdraw()
    filename = tkFileDialog.askopenfilename(title="Target binary",
                                            filetypes=[("All files", "*")])

# Run, print graph:
if filename:
    # Read the whole target and scan it for entropy. try/finally makes sure
    # the file handle is released even if the read fails.
    target = open(filename, 'rb')
    try:
        raw = target.read()
    finally:
        target.close()
    results = list(entropy_scan(raw, BLOCKSIZE))

    # Print blocks that are above the defined entropy threshold:
    if PRINTONTHRESHOLD > 0:
        print("")
        # Don't like newlines: map CR, LF and TAB to spaces in the ascii
        # column. Both maketrans arguments must have the same length.
        table = string.maketrans("\r\n\t", "   ")
        found = 0
        for i, entropy in enumerate(results):
            if entropy > PRINTONTHRESHOLD:
                if found == 0:
                    blockstr = string.translate(str(raw[i:i + 16]), table)
                    print("%8d %.2f: %s %s %s" % (i, entropy,
                                                  hexlify(raw[i:i + 8]),
                                                  hexlify(raw[i + 8:i + 16]),
                                                  blockstr))
                    # With ONLYFIRSTBLOCK set this suppresses output until
                    # the entropy drops back under the threshold.
                    found = ONLYFIRSTBLOCK
            else:
                found = 0

    # Plot
    plot(results)
    xlabel('block')
    ylabel('entropy')
    title('Entropy levels')
    grid(True)
    show()
progressBar.py (originally from ActiveState, but modified for our use)
import sys
class progressBar:
    """Text-based progress bar.

    Calling the object with a value redraws the bar on stdout; str() of the
    object returns the current bar, which looks something like this:

        [=======>         22%                  ]

    The bar's total width and its min and max values are set on init.
    """

    def __init__(self, minValue = 0, maxValue = 100, totalWidth=80):
        """Create a bar covering minValue..maxValue, totalWidth chars wide."""
        self.progBar = "[]"     # This holds the progress bar string
        self.min = minValue
        self.max = maxValue
        self.span = maxValue - minValue
        self.width = totalWidth
        self.amount = 0         # When amount == max, we are 100% done
        self._old_pbar = ""     # Last string written; used to track change
        self.pbar_str = ""
        self.updateAmount(0)    # Build progress bar string

    def updateAmount(self, newAmount = 0):
        """Set the current amount and rebuild the progress bar string.

        newAmount is clamped to the min and max values given at
        initialization. Nothing is printed; see __call__ for that.
        """
        if newAmount < self.min:
            newAmount = self.min
        if newAmount > self.max:
            newAmount = self.max
        self.amount = newAmount

        # Figure out the new percent done, rounded to an integer. An empty
        # (or inverted) range has nothing left to do, so show it as
        # complete instead of dividing by zero.
        if self.span > 0:
            diffFromMin = float(self.amount - self.min)
            percentDone = int(round((diffFromMin / float(self.span)) * 100.0))
        else:
            percentDone = 100

        # Figure out how many hash bars the percentage should be
        allFull = self.width - 2
        numHashes = int(round((percentDone / 100.0) * allFull))

        # Build a progress bar with an arrow of equal signs; special cases
        # for empty and full
        if numHashes == 0:
            self.progBar = "[>%s]" % (' ' * (allFull - 1))
        elif numHashes == allFull:
            self.progBar = "[%s]" % ('=' * allFull)
        else:
            self.progBar = "[%s>%s]" % ('=' * (numHashes - 1),
                                        ' ' * (allFull - numHashes))

        # Figure out where to put the percentage, roughly centered. Floor
        # division keeps the slice index an integer.
        percentPlace = (len(self.progBar) // 2) - len(str(percentDone))
        percentString = str(percentDone) + "%"

        # Slice the percentage into the bar
        self.progBar = ''.join([self.progBar[0:percentPlace], percentString,
                                self.progBar[percentPlace + len(percentString):]
                                ])

    def __str__(self):
        """Return the current progress bar string."""
        return str(self.progBar)

    def __call__(self, value):
        """Update the amount and, if the bar changed, write it to stdout.

        The bar is followed by a carriage return so that the next write
        overwrites the current line. Redundant redraws are skipped when the
        rendered string has not changed.
        """
        self.updateAmount(value)
        self.pbar_str = str(self)
        if self.pbar_str != self._old_pbar:
            self._old_pbar = self.pbar_str
            sys.stdout.write(self.pbar_str + '\r')
            sys.stdout.flush()
About this entry
You’re currently reading “finding entropy in binary files,” an entry on deadhacker.com
- Published:
- May 13, 2007 / 3:41 am
- Category:
- Reverse Engineering
- Tags:

7 Comments
Jump to comment form | comments rss [?] | trackback uri [?]