# A pure Python replacement for libmagic. Supports most libmagic features, plus
# several additional features not provided by libmagic. Tailored specifically
# for quickly searching blocks of data for multiple embedded signatures.

# Only the Magic class is exported by 'from <this module> import *'; the
# other classes defined in this file are not part of the wildcard export.
__all__ = ['Magic']

import re
import struct
import datetime
import binwalk.core.common
import binwalk.core.compat
from binwalk.core.exceptions import ParserException


class SignatureResult(binwalk.core.module.Result):

    '''
    Container class for signature results.

    Sets default values for the attributes controlled by signature keyword
    tags, then hands all keyword arguments to the parent Result constructor.
    '''

    # NOTE(review): binwalk.core.module is not among the imports visible at the
    # top of this file; presumably it has already been imported elsewhere by
    # the time this module is loaded -- confirm.

    def __init__(self, **kwargs):
        '''
        Class constructor.

        @kwargs - Keyword arguments passed through to binwalk.core.module.Result.

        Returns None.
        '''
        # These are set by signature keyword tags.
        # Keyword tags can also set any other object attributes,
        # including those in binwalk.core.module.Result.
        self.jump = 0
        self.many = False
        self.adjust = 0
        self.strlen = 0
        self.string = False
        self.invalid = False
        self.once = False
        self.overlap = False
        self.end = False

        # These are set by code internally
        self.id = 0

        # Kwargs overrides the defaults set above.
        # The class is named explicitly: super(self.__class__, self) resolves to
        # this same __init__ when called on a subclass instance, which recurses
        # forever.
        super(SignatureResult, self).__init__(**kwargs)

        # Computed after the parent constructor so that it reflects whatever
        # value self.invalid holds at that point.
        self.valid = (not self.invalid)


class SignatureLine(object):

    '''
    Responsible for parsing signature lines from magic signature files.

    After construction the following attributes are set:

        text       - The original, unmodified signature line.
        level      - Indentation level (number of '>' characters in the offset field).
        offset     - Integer offset, or the raw offset string if it is a complex expression.
        type       - Data type with sign/endianness prefixes removed ('long', 'string', ...).
        signed     - False if the data type had a 'u' prefix, else True.
        endianness - '>' (big endian, the default) or '<' (little endian).
        operator   - Operator applied to the data before comparison, or None.
        opvalue    - Operand for self.operator (int, or string if it is a complex expression).
        condition  - Comparison type ('=', '!', '>', '<', '&', '|', '^', '~').
        value      - Comparison value (None for wildcards, compiled regex for regex types).
        regex      - True if this is a regex type line.
        fmt        - struct format character, or None for string/regex types.
        pkfmt      - Full struct format string (endianness + fmt), or None.
        size       - Number of bytes of data this line operates on.
        format     - Printable format string, with all {tags} removed.
        tags       - Dictionary of tag names to tag values parsed from the format string.
    '''

    # Printed strings are truncated to this size
    MAX_STRING_SIZE = 128

    # Supported data type operators. Multi-character operators are listed before
    # single-character operators that they contain, so that when two operators
    # are found at the same position the longer one is selected.
    _OPERATORS = ('**', '<<', '>>', '&', '|', '*', '+', '-', '/', '~', '^')

    # Characters that, when leading the comparison value, select the comparison type
    _CONDITIONS = ('=', '!', '>', '<', '&', '|', '^', '~')

    # (signed struct format character, size in bytes) for each integer data type
    _INTEGER_FORMATS = {
        'byte': ('b', 1),
        'short': ('h', 2),
        'long': ('i', 4),
        'date': ('i', 4),
        'quad': ('q', 8),
    }

    # Regex to parse out tags, which are contained within curly braces
    _TAG_REGEX = re.compile(r'\{.*?\}')

    def __init__(self, line):
        '''
        Class constructor. Responsible for parsing a line from a signature file.

        @line - A line of text from the signature file.

        Raises a ParserException if the line cannot be parsed.

        Returns None.
        '''
        self.tags = {}
        self.text = line
        self.regex = False

        # Split the line on any white space; for this to work, backslash-escaped
        # spaces ('\ ') are replaced with their escaped hex value ('\x20').
        #
        # [offset] [data type] [comparison value] [format string]
        # 0        belong      0x12345678         Foo file type,
        # >4       string      x                  file name: %s,
        parts = line.replace('\\ ', '\\x20').split(None, 3)

        # Sanity check on the split line
        if len(parts) not in [3, 4]:
            raise ParserException("Invalid signature line: '%s'" % line)

        # The indentation level is determined by the number of '>' characters at
        # the beginning of the signature line.
        self.level = parts[0].count('>')

        # Get rid of the indentation characters and try to convert the remaining
        # characters to an integer offset. This will fail if the offset is a complex
        # value (e.g., '(4.l+16)'), in which case the string is kept as-is.
        self.offset = parts[0].replace('>', '')
        try:
            self.offset = int(self.offset, 0)
        except ValueError:
            pass

        # self.type is the specified data type ('belong', 'string', etc)
        self.type = parts[1]
        self.opvalue = None
        self.operator = None

        # Each data type can specify an additional operation to be performed on the
        # data being scanned before performing a comparison (e.g., 'belong&0xFF' will
        # AND the data with 0xFF before the comparison is performed).
        #
        # Only one operator is supported per line, and it is the one that appears
        # first in the type field; any operators after it belong to the operator
        # value (e.g., in 'belong+(4.l*2)' the operator is '+', not '*').
        first = None
        for operator in self._OPERATORS:
            position = self.type.find(operator)
            if position != -1 and (first is None or position < first[0]):
                first = (position, operator)

        if first is not None:
            # Keep a record of the specified operator, and split self.type into
            # the type and operator value
            self.operator = first[1]
            (self.type, self.opvalue) = self.type.split(self.operator, 1)

            # Try to convert the operator value into an integer. This works for
            # simple operator values, but not for complex types (e.g., '(4.l+12)').
            try:
                self.opvalue = int(self.opvalue, 0)
            except ValueError:
                pass

        # If the specified type starts with 'u' (e.g., 'ubelong'), then it is
        # unsigned; else, it is signed. startswith is used so that an empty type
        # is reported below as an unknown data type rather than an IndexError.
        if self.type.startswith('u'):
            self.signed = False
            self.type = self.type[1:]
        else:
            self.signed = True

        # Big endian values start with 'be' ('belong'), little endian values start with 'le' ('lelong').
        # The struct module uses '>' to denote big endian and '<' to denote
        # little endian. Big endian is assumed if no endianness was explicitly specified.
        if self.type.startswith('le'):
            self.endianness = '<'
            self.type = self.type[2:]
        else:
            self.endianness = '>'
            if self.type.startswith('be'):
                self.type = self.type[2:]

        # Check the comparison value for the type of comparison to be performed (e.g.,
        # '=0x1234', '>0x1234', etc). If no operator is specified, '=' is implied.
        if parts[2][0] in self._CONDITIONS:
            self.condition = parts[2][0]
            self.value = parts[2][1:]
        else:
            self.condition = '='
            self.value = parts[2]

        # If this is a wildcard value, explicitly set self.value to None
        if self.value == 'x':
            self.value = None
        # String values need to be decoded, as they may contain escape
        # characters (e.g., '\x20')
        elif self.type == 'string':
            # String types support multiplication to easily match large
            # repeating byte sequences
            if '*' in self.value:
                try:
                    p = self.value.split('*')
                    self.value = p[0]
                    for n in p[1:]:
                        self.value *= int(n, 0)
                except Exception:
                    raise ParserException("Failed to expand string '%s' with integer '%s' in line '%s'" % (self.value, n, line))
            try:
                self.value = binwalk.core.compat.string_decode(self.value)
            except ValueError:
                raise ParserException("Failed to decode string value '%s' in line '%s'" % (self.value, line))
        # If a regex was specified, compile it
        elif self.type == 'regex':
            self.regex = True

            try:
                self.value = re.compile(self.value)
            except Exception as e:
                raise ParserException("Invalid regular expression '%s': %s" % (self.value, str(e)))
        # Non-string types are integer values
        else:
            try:
                self.value = int(self.value, 0)
            except ValueError:
                raise ParserException("Failed to convert value '%s' to an integer on line '%s'" % (self.value, line))

        # Sanity check to make sure the first line of a signature has an
        # explicit value
        if self.level == 0 and self.value is None:
            raise ParserException("First element of a signature must specify a non-wildcard value: '%s'" % (line))

        # Set the size and struct format value for the specified data type.
        # This must be done after the value has been parsed out above, since
        # the size of a string type depends on its value.
        if self.type == 'string':
            # Strings don't have a struct format value, since they don't have
            # to be unpacked
            self.fmt = None

            # If a string type has a specific value, set the comparison size to
            # the length of that string; else, truncate the string to
            # self.MAX_STRING_SIZE
            if self.value:
                self.size = len(self.value)
            else:
                self.size = self.MAX_STRING_SIZE
        elif self.type == 'regex':
            # Regular expressions don't have a struct format value either. The size
            # of a matching regex is unknown until it is applied to some data.
            self.fmt = None
            self.size = self.MAX_STRING_SIZE
        elif self.type in self._INTEGER_FORMATS:
            (self.fmt, self.size) = self._INTEGER_FORMATS[self.type]
        else:
            raise ParserException("Unknown data type '%s' in line '%s'" % (self.type, line))

        # The struct module uses the same characters for specifying signed and unsigned data types,
        # except that unsigned data types are upper case. The code above sets self.fmt to the
        # lower case (signed) values. String and regex types have no format to convert.
        if self.fmt and not self.signed:
            self.fmt = self.fmt.upper()

        # If a struct format was identified, create a format string to be passed to struct.unpack
        # which specifies the endianness and data type format.
        if self.fmt:
            self.pkfmt = '%c%c' % (self.endianness, self.fmt)
        else:
            self.pkfmt = None

        # Check if a format string was specified (this is optional)
        if len(parts) == 4:
            # %lld formats are only supported if Python was built with HAVE_LONG_LONG
            self.format = parts[3].replace('%ll', '%l')

            # Parse out tag keywords from the format string
            for match in self._TAG_REGEX.finditer(self.format):
                # Get rid of the curly braces.
                tag = match.group().replace('{', '').replace('}', '')

                # If the tag specifies a value, it will be colon delimited
                # (e.g., '{name:%s}'); tags without a value are set to True.
                if ':' in tag:
                    (n, v) = tag.split(':', 1)
                else:
                    n = tag
                    v = True

                # Record the tag in self.tags
                self.tags[n] = v

            # Remove all tags from the printable format string
            self.format = self._TAG_REGEX.sub('', self.format).strip()
        else:
            self.format = ""


class Signature(object):

    '''
    Class to hold signature data and generate signature regular expressions.
    '''

    def __init__(self, sid, first_line):
        '''
        Class constructor.

        @sid        - A ID value to uniquely identify this signature.
        @first_line - The first SignatureLine of the signature (subsequent
                      SignatureLines should be added via self.append).

        Returns None.
        '''
        self.id = sid
        self.lines = [first_line]
        self.title = first_line.format
        self.offset = first_line.offset
        self.regex = self._generate_regex(first_line)

        # The confidence defaults to the size of the first line's data, unless
        # overridden with a '{confidence:N}' tag. Tag values are parsed from the
        # signature text as strings, so numeric values are converted to integers
        # to keep them the same type as the integer size fallback. Anything that
        # can't be converted (including the integer fallback itself, which int()
        # rejects when given an explicit base) is left untouched.
        confidence = first_line.tags.get('confidence', first_line.size)
        try:
            confidence = int(confidence, 0)
        except (TypeError, ValueError):
            pass
        self.confidence = confidence

    def _generate_regex(self, line):
        '''
        Generates a regular expression from the magic bytes of a signature.
        The regex is used by Magic._analyze.

        @line - The first SignatureLine object of the signature.

        Returns a compiled regular expression.
        '''
        restr = ""

        # Strings are taken at face value; integer values are turned into
        # regex strings based on their data type size and endianness.
        if line.type == 'regex':
            # Regex types are already compiled expressions.
            # Note that since re.finditer is used, unless the specified
            # regex accounts for it, overlapping signatures will be ignored.
            return line.value
        if line.type == 'string':
            restr = line.value
        elif line.size in (1, 2, 4, 8):
            # Build the least-significant-byte-first representation. Every byte is
            # masked with 0xFF so that negative values produce their two's complement
            # bytes rather than an out of range value being passed to chr().
            little = ''.join([chr((line.value >> shift) & 0xFF)
                              for shift in range(0, line.size * 8, 8)])

            # Single byte values have no byte order
            if line.size == 1 or line.endianness == '<':
                restr = little
            elif line.endianness == '>':
                restr = little[::-1]

        # Since re.finditer is used on a per-signature basis, signatures should be crafted carefully
        # to ensure that they aren't potentially self-overlapping (e.g., a signature of "ABCDAB" could
        # be confused by the byte sequence "ABCDABCDAB"). The longer the signature, the less likely an
        # unintentional overlap is, although files could still be maliciously crafted to cause false
        # negative results.
        #
        # Thus, unless a signature has been explicitly marked as knowingly overlapping ('{overlap}'),
        # spit out a warning about any self-overlapping signatures.
        if 'overlap' not in line.tags:
            for i in range(1, line.size):
                # A suffix that is also a prefix means the signature can overlap itself
                if restr[i:] == restr[0:(line.size - i)]:
                    binwalk.core.common.warning("Signature '%s' is a self-overlapping signature!" % line.text)
                    break

        # The magic bytes are matched literally, so escape any regex special characters
        return re.compile(re.escape(restr))

    def append(self, line):
        '''
        Add a new SignatureLine object to the signature.

        @line - A new SignatureLine instance.

        Returns None.
        '''
        # This method is kind of useless, but may be a nice wrapper for future
        # code.
        self.lines.append(line)


class Magic(object):

    '''
    Primary class for loading signature files and scanning
    blocks of arbitrary data for matching signatures.
    '''

    def __init__(self, exclude=[], include=[], invalid=False):
        '''
        Class constructor.

        @include - A list of regex strings describing which signatures should be included in the scan results.
        @exclude - A list of regex strings describing which signatures should not be included in the scan results.
        @invalid - If set to True, invalid results will not be ignored.

        Returns None.
        '''
        # Used to save the block of data passed to self.scan (see additional
        # comments in self.scan)
        self.data = ""
        # A list of Signature class objects, populated by self.parse (see also:
        # self.load)
        self.signatures = []
        # A set of signatures with the 'once' keyword that have already been
        # displayed once
        self.display_once = set()
        self.dirty = True

        self.show_invalid = invalid
        self.includes = [re.compile(x) for x in include]
        self.excludes = [re.compile(x) for x in exclude]

        # Regex rule to replace backspace characters (an the preceeding character)
        # in formatted signature strings (see self._analyze).
        self.bspace = re.compile(".\\\\b")
        # Regex rule to match printable ASCII characters in formatted signature
        # strings (see self._analyze).
        self.printable = re.compile("[ -~]*")
        # Regex rule to find format strings
        self.fmtstr = re.compile("%[^%]")
        # Regex rule to find periods (see self._do_math)
        self.period = re.compile("\.")

    def reset(self):
        '''
        Forgets which signatures with the 'once' keyword have already been
        displayed, by replacing self.display_once with a new empty set.

        Returns None.
        '''
        self.display_once = set()

    def _filtered(self, text):
        '''
        Tests if a string should be filtered out or not.

        @text - The string to check against filter rules.

        Returns True if the string should be filtered out, i.e., not displayed.
        Returns False if the string should be displayed.
        '''
        filtered = None
        # Text is converted to lower case first, partially for historical
        # purposes, but also because it simplifies writing filter rules
        # (e.g., don't have to worry about case sensitivity).
        text = text.lower()

        for include in self.includes:
            if include.search(text):
                filtered = False
                break

        # If exclusive include filters have been specified and did
        # not match the text, then the text should be filtered out.
        if self.includes and filtered == None:
            return True

        for exclude in self.excludes:
            if exclude.search(text):
                filtered = True
                break

        # If no explicit exclude filters were matched, then the
        # text should *not* be filtered.
        if filtered == None:
            filtered = False

        return filtered

    def _do_math(self, offset, expression):
        '''
        Parses and evaluates complex expressions, e.g., "(4.l+12)", "(6*32)", etc.

        Indirect references of the form "<offset>.<type>" are replaced with the
        value read from self.data at that offset (relative to @offset) before the
        expression is evaluated. Supported type characters are:

            b, B - signed byte
            s    - little endian signed short
            l    - little endian signed long
            S    - big endian signed short
            L    - big endian signed long

        If self.data does not contain enough bytes at the referenced offset,
        a value of 0 is used.

        @offset     - The offset inside self.data that the current signature starts at.
        @expression - The expression to evaluate.

        Raises a ParserException if an indirect reference uses an unknown type character.

        Returns the value of the evaluated expression.
        '''
        # struct format used to read each indirect reference type from self.data
        formats = {
            'b': 'b',
            'B': 'b',
            's': '<h',
            'l': '<i',
            'S': '>h',
            'L': '>i',
        }

        # Does the expression contain an offset (e.g., "(4.l+12)")?
        if '.' in expression and '(' in expression:
            replacements = {}

            for match in self.period.finditer(expression):
                period = match.start()

                # Separate the offset field into the integer offset and type
                # values (o and t respectively). The offset starts just after
                # the closest preceding open parenthesis.
                s = expression[:period].rfind('(') + 1
                # The offset address may be an evaluatable expression, such as '(4+0.L)', typically the result
                # of the original offset being something like '(&0.L)'.
                o = binwalk.core.common.MathExpression(expression[s:period]).value
                # Slicing yields an empty string, rather than an IndexError, if
                # the period is the last character in the expression
                t = expression[period + 1:period + 2]

                # Re-build just the parsed offset portion of the expression
                text = "%s.%s" % (expression[s:period], t)

                # Have we already evaluated this offset expression? If so, skip
                # it.
                if text in replacements:
                    continue

                # An unknown type character must not silently re-use a value left
                # over from a previous reference.
                if t not in formats:
                    raise ParserException("Unknown offset type '%s' in expression '%s'" % (t, expression))
                fmt = formats[t]

                # The offset specified in the expression is relative to the
                # starting offset inside self.data
                o += offset

                # Read the value from self.data at the specified offset
                try:
                    raw = binwalk.core.compat.str2bytes(self.data[o:o + struct.calcsize(fmt)])
                    v = struct.unpack(fmt, raw)[0]
                # struct.error is thrown if there is not enough bytes in
                # self.data for the specified format type
                except struct.error:
                    v = 0

                # Keep track of all the recovered values from self.data
                replacements[text] = v

            # Finally, replace all offset expressions with their corresponding
            # text value
            v = expression
            for (text, value) in replacements.items():
                v = v.replace(text, "%d" % value)

        # If no offset, then it's just an evaluatable math expression (e.g.,
        # "(32+0x20)")
        else:
            v = expression

        # Evaluate the final expression
        return binwalk.core.common.MathExpression(v).value

    def _analyze(self, signature, offset):
        '''
        Analyzes self.data for the specified signature data at the specified offset .

        @signature - The signature to apply to the data.
        @offset    - The offset in self.data to apply the signature to.

        Returns a dictionary of tags parsed from the data.
        '''
        description = []
        max_line_level = 0
        previous_line_end = 0
        tags = {'id': signature.id, 'offset':
                offset, 'invalid': False, 'once': False}

        # Apply each line of the signature to self.data, starting at the
        # specified offset
        for n in range(0, len(signature.lines)):
            line = signature.lines[n]

            # Ignore indentation levels above the current max indent level
            if line.level <= max_line_level:
                # If the relative offset of this signature line is just an
                # integer value, use it
                if isinstance(line.offset, int):
                    line_offset = line.offset
                # Else, evaluate the complex expression
                else:
                    # Format the previous_line_end value into a string. Add the '+' sign to explicitly
                    # state that this value is to be added to any subsequent values in the expression
                    # (e.g., '&0' becomes '4+0').
                    ple = '%d+' % previous_line_end
                    # Allow users to use either the '&0' (libmagic) or '&+0' (explcit addition) sytaxes;
                    # replace both with the ple text.
                    line_offset_text = line.offset.replace('&+', ple).replace('&', ple)
                    # Evaluate the expression
                    line_offset = self._do_math(offset, line_offset_text)

                # Sanity check
                if not isinstance(line_offset, int):
                    raise ParserException("Failed to convert offset '%s' to a number: '%s'" % (line.offset, line.text))

                # The start of the data needed by this line is at offset + line_offset.
                # The end of the data will be line.size bytes later.
                start = offset + line_offset
                end = start + line.size

                # If the line has a packed format string, unpack it
                if line.pkfmt:
                    try:
                        dvalue = struct.unpack(line.pkfmt, binwalk.core.compat.str2bytes(self.data[start:end]))[0]
                    # Not enough bytes left in self.data for the specified
                    # format size
                    except struct.error as e:
                        dvalue = 0
                # Else, this is a string
                else:
                    # Wildcard strings have line.value == None
                    if line.value is None:
                        # Check to see if this is a string whose size is known and has been specified on a previous
                        # signature line.
                        if binwalk.core.compat.has_key(tags, 'strlen') and binwalk.core.compat.has_key(line.tags, 'string'):
                            dvalue = self.data[start:(start + tags['strlen'])]
                        # Else, just terminate the string at the first newline,
                        # carriage return, or NULL byte
                        else:
                            dvalue = self.data[start:end].split('\x00')[0].split('\r')[0].split('\n')[0]
                    # Non-wildcard strings have a known length, specified in
                    # the signature line
                    else:
                        dvalue = self.data[start:end]

                # Some integer values have special operations that need to be performed on them
                # before comparison (e.g., "belong&0x0000FFFF"). Complex math expressions are
                # supported here as well.
                # if isinstance(dvalue, int) and line.operator:
                if line.operator:
                    try:
                        # If the operator value of this signature line is just
                        # an integer value, use it
                        if isinstance(line.opvalue, int):
                            opval = line.opvalue
                        # Else, evaluate the complex expression
                        else:
                            opval = self._do_math(offset, line.opvalue)

                        # Perform the specified operation
                        if line.operator == '**':
                            dvalue **= opval
                        elif line.operator == '<<':
                            dvalue <<= opval
                        elif line.operator == '>>':
                            dvalue >>= opval
                        elif line.operator == '&':
                            dvalue &= opval
                        elif line.operator == '|':
                            dvalue |= opval
                        elif line.operator == '*':
                            dvalue *= opval
                        elif line.operator == '+':
                            dvalue += opval
                        elif line.operator == '-':
                            dvalue -= opval
                        elif line.operator == '/':
                            dvalue /= opval
                        elif line.operator == '~':
                            dvalue = ~opval
                        elif line.operator == '^':
                            dvalue ^= opval
                    except KeyboardInterrupt as e:
                        raise e
                    except Exception as e:
                        raise ParserException("Operation '" +
                                              str(dvalue) +
                                              " " +
                                              str(line.operator) +
                                              "= " +
                                              str(line.opvalue) +
                                              "' failed: " + str(e))

                # Does the data (dvalue) match the specified comparison?
                if ((line.value is None) or
                    (line.regex and line.value.match(dvalue)) or
                    (line.condition == '=' and dvalue == line.value) or
                    (line.condition == '>' and dvalue > line.value) or
                    (line.condition == '<' and dvalue < line.value) or
                    (line.condition == '!' and dvalue != line.value) or
                    (line.condition == '~' and (dvalue == ~line.value)) or
                    (line.condition == '^' and (dvalue ^ line.value)) or
                    (line.condition == '&' and (dvalue & line.value)) or
                        (line.condition == '|' and (dvalue | line.value))):

                    # Up until this point, date fields are treated as integer values,
                    # but we want to display them as nicely formatted strings.
                    if line.type == 'date':
                        try:
                            ts = datetime.datetime.utcfromtimestamp(dvalue)
                            dvalue = ts.strftime("%Y-%m-%d %H:%M:%S")
                        except KeyboardInterrupt as e:
                            raise e
                        except Exception:
                            dvalue = "invalid timestamp"

                    # Generate the tuple for the format string
                    dvalue_tuple = ()
                    for x in self.fmtstr.finditer(line.format):
                        dvalue_tuple += (dvalue,)

                    # Format the description string
                    desc = line.format % dvalue_tuple

                    # If there was any description string, append it to the
                    # list of description string parts
                    if desc:
                        description.append(desc)

                    # Process tag keywords specified in the signature line. These have already been parsed out of the
                    # original format string so that they can be processed
                    # separately from the printed description string.
                    for (tag_name, tag_value) in binwalk.core.compat.iterator(line.tags):
                        # If the tag value is a string, try to format it
                        if isinstance(tag_value, str):
                            # Generate the tuple for the format string
                            dvalue_tuple = ()
                            for x in self.fmtstr.finditer(tag_value):
                                dvalue_tuple += (dvalue,)

                            # Format the tag string
                            tags[tag_name] = tag_value % dvalue_tuple
                        # Else, just use the raw tag value
                        else:
                            tags[tag_name] = tag_value

                        # Some tag values are intended to be integer values, so
                        # try to convert them as such
                        try:
                            tags[tag_name] = int(tags[tag_name], 0)
                        except KeyboardInterrupt as e:
                            raise e
                        except Exception as e:
                            pass

                    # Abort processing soon as this signature is marked invalid, unless invalid results
                    # were explicitly requested. This means that the sooner invalid checks are made in a
                    # given signature, the faster the scan can filter out false
                    # positives.
                    if not self.show_invalid and tags['invalid']:
                        break

                    # Look ahead to the next line in the signature; if its indent level is greater than
                    # that of the current line, then track the end of data for the current line. This is
                    # so that subsequent lines can use the '>>&0' offset syntax to specify relative offsets
                    # from previous lines.
                    try:
                        next_line = signature.lines[n + 1]
                        if next_line.level > line.level:
                            if line.type == 'string':
                                previous_line_end = line_offset + len(dvalue)
                            else:
                                previous_line_end = line_offset + line.size
                    except IndexError as e:
                        pass

                    # If this line satisfied its comparison, +1 the max
                    # indentation level
                    max_line_level = line.level + 1
                else:
                    # No match on the first line, abort
                    if line.level == 0:
                        break
                    else:
                        # If this line did not satisfy its comparison, then higher
                        # indentation levels will not be accepted.
                        max_line_level = line.level

        # Join the formatted description strings and remove backspace
        # characters (plus the preceeding character as well)
        tags['description'] = self.bspace.sub('', " ".join(description))

        # This should never happen
        if not tags['description']:
            tags['display'] = False
            tags['invalid'] = True

        # If the formatted string contains non-printable characters, consider
        # it invalid
        if self.printable.match(tags['description']).group() != tags['description']:
            tags['invalid'] = True

        return tags

    def match(self, data):
        '''
        Match the beginning of a data buffer to a signature.

        @data - The data buffer to match against the loaded signature list.

        Returns a list of SignatureResult objects.
        '''
        return self.scan(data, 1)

    def scan(self, data, dlen=None):
        '''
        Scan a data block for matching signatures.

        @data - A string of data to scan.
        @dlen - If specified, signatures at offsets larger than dlen will be ignored.

        Returns a list of SignatureResult objects.
        '''
        results = []
        matched_offsets = set()

        # Since data can potentially be quite a large string, make it available to other
        # methods via a class attribute so that it doesn't need to be passed around to
        # different methods over and over again.
        self.data = data

        # If dlen wasn't specified, search all of self.data
        if dlen is None:
            dlen = len(data)

        sc = 0
        for signature in self.signatures:
            # Use regex to search the data block for potential signature
            # matches (fast)
            sc += 1
            for match in signature.regex.finditer(data):
                # Take the offset of the start of the signature into account
                offset = match.start() - signature.offset

                # Signatures are ordered based on the length of their magic bytes (largest first).
                # If this offset has already been matched to a previous signature, ignore it unless
                # self.show_invalid has been specified. Also ignore obviously invalid offsets (<0)
                # as well as those outside the specified self.data range (dlen).
                if (offset not in matched_offsets or self.show_invalid) and offset >= 0 and offset < dlen:
                # if offset >= 0 and offset < dlen:
                    # Analyze the data at this offset using the current
                    # signature rule
                    tags = self._analyze(signature, offset)

                    # Generate a SignatureResult object and append it to the results list if the
                    # signature is valid, or if invalid results were requested.
                    if (not tags['invalid'] or self.show_invalid) and not self._filtered(tags['description']):
                        # Only display results with the 'once' tag once.
                        if tags['once']:
                            if signature.title in self.display_once:
                                continue
                            else:
                                self.display_once.add(signature.title)
                        
                        # Append the result to the results list
                        results.append(SignatureResult(**tags))

                        # Add this offset to the matched_offsets set, so that it can be ignored by
                        # subsequent loops.
                        matched_offsets.add(offset)

        # Sort results by offset
        results.sort(key=lambda x: x.offset, reverse=False)

        return results

    def load(self, fname):
        '''
        Load signatures from a file.

        @fname - Path to signature file.

        Returns None.
        '''
        # Magic files must be ASCII, else encoding issues can arise.
        fp = open(fname, "r")
        lines = fp.readlines()
        self.parse(lines)
        fp.close()

    def parse(self, lines):
        '''
        Parse signature file lines.

        @lines - A list of lines from a signature file.

        Returns None.
        '''
        signature = None

        for line in lines:
            # Split at the first comment delimiter (if any) and strip the
            # result
            line = line.split('#')[0].strip()
            # Ignore blank lines and lines that are nothing but comments.
            # We also don't support the '!mime' style line entries.
            if line and line[0] != '!':
                # Parse this signature line
                sigline = SignatureLine(line)
                # Level 0 means the first line of a signature entry
                if sigline.level == 0:
                    # If there is an existing signature, append it to the signature list,
                    # unless the text in its title field has been filtered by user-defined
                    # filter rules.
                    if signature and not self._filtered(signature.title):
                        self.signatures.append(signature)

                    # Create a new signature object; use the size of self.signatures to
                    # assign each signature a unique ID.
                    signature = Signature(len(self.signatures), sigline)
                # Else, just append this line to the existing signature
                elif signature:
                    # signature.append(sigline)
                    signature.lines.append(sigline)
                # If this is not the first line of a signature entry and there is no other
                # existing signature entry, something is very wrong with the
                # signature file.
                else:
                    raise ParserException("Invalid signature line: '%s'" % line)

        # Add the final signature to the signature list
        if signature:
            if not self._filtered(signature.lines[0].format):
                self.signatures.append(signature)

        # Sort signatures by confidence (aka, length of their magic bytes),
        # largest first
        self.signatures.sort(key=lambda x: x.confidence, reverse=True)