| #! /usr/bin/python |
| import argparse |
| import logging |
| import mmap |
| import os |
| import re |
| import sys |
| |
# Accumulator filled by config_import_file(): one (compiled regex,
# origin) tuple per expression, where origin is "FILENAME:START-END".
exclude_regexs = list()

# Matches one or more consecutive lines, each beginning with a space
# or a tab followed by at least one character other than '#'.
# NOTE(review): not referenced anywhere in the visible part of the
# file -- confirm whether it is still needed.
noncomment_regex = re.compile("(^[ \t][^#]+.*\n)+", flags=re.MULTILINE)
| |
def config_import_file(filename):
    """
    Import the regular expressions contained in one configuration file.

    Each file follows the format::

      #
      # Comments for multiline regex 1...
      #
      multilineregex
      multilineregex
      multilineregex
      #
      # Comments for multiline regex 2...
      #
      multilineregex
      multilineregex
      multilineregex

    etc.

    Every block of non-comment lines is compiled (with re.MULTILINE)
    into a single regex and appended to the module-level list
    exclude_regexs as a ``(regex, origin)`` tuple; origin is the
    string ``"FILENAME:START-END"`` built from the byte offsets of
    the block inside the file.

    The file is handled as binary data, so the compiled patterns are
    byte patterns. Only lines terminated by a newline are picked up;
    an empty file simply contributes no expressions.

    :param filename: path of the configuration file to load
    :raises Exception: if the file cannot be read or one of its
      blocks is not a valid regular expression
    """
    try:
        # Configuration files are small, so read them in one go: this
        # leaves no mapping behind to close and also copes with empty
        # files, which mmap refuses to map.
        with open(filename, "rb") as f:
            data = f.read()
        # That regex basically selects any block of lines that is not
        # a comment block. finditer() finds all the blocks; each one
        # is compiled into a regex and appended. The pattern is a raw
        # byte string so it can be applied to binary data and does not
        # rely on Python passing an unknown escape (\s) through.
        for m in re.finditer(br"(^\s*[^#].*\n)+", data, re.MULTILINE):
            start, end = m.start(), m.end()
            block = data[start:end]
            origin = "%s:%s-%s" % (filename, start, end)
            try:
                r = re.compile(block, re.MULTILINE)
            except Exception as e:
                logging.error("%s: bytes %d-%d: bad regex: %s",
                              filename, start, end, e)
                raise
            logging.debug("%s: found regex at bytes %d-%d: %s",
                          filename, start, end, block)
            exclude_regexs.append((r, origin))
        logging.debug("%s: loaded", filename)
    except Exception as e:
        raise Exception("E: %s: can't load config file: %s" % (filename, e))
| |
def config_import_path(path):
    """
    Import regular expressions from every *.conf file found under path.

    The directory tree rooted at path is walked recursively.
    Directories are visited and files are loaded in sorted order, so
    the resulting order of regular expressions is reproducible. Files
    whose name does not end in ``.conf`` are ignored.

    os.walk() ignores directory listing errors by default, so a path
    that does not exist yields no files instead of raising.

    :param path: directory to scan for configuration files
    :raises Exception: if any of the configuration files found fails
      to load
    """
    try:
        for dirpath, dirnames, filenames in os.walk(path):
            # Sorting in place makes os.walk() descend into the
            # subdirectories in a deterministic order.
            dirnames.sort()
            for _filename in sorted(filenames):
                filename = os.path.join(dirpath, _filename)
                if not _filename.endswith(".conf"):
                    logging.debug("%s: ignored", filename)
                    continue
                config_import_file(filename)
    except Exception as e:
        raise Exception("E: %s: can't load config files: %s" % (path, e))
| |
def config_import(paths):
    """
    Import regular expressions from any *.conf file in the list of paths.

    If a path is "" or None, the list of paths collected until then
    is flushed and only the ones that follow are considered.

    :param paths: iterable of directory paths; an entry that is ""
      or None discards all the entries that precede it
    """
    _paths = []
    # Go over the list, flush it if the user gave an empty path ("")
    for path in paths:
        if path is None or path == "":
            logging.debug("flushing current config list: %s", _paths)
            _paths = []
        else:
            _paths.append(path)
    logging.debug("config list: %s", _paths)
    for path in _paths:
        config_import_path(path)
| |
# Command line interface. -c appends to a list that starts with the
# default directory; giving -c without a value appends None, which
# config_import() takes as "forget every directory given so far".
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("-v", "--verbosity", action="count", default=0,
                        help="increase verbosity")
arg_parser.add_argument("-q", "--quiet", action="count", default=0,
                        help="decrease verbosity")
arg_parser.add_argument("-c", "--config-dir", action="append", nargs="?",
                        default=[".known-issues/"],
                        help="configuration directory (multiple can be "
                        "given; if none given, clears the current list) "
                        "%(default)s")
arg_parser.add_argument("FILENAMEs", nargs="+",
                        help="files to filter")
args = arg_parser.parse_args()

# Start at ERROR (40); each -v lowers the threshold by one level (10)
# and each -q raises it by one.
logging.basicConfig(level=40 - 10 * (args.verbosity - args.quiet),
                    format="%(levelname)s: %(message)s")

# Report the directories that were actually requested rather than a
# hard-coded default.
logging.debug("Reading configuration from directories %s", args.config_dir)
config_import(args.config_dir)
| |
# The input is handled as raw bytes, so write raw bytes too: on
# Python 3 that means going through the binary layer of stdout (a
# print() of a bytes object would emit its repr); on Python 2
# sys.stdout already takes byte strings.
output = getattr(sys.stdout, "buffer", sys.stdout)

for filename in args.FILENAMEs:
    try:
        # The file is only read, so do not require write permission.
        with open(filename, "rb") as f:
            logging.info("%s: filtering", filename)
            if os.fstat(f.fileno()).st_size == 0:
                # mmap refuses to map an empty file, and there is
                # nothing to filter anyway.
                logging.debug("%s: empty file, nothing to filter", filename)
                continue
            mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
            try:
                # Ranges are byte offsets into *this* file; start from
                # scratch so nothing leaks from the previous one.
                exclude_ranges = []
                for ex, origin in exclude_regexs:
                    logging.info("%s: searching from %s: %s",
                                 filename, origin, ex.pattern)
                    # ex was compiled with re.MULTILINE already
                    for m in ex.finditer(mm):
                        logging.debug("%s: %s-%s: match from from %s",
                                      filename, m.start(), m.end(), origin)
                        exclude_ranges.append((m.start(), m.end()))

                exclude_ranges.sort()
                logging.warning("%s: ranges excluded: %s",
                                filename, exclude_ranges)

                # Print what has not been filtered: the gaps between
                # excluded ranges, byte for byte.
                offset = 0
                for b, e in exclude_ranges:
                    logging.debug("%s: exclude range (%d, %d), from %d %dB",
                                  filename, b, e, offset, b - offset)
                    if b > offset:
                        output.write(mm[offset:b])
                    # Ranges from different expressions may overlap or
                    # nest; never move backwards, or text that was
                    # already excluded would be emitted.
                    offset = max(offset, e)
                if offset < len(mm):
                    output.write(mm[offset:])
            finally:
                mm.close()
    except Exception as e:
        logging.error("%s: cannot load: %s", filename, e)