@ wrote... (11 years, 11 months ago)

A little Python script that parses your procmail.log and shows where the most recent emails got filed (the last 10 by default).

Download: mailhist. The download is likely newer than what's posted here.

#!/usr/bin/env python
#
# License: GNU
# Copyright: Kurt Neufeld
#
# shows last n messages that procmail filtered
#
# procmail must have logabstract enabled, ie: your procmailrc must have:
# LOGABSTRACT=all
# you might also want to set: LOGFILE=~/.procmail.log
#
# You may want to edit the defaults a few lines down.
#
# Known Bugs: no known bugs in mailhist, but occasionally the procmail abstract
# is malformed, in which case the output for that message is garbled

import sys,os
import re
import time
import string
import pprint
from optparse import OptionParser,OptionGroup

"""
From: http://code.activestate.com/recipes/120686/, Matt Billenstein

read a file returning the lines in reverse order for each call of readline()
This actually just reads blocks (4096 bytes by default) of data from the end of
the file and returns last line in an internal buffer.  I believe all the corner
cases are handled, but never can be sure...
"""

class BackwardsReader:
    """Read a file line by line, starting from the last line.

    The file is fetched in fixed-size blocks, from the end towards the
    beginning, and readline() hands out one line per call, last line first.
    Lines are returned as text; on Python 3 the raw bytes are decoded as
    UTF-8 with undecodable bytes replaced.

    Call close() (or use the instance in a ``with`` statement) to release
    the underlying file.

    Adapted from http://code.activestate.com/recipes/120686/, Matt Billenstein
    """

    def __init__(self, file, blksize=4096):
        """Open *file* and buffer its last block.

        file    -- path of the file to read
        blksize -- number of bytes fetched per read
        """
        # get the file size
        self.size = os.stat(file)[6]
        # how big of a block to read from the file...
        self.blksize = blksize
        # how many blocks we've read
        self.blkcount = 1
        # binary mode: end-relative seeks are not possible on text-mode files
        self.f = open(file, 'rb')
        # if the file is bigger than one block, start with its last block,
        # otherwise read the whole thing from the top
        if self.size > self.blksize:
            self.f.seek(-self.blksize * self.blkcount, 2)  # 2: relative to end of file
        self.data = self.f.read(self.blksize).split(b'\n')
        # strip the last item if it's empty...  a byproduct of the last line
        # having a newline at the end of it
        if not self.data[-1]:
            self.data.pop()

    def readline(self):
        """Return the previous line with a trailing newline, or "" when done."""
        # A single buffered item may be just the tail of a line that starts in
        # an earlier block, so keep prepending blocks until the buffer holds a
        # line boundary or everything up to the start of the file was read.
        while len(self.data) == 1 and (self.blkcount * self.blksize) < self.size:
            self.blkcount += 1
            tail = self.data[0]
            try:
                self.f.seek(-self.blksize * self.blkcount, 2)
                chunk = self.f.read(self.blksize)
            except (IOError, OSError):
                # can't seek before the beginning of the file:
                # read whatever is left in front of the blocks already seen
                self.f.seek(0)
                chunk = self.f.read(self.size - self.blksize * (self.blkcount - 1))
            self.data = (chunk + tail).split(b'\n')

        if not self.data:
            return ""

        line = self.data.pop()
        if not isinstance(line, str):
            # Python 3 reads bytes from a binary file; hand text to the caller.
            # Decoding whole lines is safe because the buffer is only ever
            # split at newline bytes.
            line = line.decode('utf-8', 'replace')
        return line + '\n'

    def close(self):
        """Close the underlying file."""
        self.f.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()


# change the following two lines if your procmail.log is not located at: ~/.procmail.log
# expanduser("~") is used instead of os.environ['HOME'] so that importing the
# script does not fail with a KeyError when HOME is not set
basedir = os.path.expanduser("~")
def_logfilename = basedir + "/.procmail.log"

# default values for options are specified (and can be changed) here
def_exclude_list = ["/dev/null", "mail/archive", "mail/spam", "formail", "root", "system/bacula", "sa-learn"]
def_num_messages = 10                   # how many messages to show when n is not given
def_show_all = False
def_today = False
def_date = None
def_count_only = False
def_show_time = True
def_skip = 0                            # how many msg's to skip on output

# you probably don't want to change anything after this line

def ParseCmdLine(argv):
    """Parse the command line and return the optparse options object.

    argv -- the full argument vector including the program name (sys.argv);
            an optional positional argument n is the number of messages to show

    In addition to the declared options, the returned object carries
    ``count``: the number of matching messages to collect, which includes the
    messages that are going to be skipped.

    Writes the usage text to stderr and exits with status 1 if n is not an
    integer.
    """
    prog = "mailhist"
    version = "v1.2"
    desc = "mailhist parses your procmail.log and shows where the last n messages were filed"
    usage = "usage: %prog [ n ]"
    epilog = "Default exclude list: %s" % ", ".join( def_exclude_list )

    parser = OptionParser(prog=prog, version="%prog "+version, description=desc, usage=usage, epilog=epilog)

    #help and version are built into optparse
    parser.add_option("-e", action="append", type="string", dest="exclude",
        help="exclude a mailbox. multiple instances allowed. regex allowed")
    parser.add_option("-a", action="store_true", dest="show_all", default=def_show_all,
        help="show all mailboxes" )
    parser.add_option("-s", action="store", type="int", dest="skip", default=def_skip,
        help="skip the last n messages")
    parser.add_option("-t", action="store_true", dest="today", default=def_today,
        help="show only messages received today (equiv to: -d 0)")
    parser.add_option("-T", action="store_false", dest="time", default=def_show_time,
        help="do not print current time")
    parser.add_option("-d", action="store", dest="date", type="string", default=def_date,
        help="show specific date (yyyy-mm-dd) or n for n days ago")
    parser.add_option("-c", action="store_true", dest="count_only", default=def_count_only,
        help="don't display summaries, just a count")
    parser.add_option("-l", action="store", dest="logfilename", metavar="LOGFILE", default=def_logfilename,
        help="specify procmail logfile. default: %s" % def_logfilename)

    (options, args) = parser.parse_args(argv)

    if options.today:
        options.date = 0

    # args[0] is the program name, so the message count (if given) is args[1]
    if len(args) > 1:
        try:
            options.count = int(args[1])
        except ValueError:
            sys.stderr.write("%s\n" % parser.get_usage())
            sys.stderr.write("'%s' is not an integer\n" % args[1])
            sys.exit(1)
    else:
        options.count = def_num_messages

        # without an explicit n, raise the limit by 1000 for date queries
        # and again for show-all queries
        if options.date is not None:
            options.count += 1000

        if options.show_all:
            options.count += 1000

    # Skipped messages have to be collected too. This applies to an explicit
    # n as well, otherwise "-s 5 20" would end up showing 15 messages.
    options.count += options.skip

    return options


def BuildExcludeList(options):
    """Return the mailbox exclusion pattern as a regex source string.

    options.show_all -- when true, the default exclude list is not used
    options.exclude  -- list of values given with -e (or None); each value
                        may hold several comma separated entries

    The result is an alternation such as "(a|b|c)". When nothing is excluded
    it is "^$", which can only match an empty line.
    """
    if options.show_all:
        e_list = []
    else:
        # work on a copy: extending the module-level default in place would
        # make the extra entries pile up across calls
        e_list = list(def_exclude_list)

    # go through the exclude list, splitting on ","
    # ex: ["a","b,c"] becomes ["a","b","c"]
    for e in options.exclude or []:
        # drop empty entries (as in "a,"): an empty alternative would match,
        # and therefore exclude, every mailbox
        e_list.extend([part for part in e.split(",") if part])

    if e_list:
        return "(" + "|".join(e_list) + ")"
    return "^$"

def ExcludeRegEx( options ):
    """Return the compiled form of the pattern built by BuildExcludeList(options)."""
    return re.compile( BuildExcludeList(options) )


def DateRegEx(prev):
    """
    Return a regex object that matches a procmail date string for one day

    input: None (no date filtering), a number (how many days ago, the sign
           is ignored) or a date string (yyyy-mm-dd)

    For None the returned regex matches every line. The same regex is
    returned, after printing a message, when the input cannot be interpreted.
    """
    import datetime     # local import: not among the file's top-level imports

    match_all = re.compile( "^" )           # this will always match

    if prev is None:
        return match_all

    try:                                    # passed in a number?
        days_ago = abs(int(prev))           # make sure it's positive
    except ValueError:                      # no: treat it as a date string
        days_ago = None

    try:
        if days_ago is not None:
            # Calendar arithmetic instead of subtracting n * 86400 seconds:
            # days are not always 24 hours long (DST changes), so the
            # subtraction could end up on the wrong date.
            day = datetime.date.today() - datetime.timedelta(days=days_ago)
            when = day.timetuple()
        else:
            when = time.strptime(prev, "%Y-%m-%d")
    except (ValueError, OverflowError):     # unparsable string or date out of range
        print("could not parse date string: %s" % prev)
        return match_all

    # example of procmail datestring: Fri Nov  9 15:37:10 2007
    date_str = "%s%2d [0-9:]+ %s" % (
        time.strftime("%a %b ", when),      # Fri Nov
        when.tm_mday,                       #  9 (procmail pads with a space, not a 0)
        time.strftime("%Y", when),          # 2007; the time in between is not checked
    )

    return re.compile( date_str )


def _collect_messages(reader, options, num_messages):
    """Scan the log backwards and return at most num_messages records.

    Each record is a (from_line, subject_line, folder_line) tuple; the list
    is in reading order, i.e. the message logged last comes first.
    """
    # compile our regular expressions
    re_include = re.compile("^  Folder: ")
    re_incdate = DateRegEx( options.date )
    re_exclude = ExcludeRegEx( options )
    re_hacks = re.compile("^From foo@bar")

    records = []
    date_hit = False

    # only find the last n messages
    while len(records) < num_messages:

        # search backwards through the log file until we find a Folder line
        # useful if debug logging is turned on
        folder = reader.readline()
        while folder and not re_include.search( folder ):
            folder = reader.readline()

        if not folder:                  # end of file (that is: top of the log)
            break

        if re_exclude.search( folder ):
            continue

        # read backwards, the two lines after a Folder line are the
        # Subject line and the From line of the same message
        subject = reader.readline()
        sender = reader.readline()

        if not subject or not sender:   # record is cut off at the top of the log
            break

        m_incdate = re_incdate.search( sender )

        # if we're searching on a date (includes today) and we've had a date hit
        # then stop searching when we miss on a date hit. ie: stop searching when
        # we're no longer "on" desired date
        if options.date is not None:    # must compare to None, today is date=0
            if m_incdate:
                date_hit = True
            elif date_hit:
                break

        if m_incdate and not re_hacks.search( sender ):
            records.append( (sender, subject, folder) )

    return records


def main(argv):
    """Print where procmail filed the most recent messages, plus a summary line.

    argv -- the full argument vector including the program name (sys.argv)

    Exits with status -1 if the log file cannot be opened.
    """
    options = ParseCmdLine(argv)

    try:
        reader = BackwardsReader( options.logfilename )
    except (IOError, OSError):
        # OSError as well: on Python 2 the os.stat() call in BackwardsReader
        # raises OSError, not IOError, when the file does not exist
        print("Can't open %s" % options.logfilename)
        sys.exit(-1)

    try:
        records = _collect_messages(reader, options, options.count)
    finally:
        reader.f.close()                # the reader keeps its file object in .f

    if options.count_only:
        output = "count: %d" % len(records)
    else:
        # calc how many msgs we skipped
        skipped = min(options.skip, len(records))

        shown = records[options.skip:]  # this returns an empty list and not an IndexError
                                        # if index is higher than list length

        # records were collected in reverse order so print them in the correct order
        shown.reverse()
        for n, (sender, subject, folder) in enumerate(shown):

            if n:                       # put a separator between records
                print("---")            # except before the first

            print(sender.rstrip())
            print(subject.rstrip())
            print(folder.rstrip().expandtabs(1))

        if shown:                       # only print newline if we had results
            print("")

        output = "count: %d, skipped: %d, total: %d" % ( len(shown), skipped, len(shown) + skipped )

    if options.time:
        now = time.strftime( "%Y-%m-%d %I:%M %p" )
        output += ", current time: %s" % now

    print(output)

# run main() only when executed as a script; it receives the full argument
# vector, program name included
if __name__ == "__main__":
        main(sys.argv)
Category: tech, Tags: email, python
Comments: 0
Click here to add a comment