#!/usr/bin/python
# vim: set fileencoding=utf-8 : sts=4 : et : sw=4
# This is a component of mailpie, a full-text search for email
#
# Copyright © 2008 Jeff Epler <jepler@unpythonic.net>
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
import sys, os, subprocess, shutil, getopt, tempfile, re, shlex
|
|
import mailpie.swishfilter, mailpie.parsedate, mailpie.log
|
|
import bsddb, atexit, hashlib, urllib
|
|
|
|
def usage(result=0):
    """Print the command-line help and terminate the program.

    ``result`` is handed to ``SystemExit`` unchanged: ``0`` (the default)
    exits successfully, while a message string or exception object is
    printed by the interpreter and produces a failing exit status.

    The help text goes to stdout for a successful exit and to stderr when
    ``result`` signals an error, so that a usage error never ends up in
    mailbox data written to stdout.

    Raises:
        SystemExit: always.
    """
    text = """mailpie-search: Put search results in a mailbox
Usage: %s [-o file|-p|-m] [-b date] [-a date] [-M mailreader] terms...
-B dir, --base=dir Choose the base location of the mailpie storage
Default: $HOME/.mailpie

Action:
-c, --count Write number of matches
-o FILE, --out=FILE Write result to FILE
-p, --stdout Write result to stdout (implies --mbox)
-r, --read Open mailbox in mailreader (default)

-m, --mbox Output in mbox format (default: maildir)
-i, --show-ids Show the mailpie message ID in a header
(implies --mbox)

-M prog, --mailreader=PROG
Mailreader to invoke
(default: "mutt -R -e'unmailboxes *' -f")

Searching:
-b DATE, --before=DATE Only include messages before DATE
-a DATE, --after=DATE Only include messages after DATE
Date formats are like "date -d=DATE"

-l NUM, --limit=NUM Return no more than NUM

-t, --thread Include full threads
-n, --no-thread Do not include threads (default)

terms... See SWISH-RUN(1) for details on search terms
""" % os.path.basename(sys.argv[0])

    # Look the stream up at call time so redirected stdout/stderr is honoured.
    is_error = result not in (0, None)
    if is_error:
        stream = sys.stderr
    else:
        stream = sys.stdout
    # The extra newline matches what the print statement used to append.
    stream.write(text + "\n")
    raise SystemExit(result)
|
|
|
|
class SwishError(RuntimeError):
    """Error type used to report a failed swish-e invocation."""
|
|
|
|
def run_swish(indexes, args, before=None, after=None):
    """Run ``swish-e`` over the given index files and collect its output.

    Parameters:
        indexes: iterable of index file names, each passed as ``-f<name>``.
        args:    search terms; they are joined with spaces into the ``-w``
                 query.  When empty, nothing is run and ``None`` is returned.
        before:  optional upper bound for the ``date`` property limit.
        after:   optional lower bound for the ``date`` property limit.

    Returns:
        A list with one entry per output line (the ``<swishdocpath>`` of
        each hit, as requested with ``-x``), or ``None`` when ``args`` is
        empty.

    Raises:
        SwishError: when swish-e exits with a non-zero status; whatever it
            printed is included in the message.
    """
    if not args:
        return

    swishargs = ['swish-e', '-H0',
                 '-x<swishdocpath>\n',
                 '-w', " ".join(args)]
    swishargs.extend('-f%s' % f for f in indexes)
    if before or after:
        # Only one bound may be given; open the other end of the range.
        if before is None:
            before = sys.maxint
        if after is None:
            after = 0
        swishargs.extend(["-L", "date", str(after), str(before)])

    # swish-e must not read from our stdin; close the handle afterwards
    # instead of leaking it.
    devnull = open(os.devnull, "r")
    try:
        swish = subprocess.Popen(swishargs, stdout=subprocess.PIPE,
                                 stdin=devnull)
        try:
            output = [row.strip("\n") for row in swish.stdout]
        finally:
            swish.stdout.close()
        exitcode = swish.wait()
    finally:
        devnull.close()

    if exitcode != 0:
        # A lone "." is the end-of-results marker, not diagnostic text.
        if output and output[-1] == ".":
            del output[-1]
        if output:
            raise SwishError(
                "Swish exited with code %d and the following information:\n "
                % exitcode
                + "\n ".join(output)
                + "\n\nSee 'man SWISH-RUN' for details on "
                  "swish search terms")
        raise SwishError("Swish exited with code %d" % exitcode)
    return output
|
|
|
|
class Count:
    """Marker class: used as the output target when only the number of
    matches should be printed."""
|
|
|
|
def copymail(path, f, show_ids, mbox):
    """Deliver the stored message at ``path`` to the output ``f``.

    Parameters:
        path:     file name of the stored message.
        f:        output target.  A false value means "count only"; with
                  ``mbox`` false it is the path of a maildir directory,
                  otherwise an object with a ``write`` method.
        show_ids: when true (mbox output only), insert an ``X-Mailpie-ID``
                  header, and an ``X-Gmane-URL`` header if a Message-ID was
                  seen, just before the first blank line.
        mbox:     true to append the message text to ``f``; false to
                  symlink the message into ``f``/cur.

    Returns:
        1 when the message was delivered (or merely counted), 0 when
        ``path`` is not a regular file or the maildir link already exists.
    """
    if not os.path.isfile(path):
        return 0
    if not f:
        return 1

    if not mbox:
        # Maildir output: link the message under a name derived from its
        # path, flagged as seen (":2,S").
        path_out = os.path.join(f, 'cur', hashlib.sha1(path).hexdigest()+":2,S")
        if os.path.exists(path_out):
            return 0
        os.symlink(path, path_out)
        return 1

    source = open(path)
    try:
        if not show_ids:
            shutil.copyfileobj(source, f)
            return 1

        printed = False
        message_id = None
        for line in source:
            if not printed and line.lower().startswith("message-id:"):
                # A header with no value yields a single field; skip it
                # rather than fail on the missing second field.
                fields = line.split()
                if len(fields) > 1:
                    message_id = fields[1].strip("<>")
            if (not printed) and (line == "\n" or line == "\r\n"):
                # First blank line: add the extra headers ahead of it.
                f.write("X-Mailpie-ID: %s\n" % path.replace("/", "")[-40:])
                if message_id:
                    message_id = urllib.quote(message_id, '')
                    f.write("X-Gmane-URL: http://mid.gmane.org/%s\n" % message_id)
                printed = True
            f.write(line)
    finally:
        source.close()
    return 1
|
|
|
|
def maybe_progress(m, a, b):
    """Forward a progress report to ``mailpie.log.progress``, thinned out
    as ``a`` grows.

    While the working value exceeds 100 it must be divisible by 10 and is
    then divided by 10; as soon as a remainder shows up the report is
    dropped.  Values of ``a`` up to 100 are therefore always reported.
    """
    scaled = a
    while scaled > 100:
        scaled, remainder = divmod(scaled, 10)
        if remainder:
            return
    mailpie.log.progress(m, a, b)
|
|
|
|
def _make_maildir():
    """Create an empty temporary maildir that is removed at interpreter exit."""
    maildir = tempfile.mkdtemp(prefix="mailpie", suffix=".maildir")
    for sub in ('new', 'cur', 'tmp'):
        os.mkdir(os.path.join(maildir, sub))
    atexit.register(shutil.rmtree, maildir)
    return maildir


def _quote_term(arg):
    """Double-quote a search term; for ``name=value`` quote only the value."""
    if "=" in arg:
        name, value = arg.split("=", 1)
        return '%s="%s"' % (name, value)
    return '"%s"' % arg


def main(args):
    """Parse the command line, run the search and deliver the results.

    ``args`` is the argument list without the program name.  Depending on
    the options the matching messages are written to a file, written to
    stdout, merely counted, or collected in a temporary mailbox that is
    opened in the mail reader.  The mail reader command comes from
    ``-M``/``--mailreader``, else from ``$MAILPIE_READER``, else defaults
    to mutt.

    Positional arguments of the form ``after=DATE`` / ``before=DATE`` set
    the date limits; every other argument is a search term.

    Raises:
        SystemExit: for usage errors, ``--help`` and swish-e failures.
    """
    before = after = target = None
    if os.environ.get("MAILPIE_READER"):
        mailreader = shlex.split(os.environ["MAILPIE_READER"])
    else:
        mailreader = ["mutt", "-R", "-eunmailboxes *", "-f"]
    limit = 2000
    thread = False
    show_ids = False
    format_mbox = False

    base = os.path.expanduser("~/.mailpie")

    try:
        # "mailreader=" needs the trailing "=": the option takes a value.
        opts, args = getopt.getopt(args, "B:mco:prM:a:b:l:tnih?",
            ("base=", "count", "out=", "stdout", "read", "mailreader=",
             "help", "after=", "before=", "limit=", "thread", "no-thread",
             "show-ids", "mbox"))
    except getopt.GetoptError as detail:
        usage(detail)

    for k, v in opts:
        if k in ("-o", "--out"): target = v
        elif k in ("-B", "--base"): base = v
        elif k in ("-p", "--stdout"): target = "-"
        elif k in ("-r", "--read"): target = None
        elif k in ("-c", "--count"): target = Count
        elif k in ("-M", "--mailreader"): mailreader = shlex.split(v)
        elif k in ("-m", "--mbox"): format_mbox = True
        elif k in ("-l", "--limit"): limit = int(v)
        elif k in ("-a", "--after"): after = mailpie.parsedate.parsedate(v)
        elif k in ("-b", "--before"): before = mailpie.parsedate.parsedate(v)
        elif k in ("-t", "--thread"): thread = True
        elif k in ("-n", "--no-thread"): thread = False
        elif k in ("-i", "--show-ids"): show_ids = True
        elif k in ("--help", "-h", "-?"): usage()

    # Any explicit target, and the ID headers, only work with mbox output.
    if show_ids or target:
        format_mbox = True

    indexes = mailpie.swishfilter.get_index_files(base)

    opened_output = False   # True only for a file this function opened itself
    if target is None:
        if sys.stdout.isatty():
            if format_mbox:
                f = tempfile.NamedTemporaryFile(prefix="mailpie", suffix=".mbox")
            else:
                f = _make_maildir()
        else:
            # Output is piped or redirected: behave like --stdout, which
            # implies mbox (a maildir cannot be written to a stream).
            target = "-"
            format_mbox = True
            f = sys.stdout
    elif target == "-":
        f = sys.stdout
    elif target is Count:
        f = None
    elif format_mbox:
        f = open(target, "w")
        opened_output = True
    else:
        f = _make_maildir()

    count = 0
    msgids = set()

    if thread:
        thread_db = bsddb.hashopen(base + "-volatile/thread.db", "r")

    sargs = []
    for a in args:
        if a.startswith("after="):
            after = mailpie.parsedate.parsedate(a[6:])
        elif a.startswith("before="):
            before = mailpie.parsedate.parsedate(a[7:])
        else:
            sargs.append(_quote_term(a))

    if not sargs:
        usage("At least one text search term is required")

    mailpie.log.progress("Searching for messages")
    try:
        matches = run_swish(indexes, sargs, before, after)
    except SwishError as detail:
        raise SystemExit(str(detail))

    if len(matches) > limit:
        mailpie.log.log("Will only show the first %d of %d matches", limit, len(matches))

    # Drop duplicates but keep swish's result order, so that the limit
    # really keeps the first matches; `hashes` is the membership set.
    hashes = set()
    ordered = []
    for msg_hash in matches:
        if msg_hash not in hashes:
            hashes.add(msg_hash)
            ordered.append(msg_hash)

    total = len(ordered)
    for i, msg_hash in enumerate(ordered):
        if i > limit: break
        maybe_progress("Retrieving message %d of %d", i, total)
        path = os.path.join(base, msg_hash[:2], msg_hash[2:])
        count += copymail(path, f, show_ids, format_mbox)
        if count == limit: break
        if thread:
            # Messages missing from the thread database have no id to chase.
            msgid = thread_db.get("h:" + msg_hash)
            if msgid is not None:
                msgids.add(msgid)

    if thread:
        printed = set(msgids)   # ids whose messages were delivered above
        visited = set()
        while msgids and count < limit:
            msgid = msgids.pop()
            if msgid in visited: continue
            mailpie.log.log("Chasing: [%d] %s", len(msgids), msgid)
            visited.add(msgid)
            related = set(thread_db.get("t:" + msgid, "").split()) - printed
            msgids.update(related)
            if msgid not in printed:
                msg_hash = thread_db.get('m:' + msgid)
                if msg_hash is None: continue
                if msg_hash in hashes: continue
                hashes.add(msg_hash)
                path = os.path.join(base, msg_hash[:2], msg_hash[2:])
                count += copymail(path, f, show_ids, format_mbox)
    mailpie.log.progress_clear()

    if target is None:
        if count == 0:
            mailpie.log.log("No match")
        else:
            if isinstance(f, str):
                mailbox = f             # temporary maildir directory
            else:
                f.flush()               # temporary mbox file
                mailbox = f.name
            os.spawnvp(os.P_WAIT, mailreader[0], mailreader + [mailbox])

    if target is Count:
        print(count)

    if opened_output:
        f.close()
|
|
|
|
# Script entry point: hand main() everything after the program name.
if __name__ == "__main__":
    main(sys.argv[1:])
|