mailpie/scripts/mailpie-search
Jeff Epler e78b6010c0 fix location of thread database when searching
Signed-off-by: Jeff Epler <jepler@unpythonic.net>
2016-05-09 19:21:55 -05:00

265 lines
9.5 KiB
Python
Executable file

#!/usr/bin/python
# vim: set fileencoding=utf-8 : sts=4 : et : sw=4
# This is a component of mailpie, a full-text search for email
#
# Copyright © 2008 Jeff Epler <jepler@unpythonic.net>
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import sys, os, subprocess, shutil, getopt, tempfile, re, shlex
import mailpie.swishfilter, mailpie.parsedate, mailpie.log
import bsddb, atexit, hashlib, urllib
def usage(result=0):
    """Print the command-line help text and exit.

    result is passed unchanged to SystemExit, so it may be an integer exit
    status or an error message.  A false value (the default 0) means the
    help was asked for and it is written to stdout; any other value marks a
    usage error and the help is written to stderr instead, so that it cannot
    end up inside search results that are being written to stdout.

    Raises SystemExit unconditionally.
    """
    text = """mailpie-search: Put search results in a mailbox
Usage: %s [-o file|-p|-m] [-b date] [-a date] [-M mailreader] terms...
-B dir, --base=dir Choose the base location of the mailpie storage
Default: $HOME/.mailpie
Action:
-c, --count Write number of matches
-o FILE, --out=FILE Write result to FILE
-p, --stdout Write result to stdout (implies --mbox)
-r, --read Open mailbox in mailreader (default)
-m, --mbox Output in mbox format (default: maildir)
-i, --show-ids Show the mailpie message ID in a header
(implies --mbox)
-M prog, --mailreader=PROG
Mailreader to invoke
(default: "mutt -R -e'unmailboxes *' -f")
Searching:
-b DATE, --before=DATE Only include messages before DATE
-a DATE, --after=DATE Only include messages after DATE
Date formats are like "date -d=DATE"
-l NUM, --limit=NUM Return no more than NUM
-t, --thread Include full threads
-n, --no-thread Do not include threads (default)
terms... See SWISH-RUN(1) for details on search terms
""" % os.path.basename(sys.argv[0])
    # Look the stream up at call time so redirected streams are honoured.
    stream = sys.stderr if result else sys.stdout
    stream.write(text + "\n")
    raise SystemExit(result)
class SwishError(RuntimeError):
    """Raised by run_swish when the swish-e process exits with a nonzero status."""
def run_swish(indexes, args, before=None, after=None):
    """Run swish-e and return its output lines.

    indexes -- index file names; each is passed to swish-e as -f<name>
    args    -- search terms; they are joined with spaces into the -w query
    before, after -- optional bounds for the "date" property.  When either
        one is set, a "-L date <after> <before>" limit is added, with the
        missing bound defaulting to sys.maxint (before) or 0 (after).

    Returns a list with one entry per line printed by swish-e, newline
    removed (main() treats each entry as a message hash).  An empty list is
    returned, without running swish-e, when args is empty.

    Raises SwishError if swish-e exits with a nonzero status; whatever it
    printed is included in the message.
    """
    if not args:
        # Callers take len() of the result, so hand back an empty list
        # rather than None.
        return []

    # NOTE(review): option meanings are per SWISH-RUN(1) -- -H0 is assumed to
    # suppress headers and -x to set the per-result output format.
    command = ['swish-e', '-H0',
               '-x<swishdocpath>\n',
               '-w', " ".join(args)]
    command.extend('-f%s' % index for index in indexes)
    if before or after:
        if before is None:
            before = sys.maxint
        if after is None:
            after = 0
        command.extend(["-L", "date", str(after), str(before)])

    # Give swish-e an empty stdin, and make sure our handle is closed again.
    with open(os.devnull, "r") as devnull:
        swish = subprocess.Popen(command, stdout=subprocess.PIPE,
                                 stdin=devnull)
        output = [row.strip("\n") for row in swish.stdout]
        exitcode = swish.wait()
        swish.stdout.close()

    if exitcode == 0:
        return output

    # Drop a trailing "." line so it does not show up in the error text.
    if output and output[-1] == ".":
        del output[-1]
    if not output:
        raise SwishError("Swish exited with code %d" % exitcode)
    raise SwishError(
        "Swish exited with code %d and the following information:\n "
        % exitcode
        + "\n ".join(output) + "\n\nSee 'man SWISH-RUN' for details on "
        "swish search terms")
class Count:
    """Marker used as the output target when only the match count is wanted.

    main() stores the class itself (never an instance) in its target variable
    for -c/--count and tests for it with "is".
    """
def copymail(path, f, show_ids, mbox):
    """Deliver the message stored at path to the output f.

    path     -- file holding one message
    f        -- output: a maildir directory name when mbox is false, a
                writable file object when mbox is true, or a false value to
                only check that the message exists
    show_ids -- in mbox mode, add an X-Mailpie-ID header (and an X-Gmane-URL
                header when a Message-ID was seen) at the end of the headers
    mbox     -- true to append the message text to f, false to symlink the
                message into the "cur" directory of the maildir f

    Returns 1 if the message was delivered (or merely exists, when f is
    false) and 0 if path is not a regular file or the maildir already holds
    a link for it.
    """
    if not os.path.isfile(path):
        return 0
    if not f:
        return 1

    if not mbox:
        # The link name is derived from the source path, so the same message
        # is never linked into the maildir twice.
        link = os.path.join(f, 'cur', hashlib.sha1(path).hexdigest() + ":2,S")
        if os.path.exists(link):
            return 0
        os.symlink(path, link)
        return 1

    with open(path) as message:
        if not show_ids:
            shutil.copyfileobj(message, f)
            return 1

        printed = False
        message_id = None
        for line in message:
            if not printed:
                if line.lower().startswith("message-id:"):
                    # A header with an empty value has no second field;
                    # leave message_id unset instead of failing.
                    fields = line.split()
                    if len(fields) > 1:
                        message_id = fields[1].strip("<>")
                if line == "\n" or line == "\r\n":
                    # First blank line: end of headers, so emit the extra
                    # headers just before it.
                    f.write("X-Mailpie-ID: %s\n" % path.replace("/", "")[-40:])
                    if message_id:
                        message_id = urllib.quote(message_id, '')
                        f.write("X-Gmane-URL: http://mid.gmane.org/%s\n" % message_id)
                    printed = True
            f.write(line)
    return 1
def maybe_progress(m, a, b):
    """Report progress through mailpie.log.progress, less often as a grows.

    Every call is reported while a is at most 100.  Beyond that, a is
    repeatedly divided by 10 until it is at most 100, and the call is
    dropped as soon as one of those divisions leaves a remainder -- so only
    every 10th value is reported above 100, every 100th above 1000, and so
    on.  a is expected to be an integer counter.
    """
    scaled = a
    while scaled > 100:
        scaled, remainder = divmod(scaled, 10)
        if remainder:
            return
    mailpie.log.progress(m, a, b)
def _make_maildir():
    """Create an empty temporary maildir and schedule its removal at exit."""
    path = tempfile.mkdtemp(prefix="mailpie", suffix=".maildir")
    for sub in ('new', 'cur', 'tmp'):
        os.mkdir(os.path.join(path, sub))
    atexit.register(shutil.rmtree, path)
    return path


def _quote_term(arg):
    """Wrap a search term (or the value of a name=value term) in double quotes."""
    if "=" in arg:
        name, value = arg.split("=", 1)
        return '%s="%s"' % (name, value)
    return '"%s"' % arg


def main(args):
    """Command-line entry point: search the mail index and deliver the matches.

    args is the argument list without the program name.  Options are
    described by usage(); the remaining arguments are search terms, where
    "after=DATE" and "before=DATE" set the date bounds instead of being
    passed to swish.

    Matches are written as an mbox to a file or stdout, or collected in a
    temporary mbox/maildir that is opened in the mail reader, or only
    counted (-c).  With -t, messages from the same threads as the matches
    are added until the limit is reached.

    Exits through SystemExit on usage errors and when swish fails.
    """
    before = after = target = None
    if os.environ.get("MAILPIE_READER"):
        mailreader = shlex.split(os.environ["MAILPIE_READER"])
    else:
        mailreader = ["mutt", "-R", "-eunmailboxes *", "-f"]
    limit = 2000
    thread = False
    show_ids = False
    format_mbox = False
    base = os.path.expanduser("~/.mailpie")

    try:
        # "mailreader=" needs the trailing "=": the option takes an argument.
        opts, args = getopt.getopt(args, "B:mco:prM:a:b:l:tnih?",
            ("base=", "count", "out=", "stdout", "read", "mailreader=",
             "help", "after=", "before=", "limit=", "thread", "no-thread",
             "show-ids", "mbox"))
    except getopt.GetoptError as detail:
        usage(str(detail))

    for k, v in opts:
        if k in ("-o", "--out"): target = v
        elif k in ("-B", "--base"): base = v
        elif k in ("-p", "--stdout"): target = "-"
        elif k in ("-r", "--read"): target = None
        elif k in ("-c", "--count"): target = Count
        elif k in ("-M", "--mailreader"): mailreader = shlex.split(v)
        elif k in ("-m", "--mbox"): format_mbox = True
        elif k in ("-l", "--limit"):
            try:
                limit = int(v)
            except ValueError:
                usage("Invalid limit: %r" % v)
        elif k in ("-a", "--after"): after = mailpie.parsedate.parsedate(v)
        elif k in ("-b", "--before"): before = mailpie.parsedate.parsedate(v)
        elif k in ("-t", "--thread"): thread = True
        elif k in ("-n", "--no-thread"): thread = False
        elif k in ("-i", "--show-ids"): show_ids = True
        elif k in ("--help", "-h", "-?"): usage()

    # Any explicit target, and the extra ID headers, imply mbox output.
    if show_ids or target: format_mbox = True

    indexes = mailpie.swishfilter.get_index_files(base)

    opened_output = None
    if target is None:
        if sys.stdout.isatty():
            if format_mbox:
                f = tempfile.NamedTemporaryFile(prefix="mailpie", suffix=".mbox")
            else:
                f = _make_maildir()
        else:
            # Not interactive: fall back to writing to stdout, which can
            # only be done in mbox format.
            target = "-"
            format_mbox = True
            f = sys.stdout
    elif target == "-":
        f = sys.stdout
    elif target is Count:
        f = None
    elif format_mbox:
        f = opened_output = open(target, "w")
    else:
        # NOTE(review): only reachable with an empty -o argument, because any
        # other target forces format_mbox above.
        f = _make_maildir()

    count = 0
    msgids = set()

    if thread:
        db = base + "-volatile/thread.db"
        thread_db = bsddb.hashopen(db, "r")

    sargs = []
    for a in args:
        if a.startswith("after="): after = mailpie.parsedate.parsedate(a[6:])
        elif a.startswith("before="): before = mailpie.parsedate.parsedate(a[7:])
        else: sargs.append(_quote_term(a))
    if not sargs:
        usage("At least one text search term is required")

    mailpie.log.progress("Searching for messages")
    try:
        hashes = run_swish(indexes, sargs, before, after)
    except SwishError as detail:
        raise SystemExit(str(detail))

    if len(hashes) > limit:
        mailpie.log.log("Will only show the first %d of %d matches", limit, len(hashes))

    # Drop duplicates but keep the order in which swish reported the matches,
    # so that "the first N" really are the first N.
    seen = set()
    ranked = []
    for digest in hashes:
        if digest not in seen:
            seen.add(digest)
            ranked.append(digest)
    total = len(ranked)

    for i, digest in enumerate(ranked):
        if i > limit: break
        maybe_progress("Retrieving message %d of %d", i, total)
        path = os.path.join(base, digest[:2], digest[2:])
        count += copymail(path, f, show_ids, format_mbox)
        if count == limit: break
        if thread:
            msgid = thread_db.get("h:" + digest)
            # Messages without a thread database entry cannot be chased.
            if msgid is not None:
                msgids.add(msgid)

    if thread:
        printed = set(msgids)   # ids of the messages already delivered above
        visited = set()
        while msgids and count < limit:
            msgid = msgids.pop()
            if msgid in visited: continue
            mailpie.log.log("Chasing: [%d] %s", len(msgids), msgid)
            visited.add(msgid)
            related = set(thread_db.get("t:" + msgid, "").split()) - printed
            msgids.update(related)
            if msgid in printed: continue
            digest = thread_db.get('m:' + msgid)
            if digest is None: continue
            if digest in seen: continue
            seen.add(digest)
            path = os.path.join(base, digest[:2], digest[2:])
            count += copymail(path, f, show_ids, format_mbox)
        thread_db.close()

    mailpie.log.progress_clear()

    if opened_output is not None:
        opened_output.close()

    if target is None:
        if count == 0:
            mailpie.log.log("No match")
        elif isinstance(f, str):
            # Temporary maildir: f is the directory name.
            os.spawnvp(os.P_WAIT, mailreader[0], mailreader + [f])
        else:
            # Temporary mbox: make sure the reader sees everything written.
            f.flush()
            os.spawnvp(os.P_WAIT, mailreader[0], mailreader + [f.name])
    if target is Count:
        print(count)
if __name__ == "__main__":
    # Executed as a script: hand main() everything after the program name.
    main(sys.argv[1:])