758 lines
20 KiB
C++
758 lines
20 KiB
C++
/*
 * Copyright © 2013 Jeff Epler <jepler@unpythonic.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
|
||
|
||
#ifdef ANA_AS_PYMODULE
|
||
#define PY_SSIZE_T_CLEAN
|
||
#include <Python.h>
|
||
#endif
|
||
|
||
#include <algorithm>
|
||
#include <cstdio>
|
||
#include <cstring>
|
||
#include <fstream>
|
||
#include <functional>
|
||
#include <inttypes.h>
|
||
#include <iomanip>
|
||
#include <iostream>
|
||
#include <sstream>
|
||
#include <stdexcept>
|
||
#include <stdint.h>
|
||
#include <string>
|
||
#include <unistd.h>
|
||
#include <vector>
|
||
|
||
using namespace std;
|
||
|
||
/*
 * Letter summary of one word, stored in front of the word's text.
 *
 * The struct only declares one byte of text (w[1]); a word built from a
 * string lives in storage that the caller has over-allocated so that the
 * remaining characters fit directly behind the struct (see make_word()).
 */
struct worddata
{
    /* Build the empty summary: no letters and an empty text. */
    worddata()
    {
        m = 0;
        l = 0;
        memset(&c, 0, sizeof(c));
        w[0] = 0;
    }

    /* True when the letter mask is non-zero. */
    operator bool() const { return m != 0; }

    uint32_t m;           // letter mask: bit o is set for each counted letter 'a'+o
    unsigned char c[26];  // occurrences of each letter, index 0 = 'a'
    uint16_t l;           // number of characters that were counted as letters
    char w[1];            // NUL-terminated text; extends past the struct

private:
    /* Note: Constructor assumes that storage for word has been
       allocated!  Doesn't work with stacked or regular new'd storage.
    */
    worddata(const char *word)
    {
        m = 0;
        l = 0;
        strcpy(w, word);
        memset(&c, 0, sizeof(c));

        for(const char *s = word; *s; s++) {
            // The <ctype.h> functions require a value representable as
            // unsigned char; a plain (possibly negative) char is undefined.
            const unsigned char ch = static_cast<unsigned char>(*s);
            if(!isalpha(ch)) continue;
            const int o = tolower(ch) - 'a';
            // Guard the table index in case the active locale classifies
            // something outside a-z as alphabetic.
            if(o < 0 || o >= 26) continue;
            l++;
            c[o]++;
            m |= (uint32_t(1) << o);
        }
    }

    /* Allocate raw storage large enough for the struct plus the text of
       `word` and construct a worddata in it.  Release with delete_word(). */
    static worddata *make_word(const char *word) {
        size_t sz = sizeof(worddata) + strlen(word);
        char *storage = new char[sz];
        worddata *r = reinterpret_cast<worddata *>(storage);
        new(r) worddata(word);
        return r;
    }

    /* Free storage obtained from make_word(). */
    static void delete_word(worddata *word) {
        delete[] reinterpret_cast<char*>(word);
    }

    friend struct wordholder;
    friend struct dict;
};
|
||
|
||
struct wordholder
|
||
{
|
||
wordholder() : w(worddata::make_word("")) {}
|
||
wordholder(const wordholder &h) : w(worddata::make_word(h.w->w)) {}
|
||
wordholder(const char *s) : w(worddata::make_word(s)) {}
|
||
wordholder(const string &s)
|
||
: w(worddata::make_word(s.c_str())) {}
|
||
|
||
~wordholder() {
|
||
worddata::delete_word(w);
|
||
}
|
||
const worddata &value() const { return *w; }
|
||
worddata &value() { return *w; }
|
||
worddata *w;
|
||
private:
|
||
wordholder &operator=(const wordholder &o);
|
||
};
|
||
|
||
inline size_t lcnt(const struct worddata &w)
|
||
{
|
||
return w.l;
|
||
}
|
||
|
||
inline bool candidate(const worddata &a, const worddata &b)
|
||
{
|
||
if((a.m & b.m) != b.m) return false;
|
||
for(int i=0; i<26; i++)
|
||
if(a.c[i] < b.c[i]) return false;
|
||
return true;
|
||
}
|
||
|
||
/*
 * Letters of `a` with the letters of `b` removed.  Counts are subtracted
 * per letter in unsigned char arithmetic, the mask is rebuilt from the
 * non-zero results, and the letter totals are subtracted.  The result has
 * an empty text.  No check is made that b fits inside a.
 */
inline worddata operator-(const worddata &a, const worddata &b)
{
    worddata diff;

    for(int i = 0; i < 26; ++i)
    {
        const unsigned char left =
            static_cast<unsigned char>(a.c[i] - b.c[i]);
        diff.c[i] = left;
        if(left != 0) diff.m |= (1 << i);
    }
    diff.l = a.l - b.l;
    return diff;
}
|
||
|
||
/* Write the raw bytes of one object to a stream. */
template<class T>
void bwrite(std::ostream &o, const T &t) {
    const char *bytes = reinterpret_cast<const char *>(&t);
    o.write(bytes, sizeof(T));
}
|
||
|
||
/* Write the raw bytes of `n` consecutive objects starting at `t`. */
template<class T>
void bwrite(std::ostream &o, const T *t, size_t n) {
    const char *bytes = reinterpret_cast<const char *>(t);
    const size_t count = sizeof(T) * n;
    o.write(bytes, count);
}
|
||
|
||
/* Fill one object with raw bytes read from a stream. */
template<class T>
void bread(std::istream &o, T &t) {
    char *bytes = reinterpret_cast<char *>(&t);
    o.read(bytes, sizeof(T));
}
|
||
|
||
/* Fill `n` consecutive objects starting at `t` with raw bytes from a stream. */
template<class T>
void bread(std::istream &o, T *t, size_t n) {
    char *bytes = reinterpret_cast<char *>(t);
    const size_t count = sizeof(T) * n;
    o.read(bytes, count);
}
|
||
|
||
/* True when every character of `s` is a 7-bit value (also for the empty string). */
inline bool ascii(const std::string &s)
{
    return std::all_of(s.begin(), s.end(), [](char ch) {
        // Same test as isascii(): no bit above the low seven may be set.
        return (static_cast<unsigned char>(ch) & 0x80) == 0;
    });
}
|
||
|
||
struct dict {
|
||
struct byinvwordlen {
|
||
byinvwordlen(const dict &d) : d(d) {}
|
||
bool operator()(size_t a, size_t b) const {
|
||
return lcnt(*reinterpret_cast<const worddata*>(&d.wdata[a]))
|
||
> lcnt(*reinterpret_cast<const worddata*>(&d.wdata[b]));
|
||
}
|
||
const dict &d;
|
||
};
|
||
|
||
static const int32_t signature = 0x414e4144;
|
||
static const int32_t signature2 = sizeof(size_t);
|
||
static const int32_t signature_rev = 0x44414e41;
|
||
vector<size_t> woff;
|
||
vector<char> wdata;
|
||
|
||
const worddata *getword(size_t i) const {
|
||
return reinterpret_cast<const worddata*>(&wdata[woff.at(i)]);
|
||
}
|
||
|
||
worddata *getword(size_t i) {
|
||
return reinterpret_cast<worddata*>(&wdata[woff.at(i)]);
|
||
}
|
||
|
||
// 16-align words
|
||
static size_t pad(size_t t) { return (t + 15) & ~size_t(15); }
|
||
|
||
size_t nwords() const { return woff.size(); }
|
||
|
||
void addword(const char *word) {
|
||
size_t i = woff.size();
|
||
size_t sz = pad(sizeof(worddata) + strlen(word));
|
||
size_t off = wdata.size();
|
||
wdata.resize(off+sz);
|
||
woff.push_back(off);
|
||
new(getword(i)) worddata(word);
|
||
}
|
||
|
||
void readdict(const char *dictpath) {
|
||
ifstream i(dictpath);
|
||
string w;
|
||
while((i >> w))
|
||
{
|
||
if(!ascii(w)) continue;
|
||
addword(w.c_str());
|
||
}
|
||
sort_me();
|
||
}
|
||
|
||
void sort_me() {
|
||
stable_sort(woff.begin(), woff.end(), byinvwordlen(*this));
|
||
}
|
||
|
||
void serialize(const char *ofn) const {
|
||
ofstream o(ofn, ios::binary);
|
||
int32_t s = signature;
|
||
bwrite(o, s);
|
||
s = signature2;
|
||
bwrite(o, s);
|
||
bwrite(o, woff.size());
|
||
bwrite(o, &woff[0], woff.size());
|
||
bwrite(o, wdata.size());
|
||
bwrite(o, &wdata[0], wdata.size());
|
||
}
|
||
|
||
void deserialize(const char *ifn) {
|
||
ifstream i(ifn, ios::binary);
|
||
int32_t sig;
|
||
bread(i, sig);
|
||
if(sig == signature_rev)
|
||
throw runtime_error("archive is for other-endian machine");
|
||
else if(sig != signature)
|
||
throw runtime_error("not an anagram dictionary archive");
|
||
|
||
bread(i, sig);
|
||
if(sig != signature2)
|
||
throw runtime_error("archive is different-size_t machine");
|
||
|
||
size_t sz;
|
||
bread(i, sz);
|
||
woff.resize(sz);
|
||
bread(i, &*(woff.begin()), sz);
|
||
|
||
bread(i, sz);
|
||
wdata.resize(sz);
|
||
bread(i, &*(wdata.begin()), sz);
|
||
}
|
||
};
|
||
|
||
/* Print the command-line synopsis to stderr and exit with status 1. */
void usage(const char *progname)
{
    static const char synopsis[] =
        "Usage: %s [-d dictionary] [-l len1,...] [-m minlen]\n\t"
        "[-M maxlen] [-a] terms... -- required...\n";
    fprintf(stderr, synopsis, progname);
    exit(1);
}
|
||
|
||
void print_stack(ostream &o, vector<worddata *> &s)
|
||
{
|
||
for(vector<worddata *>::const_iterator it = s.begin(); it != s.end(); it++)
|
||
{
|
||
o << (*it)->w;
|
||
if(it + 1 == s.end()) o << "\n";
|
||
else o << " ";
|
||
}
|
||
}
|
||
|
||
struct filterer
|
||
{
|
||
filterer(const worddata &a) : a(a) { }
|
||
bool operator()(const worddata *b) const { return candidate(a, *b); }
|
||
const worddata &a;
|
||
};
|
||
|
||
/*
 * Parse a list of unsigned numbers such as "3,4,5".  After each number a
 * single ',' is skipped if present; parsing stops at the first position
 * where no number can be read.
 */
std::vector<size_t> parse_lengths(const char *l)
{
    std::vector<size_t> out;
    std::istringstream in(l);

    for(size_t value; in >> value; )
    {
        out.push_back(value);
        if(in.peek() == ',') in.get();
    }
    return out;
}
|
||
|
||
#include <sys/time.h>
|
||
#include <sys/resource.h>
|
||
/* User plus system CPU time of this process, in seconds, as reported by
   getrusage(RUSAGE_SELF). */
double cputime()
{
    struct rusage usage_now;
    getrusage(RUSAGE_SELF, &usage_now);

    const double user_s =
        usage_now.ru_utime.tv_sec + usage_now.ru_utime.tv_usec * 1e-6;
    const double system_s =
        usage_now.ru_stime.tv_sec + usage_now.ru_stime.tv_usec * 1e-6;
    return user_s + system_s;
}
|
||
|
||
|
||
struct ana_cfg {
|
||
ana_cfg()
|
||
: apos(0), minlen(3), maxlen(10),
|
||
total_matches(0), max_matches(1000), total_searches(0),
|
||
max_searches(1000000) {}
|
||
|
||
ana_cfg(bool apos, size_t minlen, size_t maxlen, size_t max_matches,
|
||
size_t max_searches, const vector<size_t>& lengths,
|
||
const string &ws, const string &rs)
|
||
: apos(apos), minlen(minlen), maxlen(maxlen),
|
||
total_matches(0), max_matches(max_matches), total_searches(0),
|
||
max_searches(max_searches), lengths(lengths), rs(rs), ww(wordholder(ws).value() - wordholder(rs).value()) {
|
||
}
|
||
|
||
bool apos;
|
||
size_t minlen, maxlen;
|
||
size_t total_matches, max_matches, total_searches, max_searches;
|
||
vector<size_t> lengths;
|
||
std::string rs;
|
||
worddata ww;
|
||
};
|
||
|
||
struct ana_frame {
|
||
worddata l;
|
||
vector<const worddata*> c;
|
||
vector<const worddata*>::iterator st, en;
|
||
vector<size_t>::iterator lst, len;
|
||
};
|
||
|
||
struct ana_st {
|
||
ana_cfg cfg;
|
||
double t0;
|
||
vector<ana_frame> fr;
|
||
vector<const char *> words;
|
||
};
|
||
|
||
/*
 * Prepare `st` for a new search over dictionary `d` with settings `cfg`.
 *
 * Copies the settings, records the start time, and creates the root frame
 * whose candidate list holds every dictionary word that passes the length
 * limits, the apostrophe rule and the letter-pool test.  The required text
 * (if any) becomes the first word of every result.
 */
void setup(ana_st &st, const ana_cfg &cfg, const dict &d)
{
    st.cfg = cfg;
    st.t0 = cputime();
    st.fr.clear();
    // The stack holds the root frame plus one frame per chosen word, and a
    // word may cover as little as one letter, so lcnt(ww) alone is one
    // frame short of the deepest possible stack.
    st.fr.reserve(lcnt(cfg.ww) + 1);
    st.fr.push_back(ana_frame());
    ana_frame &f = st.fr.back();
    f.l = st.cfg.ww;
    // Iterate over the copy owned by st, not over the caller's cfg.
    f.lst = st.cfg.lengths.begin();
    f.len = st.cfg.lengths.end();
    filterer fi(f.l);
    for(size_t i=0; i != d.nwords(); i++)
    {
        const worddata *it = d.getword(i);
        if(lcnt(*it) < cfg.minlen || lcnt(*it) > cfg.maxlen) continue;
        if(!cfg.apos && strchr(it->w, '\'')) continue;
        if(!fi(it)) continue;
        f.c.push_back(it);
    }
    f.st = f.c.begin();
    f.en = f.c.end();
    st.words.clear();
    if(!st.cfg.rs.empty()) st.words.push_back(st.cfg.rs.c_str());
}
|
||
|
||
/*
 * Join `words` into `resultline`, putting a space in front of each word
 * whenever the line built so far is non-empty.  Any previous content of
 * `resultline` is discarded.  Always returns true.
 *
 * The vector is taken by const reference so that it is not copied on
 * every call.
 */
bool words_to_string(const std::vector<const char*> &words, std::string &resultline) {
    resultline.clear();

    for(std::vector<const char *>::const_iterator it = words.begin();
            it != words.end(); ++it)
    {
        if(!resultline.empty()) resultline += ' ';
        resultline += *it;
    }
    return true;
}
|
||
|
||
bool step(ana_st &st, string &resultline) {
|
||
resultline = string();
|
||
|
||
if(st.cfg.total_matches == st.cfg.max_matches) {
|
||
ostringstream o;
|
||
o << "# Reached maximum of " << st.cfg.total_matches << " matches in " << setprecision(2) << (cputime() - st.t0) << "s";
|
||
resultline = o.str();
|
||
return false;
|
||
}
|
||
|
||
while(!st.fr.empty()) {
|
||
if(st.cfg.total_searches == st.cfg.max_searches) {
|
||
ostringstream o;
|
||
o << "# Reached maximum of " << st.cfg.total_searches << " searches in " << setprecision(2) << (cputime() - st.t0) << "s";
|
||
resultline = o.str();
|
||
return false;
|
||
}
|
||
|
||
st.cfg.total_searches ++;
|
||
|
||
ana_frame &f = st.fr.back();
|
||
|
||
if(!f.l) {
|
||
st.cfg.total_matches ++;
|
||
words_to_string(st.words, resultline);
|
||
st.words.pop_back();
|
||
st.fr.pop_back();
|
||
return true;
|
||
}
|
||
|
||
if(f.lst != f.len) {
|
||
size_t reqlen = *f.lst;
|
||
while(f.st != f.en && lcnt(**f.st) != reqlen) f.st ++;
|
||
}
|
||
|
||
if(f.st == f.en)
|
||
{
|
||
st.fr.pop_back();
|
||
st.words.pop_back();
|
||
continue;
|
||
}
|
||
|
||
st.fr.push_back(ana_frame());
|
||
st.words.push_back((*f.st)->w);
|
||
|
||
ana_frame &nf = st.fr.back(); // guaranteed not to move
|
||
nf.l = f.l - **f.st;
|
||
nf.c.clear();
|
||
if(f.lst != f.len) {
|
||
copy_if(f.c.begin(), f.en, back_inserter(nf.c), filterer(nf.l));
|
||
nf.lst = f.lst + 1;
|
||
nf.len = f.len;
|
||
} else {
|
||
copy_if(f.st, f.en, back_inserter(nf.c), filterer(nf.l));
|
||
nf.lst = nf.len = f.len;
|
||
}
|
||
nf.st = nf.c.begin();
|
||
nf.en = nf.c.end();
|
||
|
||
f.st++;
|
||
}
|
||
|
||
ostringstream o;
|
||
o << "# " << st.cfg.total_matches << " matches ("
|
||
<< st.cfg.total_searches << " searches) in "
|
||
<< setprecision(2) << (cputime() - st.t0) << "s";
|
||
resultline = o.str();
|
||
return false;
|
||
}
|
||
|
||
int run(dict &d, ostream &o, ana_cfg &cfg) {
|
||
ana_st st;
|
||
setup(st, cfg, d);
|
||
while(1) {
|
||
std::string line;
|
||
bool res = step(st, line);
|
||
o << line << endl;
|
||
if(!res) break;
|
||
}
|
||
return 0;
|
||
}
|
||
|
||
|
||
size_t maxsearch = 1000000;
|
||
int run(dict &d, ostream &o, bool apos, size_t minlen, size_t maxlen, size_t maxcount, vector<size_t> &lengths, string &aw, string &rw) {
|
||
ana_cfg cfg(apos, minlen, maxlen, maxcount, maxsearch, lengths, aw, rw);
|
||
return run(d, o, cfg);
|
||
}
|
||
|
||
void parse(const std::string &s, ana_cfg &st, bool def_apos, size_t def_minlen, size_t def_maxlen, size_t def_maxcount) {
|
||
string an;
|
||
string reqd;
|
||
|
||
bool apos = def_apos;
|
||
size_t minlen = def_minlen;
|
||
size_t maxlen = def_maxlen;
|
||
size_t maxcount = def_maxcount;
|
||
vector<size_t> lengths;
|
||
string aw, rw;
|
||
|
||
int c;
|
||
|
||
istringstream i(s);
|
||
|
||
while((c = i.peek()) != EOF)
|
||
{
|
||
switch(c) {
|
||
case '<':
|
||
(void) i.get();
|
||
i >> maxlen; break;
|
||
case '>':
|
||
(void) i.get();
|
||
i >> minlen; break;
|
||
case '0': case '1': case '2': case '3': case '4':
|
||
case '5': case '6': case '7': case '8': case '9':
|
||
{
|
||
size_t len;
|
||
i >> len;
|
||
lengths.push_back(len);
|
||
break;
|
||
}
|
||
case '+': case '=':
|
||
{
|
||
string s;
|
||
(void) i.get();
|
||
i >> s;
|
||
if(!rw.empty()) rw = rw + " " + s;
|
||
else rw = s;
|
||
break;
|
||
}
|
||
case '-':
|
||
{
|
||
(void) i.get();
|
||
i >> maxcount;
|
||
maxcount = min(def_maxcount, maxcount);
|
||
break;
|
||
}
|
||
case '\'':
|
||
(void) i.get();
|
||
apos = !apos;
|
||
break;
|
||
case '?':
|
||
(void) i.get();
|
||
break;
|
||
default:
|
||
{
|
||
if(isspace(c)) {
|
||
(void) i.get();
|
||
break;
|
||
}
|
||
string s;
|
||
i >> s;
|
||
if(!aw.empty()) aw = aw + " " + s;
|
||
else aw = s;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
st.~ana_cfg();
|
||
new(&st) ana_cfg(apos, minlen, maxlen, maxcount, maxsearch,
|
||
lengths, aw, rw);
|
||
}
|
||
|
||
void serve(istream &i, ostream &o, dict &d, bool def_apos, size_t def_minlen, size_t def_maxlen, size_t def_maxcount) {
|
||
string s;
|
||
while((getline(i, s))) {
|
||
ana_cfg cfg;
|
||
parse(s, cfg, def_apos, def_minlen, def_maxlen, def_maxcount);
|
||
run(d, o, cfg);
|
||
o.put('\n'); o.flush();
|
||
}
|
||
}
|
||
|
||
#ifdef ANA_AS_PYMODULE
|
||
struct dict_object {
|
||
PyObject_HEAD
|
||
dict d;
|
||
};
|
||
|
||
/* Type object for dict_object.  Only the leading slots are given here;
   flags, constructor, destructor and methods are assigned in initana(). */
static PyTypeObject dict_type = {
    PyObject_HEAD_INIT(NULL)
    0,                    /* slot following the object header */
    "ana.anadict",        /* type name */
    sizeof(dict_object),  /* instance size */
};
|
||
|
||
struct search_object {
|
||
PyObject_HEAD;
|
||
dict_object *d;
|
||
ana_st *st;
|
||
};
|
||
|
||
/* Type object for search_object.  Only the leading slots are given here;
   flags, constructor, destructor and iterator slots are assigned in
   initana(). */
static PyTypeObject search_type = {
    PyObject_HEAD_INIT(NULL)
    0,                      /* slot following the object header */
    "ana.results",          /* type name */
    sizeof(search_object),  /* instance size */
};
|
||
|
||
/* Allocate a search_object through the type's tp_alloc slot.  `args` and
   `kw` are ignored.  The result is whatever tp_alloc returned. */
search_object *search_new(PyTypeObject *type, PyObject *args, PyObject *kw)
{
    PyObject *raw = type->tp_alloc(type, 0);
    return reinterpret_cast<search_object*>(raw);
}
|
||
|
||
/*
 * Method "run" of ana.anadict: start a search and return its iterator.
 *
 * Python arguments: terms (string, required), then optional apos (int),
 * minlen, maxlen and maxcount.  `terms` uses the query syntax understood
 * by parse(); the optional arguments are the defaults handed to it.
 *
 * Returns a new search_object, or NULL with a Python exception set when
 * argument parsing fails, the object cannot be allocated, or preparing
 * the search throws a C++ exception.
 */
PyObject *py_run(PyObject *self, PyObject *args, PyObject *keywds) {
    dict_object *d = (dict_object*)self;
    int apos = false;
    Py_ssize_t minlen = 3;
    Py_ssize_t maxlen = 11;
    Py_ssize_t maxcount = 1000;

    char *terms;
    Py_ssize_t terms_sz;
    static const char * kwlist[]
        = {"terms", "apos", "minlen", "maxlen", "maxcount", NULL};
    if(!PyArg_ParseTupleAndKeywords(args, keywds, "s#|innn:ana.dict.run",
                const_cast<char**>(kwlist),
                &terms, &terms_sz, &apos, &minlen, &maxlen, &maxcount))
        return NULL;

    search_object *o = search_new(&search_type, 0, 0);
    if(!o) return NULL;   // allocation failed; exception already set

    o->d = d;
    Py_INCREF(d);

    // C++ exceptions must not travel through the interpreter's C frames.
    try {
        std::string query(terms, terms+terms_sz);
        ana_cfg cfg;
        o->st = new ana_st;
        parse(query, cfg, apos, minlen, maxlen, maxcount);
        setup(*o->st, cfg, d->d);
    } catch(const std::exception &e) {
        // Dropping the last reference releases the dictionary reference
        // and any search state through the type's deallocator.
        Py_DECREF(o);
        PyErr_SetString(PyExc_RuntimeError, e.what());
        return NULL;
    }

    return reinterpret_cast<PyObject*>(o);
}
|
||
|
||
/* Methods of ana.anadict; the empty entry ends the table. */
static PyMethodDef dict_methods[] = {
    {
        "run",
        reinterpret_cast<PyCFunction>(py_run),
        METH_VARARGS|METH_KEYWORDS,
        "Run one anagram"
    },
    {},
};
|
||
/* Allocate a dict_object and construct its (empty) dictionary in place.
   `args` and `kw` are ignored.  Returns NULL when the allocation fails. */
static PyObject *
dict_new(PyTypeObject *type, PyObject *args, PyObject *kw) {
    dict_object *self = reinterpret_cast<dict_object*>(type->tp_alloc(type, 0));
    // Constructing into a failed allocation would write through NULL.
    if(!self) return NULL;
    new(&self->d) dict();
    return (PyObject *)self;
}
|
||
|
||
static void
|
||
dict_dealloc(dict_object *self) {
|
||
self->d.~dict();
|
||
self->ob_type->tp_free((PyObject*)self);
|
||
}
|
||
|
||
/*
 * Module function "from_ascii": build a dictionary from a text word list.
 *
 * Python argument: the file path (string).  Returns a new ana.anadict, or
 * NULL with a Python exception set when the argument is invalid, the
 * object cannot be allocated, or reading throws a C++ exception.
 */
static PyObject *
py_fromascii(PyObject *self, PyObject *args) {
    // Check the arguments before allocating so that a bad call does not
    // leave an unreferenced object behind.
    char *path;
    if(!PyArg_ParseTuple(args, "s", &path)) return NULL;

    PyObject *d = dict_new(&dict_type, NULL, NULL);
    if(!d) return NULL;
    dict_object *r = reinterpret_cast<dict_object*>(d);

    // C++ exceptions must not travel through the interpreter's C frames.
    try {
        r->d.readdict(path);
    } catch(const std::exception &e) {
        Py_DECREF(d);
        PyErr_SetString(PyExc_RuntimeError, e.what());
        return NULL;
    }
    return d;
}
|
||
|
||
/*
 * Module function "from_binary": load a dictionary from a binary archive.
 *
 * Python argument: the file path (string).  Returns a new ana.anadict, or
 * NULL with a Python exception set when the argument is invalid, the
 * object cannot be allocated, or the archive is rejected
 * (dict::deserialize throws std::runtime_error in that case).
 */
static PyObject *
py_frombin(PyObject *self, PyObject *args) {
    // Check the arguments before allocating so that a bad call does not
    // leave an unreferenced object behind.
    char *path;
    if(!PyArg_ParseTuple(args, "s", &path)) return NULL;

    PyObject *d = dict_new(&dict_type, NULL, NULL);
    if(!d) return NULL;
    dict_object *r = reinterpret_cast<dict_object*>(d);

    // C++ exceptions must not travel through the interpreter's C frames.
    try {
        r->d.deserialize(path);
    } catch(const std::exception &e) {
        Py_DECREF(d);
        PyErr_SetString(PyExc_RuntimeError, e.what());
        return NULL;
    }
    return d;
}
|
||
|
||
/* Module-level functions of "ana"; the empty entry ends the table. */
static PyMethodDef methods[] = {
    {
        "from_ascii", py_fromascii, METH_VARARGS,
        "Parse ASCII dictionary"
    },
    {
        "from_binary", py_frombin, METH_VARARGS,
        "Parse binary dictionary"
    },
    {},
};
|
||
|
||
/* tp_iter slot: a search object is its own iterator, so hand back a new
   reference to it. */
static PyObject *
search_iter(PyObject *self) {
    PyObject *same = self;
    Py_INCREF(same);
    return same;
}
|
||
|
||
// Note: it's up to the user to ensure no more than one thread is calling
|
||
// .next() on a specific search_object at the same time
|
||
/* tp_iternext slot: produce the next line of the search as a string.
   The line for which step() reports "no more" is still returned, and the
   search state is freed at that point; the call after that returns NULL.
   step() runs between Py_BEGIN_ALLOW_THREADS and Py_END_ALLOW_THREADS. */
static PyObject *
search_iternext(search_object *self) {
    if(!self->st) return NULL;

    std::string line;
    Py_BEGIN_ALLOW_THREADS
    const bool more = step(*self->st, line);
    if(!more) {
        delete self->st;
        self->st = NULL;
    }
    Py_END_ALLOW_THREADS
    return PyString_FromStringAndSize(line.data(), line.size());
}
|
||
|
||
static void
|
||
search_dealloc(search_object *self) {
|
||
Py_XDECREF(self->d);
|
||
delete self->st;
|
||
self->ob_type->tp_free((PyObject*)self);
|
||
}
|
||
|
||
/*
 * Module initialisation for "ana".
 *
 * Creates the module, fills in the remaining slots of both type objects,
 * readies them, and publishes the dictionary type as ana.anadict.
 * Returns early if the module cannot be created or a type fails to ready.
 */
PyMODINIT_FUNC
initana(void) {
    PyObject *m;
    m = Py_InitModule("ana", methods);
    if(!m) return;

    dict_type.tp_flags = Py_TPFLAGS_DEFAULT;
    dict_type.tp_new = dict_new;
    dict_type.tp_dealloc = reinterpret_cast<destructor>(dict_dealloc);
    dict_type.tp_methods = dict_methods;
    if(PyType_Ready(&dict_type) < 0) return;

    search_type.tp_flags = Py_TPFLAGS_DEFAULT;
    search_type.tp_new = reinterpret_cast<newfunc>(search_new);
    search_type.tp_dealloc = reinterpret_cast<destructor>(search_dealloc);
    search_type.tp_iter = reinterpret_cast<getiterfunc>(search_iter);
    search_type.tp_iternext =
        reinterpret_cast<iternextfunc>(search_iternext);
    if(PyType_Ready(&search_type) < 0) return;

    // PyModule_AddObject takes over one reference to the object, so the
    // statically allocated type needs a reference of its own first.
    Py_INCREF(&dict_type);
    PyModule_AddObject(m, "anadict", (PyObject *)&dict_type);
}
|
||
|
||
#else
|
||
int main(int argc, char **argv)
|
||
{
|
||
const char *dictpath=0, *defdict="/usr/share/dict/words", *bindict=0;
|
||
int opt;
|
||
size_t minlen=3, maxlen=11, maxcount=1000;
|
||
bool apos=false, server=false;
|
||
vector<size_t> lengths;
|
||
string aw;
|
||
|
||
while((opt = getopt(argc, argv, "-aD:d:M:m:l:s")) != -1)
|
||
{
|
||
switch(opt)
|
||
{
|
||
case 'a': apos = !apos; break;
|
||
case 'L': maxcount = atoi(optarg); break;
|
||
case 'D': bindict = optarg; break;
|
||
case 'd': dictpath = optarg; break;
|
||
case 'M': maxlen = atoi(optarg); break;
|
||
case 'm': minlen = atoi(optarg); break;
|
||
case 'l': lengths = parse_lengths(optarg); break;
|
||
case 's': server = !server; break;
|
||
case 1: aw += optarg; break;
|
||
default: usage(argv[0]);
|
||
}
|
||
}
|
||
|
||
dict d;
|
||
if(bindict && dictpath) {
|
||
d.readdict(dictpath);
|
||
d.serialize(bindict);
|
||
cout << "# Read and serialized " << d.nwords() << " candidate words in " << setprecision(2) << cputime() << "s\n";
|
||
} else if(bindict) {
|
||
d.deserialize(bindict);
|
||
cout << "# Deserialized " << d.nwords() << " candidate words in " << setprecision(2) << cputime() << "s\n";
|
||
} else {
|
||
d.readdict(dictpath ? dictpath : defdict);
|
||
cout << "# Read " << d.nwords() << " candidate words in " << setprecision(2) << cputime() << "s\n";
|
||
}
|
||
|
||
if(server) {
|
||
serve(cin, cout, d, apos, minlen, maxlen, maxcount);
|
||
return 0;
|
||
} else {
|
||
string rw;
|
||
for(int i=optind; i<argc; i++)
|
||
{
|
||
if(rw.empty()) rw = i;
|
||
else rw = rw + " " + argv[i];
|
||
}
|
||
return run(d, cout, apos, minlen, maxlen, maxcount, lengths, aw, rw);
|
||
}
|
||
}
|
||
#endif
|