Start using pcre_dfa_exec instead of pcre_exec. This should be much faster.

2017-12-26 22:13:08 -08:00
22 changed files with 97 additions and 337 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -1,14 +1,9 @@
 language: c
-dist: xenial
 sudo: false

 branches:
  only:
    - master
-    - ppc64le
-arch:
-  - amd64
-  - ppc64le

 compiler:
  - clang
@ -27,12 +22,12 @@ addons:

 env:
  global:
-    - LLVM_VERSION=6.0.1
+    - LLVM_VERSION=3.8.0
    - LLVM_PATH=$HOME/clang+llvm
    - CLANG_FORMAT=$LLVM_PATH/bin/clang-format

 before_install:
-  - wget http://llvm.org/releases/$LLVM_VERSION/clang+llvm-$LLVM_VERSION-x86_64-linux-gnu-ubuntu-16.04.tar.xz -O $LLVM_PATH.tar.xz
+  - wget http://llvm.org/releases/$LLVM_VERSION/clang+llvm-$LLVM_VERSION-x86_64-linux-gnu-ubuntu-14.04.tar.xz -O $LLVM_PATH.tar.xz
  - mkdir $LLVM_PATH
  - tar xf $LLVM_PATH.tar.xz -C $LLVM_PATH --strip-components=1
  - export PATH=$HOME/.local/bin:$PATH
@ -42,9 +37,3 @@ install:

 script:
  - ./build.sh && make test
-
-notifications:
-  irc: 'chat.freenode.net#ag'
-  on_success: change
-  on_failure: always
-  use_notice: true
--- a/README.md
+++ b/README.md
@ -76,18 +76,14 @@ or

        sbopkg -i the_silver_searcher

-* openSUSE
+* openSUSE:

        zypper install the_silver_searcher

-* CentOS
-
+* CentOS:
+        
        yum install the_silver_searcher

-* NixOS/Nix/Nixpkgs
-
-        nix-env -iA silver-searcher
-
 * SUSE Linux Enterprise: Follow [these simple instructions](https://software.opensuse.org/download.html?project=utilities&package=the_silver_searcher).


@ -106,20 +102,6 @@ or

  Unofficial daily builds are [available](https://github.com/k-takata/the_silver_searcher-win32).
  
-* winget
-
-        winget install "The Silver Searcher"
-  
-  Notes:
-  - This installs a [release](https://github.com/JFLarvoire/the_silver_searcher/releases) of ag.exe optimized for Windows.
-  - winget is intended to become the default package manager client for Windows.  
-    As of June 2020, it's still in beta, and can be installed using instructions [there](https://github.com/microsoft/winget-cli).
-  - The setup script in the Ag's winget package installs ag.exe in the first directory that matches one of these criteria:
-     1. Over a previous instance of ag.exe *from the same [origin](https://github.com/JFLarvoire/the_silver_searcher)* found in the PATH
-     2. In the directory defined in environment variable bindir_%PROCESSOR_ARCHITECTURE%
-     3. In the directory defined in environment variable bindir
-     4. In the directory defined in environment variable windir
-  
 * Chocolatey

        choco install ag
@ -150,7 +132,7 @@ or
    * CentOS:

            yum -y groupinstall "Development Tools"
-            yum -y install pcre-devel xz-devel zlib-devel
+            yum -y install pcre-devel xz-devel
    * openSUSE:

            zypper source-install --build-deps-only the_silver_searcher
@ -185,7 +167,7 @@ You may need to use `sudo` or run as root for the make install.

 ### Vim

-You can use Ag with [ack.vim](https://github.com/mileszs/ack.vim) by adding the following line to your `.vimrc`:
+You can use Ag with [ack.vim][] by adding the following line to your `.vimrc`:

    let g:ackprg = 'ag --nogroup --nocolor --column'

@ -208,10 +190,9 @@ TextMate users can use Ag with [my fork](https://github.com/ggreer/AckMate) of t

 ## Other stuff you might like

-* [Ack](https://github.com/petdance/ack3) - Better than grep. Without Ack, Ag would not exist.
+* [Ack](https://github.com/petdance/ack2) - Better than grep. Without Ack, Ag would not exist.
 * [ack.vim](https://github.com/mileszs/ack.vim)
 * [Exuberant Ctags](http://ctags.sourceforge.net/) - Faster than Ag, but it builds an index beforehand. Good for *really* big codebases.
 * [Git-grep](http://git-scm.com/docs/git-grep) - As fast as Ag but only works on git repos.
-* [fzf](https://github.com/junegunn/fzf) - A command-line fuzzy finder 
 * [ripgrep](https://github.com/BurntSushi/ripgrep)
 * [Sack](https://github.com/sampson-chen/sack) - A utility that wraps Ack and Ag. It removes a lot of repetition from searching and opening matching files.
--- a/ag.bashcomp.sh
+++ b/ag.bashcomp.sh
@ -67,7 +67,7 @@ _ag() {
    --parallel
    --passthrough
    --passthru
-    --path-to-ignore
+    --path-to-agignore
    --print-long-lines
    --print0
    --recurse
@ -106,7 +106,7 @@ _ag() {
    --ignore-dir) # directory completion
              _filedir -d
              return 0;;
-    --path-to-ignore) # file completion
+    --path-to-agignore) # file completion
              _filedir
              return 0;;
    --pager) # command completion
--- a/configure.ac
+++ b/configure.ac
@ -1,6 +1,6 @@
 AC_INIT(
    [the_silver_searcher],
-    [2.2.0],
+    [2.1.0],
    [https://github.com/ggreer/the_silver_searcher/issues],
    [the_silver_searcher],
    [https://github.com/ggreer/the_silver_searcher])
--- a/doc/ag.1.md
+++ b/doc/ag.1.md
@ -207,9 +207,6 @@ Recursively search for PATTERN in PATH. Like grep or ack, but faster.
  * `--workers NUM`:
    Use NUM worker threads. Default is the number of CPU cores, with a max of 8.

-  * `-W --width NUM`:
-    Truncate match lines after NUM characters.
-
  * `-z --search-zip`:
    Search contents of compressed files. Currently, gz and xz are supported.
    This option requires that ag is built with lzma and zlib.
--- a/src/ignore.c
+++ b/src/ignore.c
@ -20,8 +20,6 @@
 const int fnmatch_flags = FNM_PATHNAME;
 #endif

-ignores *root_ignores;
-
 /* TODO: build a huge-ass list of files we want to ignore by default (build cache stuff, pyc files, etc) */

 const char *evil_hardcoded_ignore_files[] = {
@ -201,14 +199,14 @@ static int ackmate_dir_match(const char *dir_name) {
    if (opts.ackmate_dir_filter == NULL) {
        return 0;
    }
+    int wspace[20];
    /* we just care about the match, not where the matches are */
-    return pcre_exec(opts.ackmate_dir_filter, NULL, dir_name, strlen(dir_name), 0, 0, NULL, 0);
+    return pcre_dfa_exec(opts.ackmate_dir_filter, NULL, dir_name, strlen(dir_name), 0, 0, NULL, 0, wspace, 20);
 }

 /* This is the hottest code in Ag. 10-15% of all execution time is spent here */
 static int path_ignore_search(const ignores *ig, const char *path, const char *filename) {
    char *temp;
-    int temp_start_pos;
    size_t i;
    int match_pos;

@ -219,12 +217,9 @@ static int path_ignore_search(const ignores *ig, const char *path, const char *f
    }

    ag_asprintf(&temp, "%s/%s", path[0] == '.' ? path + 1 : path, filename);
-    //ig->abs_path has its leading slash stripped, so we have to strip the leading slash
-    //of temp as well
-    temp_start_pos = (temp[0] == '/') ? 1 : 0;

-    if (strncmp(temp + temp_start_pos, ig->abs_path, ig->abs_path_len) == 0) {
-        char *slash_filename = temp + temp_start_pos + ig->abs_path_len;
+    if (strncmp(temp, ig->abs_path, ig->abs_path_len) == 0) {
+        char *slash_filename = temp + ig->abs_path_len;
        if (slash_filename[0] == '/') {
            slash_filename++;
        }
--- a/src/ignore.h
+++ b/src/ignore.h
@ -29,7 +29,7 @@ struct ignores {
 };
 typedef struct ignores ignores;

-extern ignores *root_ignores;
+ignores *root_ignores;

 extern const char *evil_hardcoded_ignore_files[];
 extern const char *ignore_pattern_files[];
--- a/src/lang.c
+++ b/src/lang.c
@ -10,44 +10,38 @@ lang_spec_t langs[] = {
    { "asciidoc", { "adoc", "ad", "asc", "asciidoc" } },
    { "apl", { "apl" } },
    { "asm", { "asm", "s" } },
-    { "asp", { "asp", "asa", "aspx", "asax", "ashx", "ascx", "asmx" } },
-    { "aspx", { "asp", "asa", "aspx", "asax", "ashx", "ascx", "asmx" } },
    { "batch", { "bat", "cmd" } },
-    { "bazel", { "bazel" } },
    { "bitbake", { "bb", "bbappend", "bbclass", "inc" } },
+    { "bro", { "bro", "bif" } },
    { "cc", { "c", "h", "xs" } },
    { "cfmx", { "cfc", "cfm", "cfml" } },
    { "chpl", { "chpl" } },
-    { "clojure", { "clj", "cljs", "cljc", "cljx", "edn" } },
+    { "clojure", { "clj", "cljs", "cljc", "cljx" } },
    { "coffee", { "coffee", "cjsx" } },
-    { "config", { "config" } },
    { "coq", { "coq", "g", "v" } },
    { "cpp", { "cpp", "cc", "C", "cxx", "m", "hpp", "hh", "h", "H", "hxx", "tpp" } },
    { "crystal", { "cr", "ecr" } },
    { "csharp", { "cs" } },
-    { "cshtml", { "cshtml" } },
    { "css", { "css" } },
    { "cython", { "pyx", "pxd", "pxi" } },
    { "delphi", { "pas", "int", "dfm", "nfm", "dof", "dpk", "dpr", "dproj", "groupproj", "bdsgroup", "bdsproj" } },
    { "dlang", { "d", "di" } },
    { "dot", { "dot", "gv" } },
-    { "dts", { "dts", "dtsi" } },
    { "ebuild", { "ebuild", "eclass" } },
    { "elisp", { "el" } },
    { "elixir", { "ex", "eex", "exs" } },
    { "elm", { "elm" } },
    { "erlang", { "erl", "hrl" } },
    { "factor", { "factor" } },
-    { "fortran", { "f", "F", "f77", "f90", "F90", "f95", "f03", "for", "ftn", "fpp", "FPP" } },
+    { "fortran", { "f", "f77", "f90", "f95", "f03", "for", "ftn", "fpp" } },
    { "fsharp", { "fs", "fsi", "fsx" } },
    { "gettext", { "po", "pot", "mo" } },
    { "glsl", { "vert", "tesc", "tese", "geom", "frag", "comp" } },
    { "go", { "go" } },
-    { "gradle", { "gradle" } },
    { "groovy", { "groovy", "gtmpl", "gpp", "grunit", "gradle" } },
    { "haml", { "haml" } },
    { "handlebars", { "hbs" } },
-    { "haskell", { "hs", "hsig", "lhs" } },
+    { "haskell", { "hs", "lhs" } },
    { "haxe", { "hx" } },
    { "hh", { "h" } },
    { "html", { "htm", "html", "shtml", "xhtml" } },
@ -78,7 +72,6 @@ lang_spec_t langs[] = {
    { "mathematica", { "m", "wl" } },
    { "md", { "markdown", "mdown", "mdwn", "mkdn", "mkd", "md" } },
    { "mercury", { "m", "moo" } },
-    { "naccess", { "asa", "rsa" } },
    { "nim", { "nim" } },
    { "nix", { "nix" } },
    { "objc", { "m", "h" } },
@ -87,22 +80,17 @@ lang_spec_t langs[] = {
    { "octave", { "m" } },
    { "org", { "org" } },
    { "parrot", { "pir", "pasm", "pmc", "ops", "pod", "pg", "tg" } },
-    { "pdb", { "pdb" } },
    { "perl", { "pl", "pm", "pm6", "pod", "t" } },
    { "php", { "php", "phpt", "php3", "php4", "php5", "phtml" } },
    { "pike", { "pike", "pmod" } },
    { "plist", { "plist" } },
    { "plone", { "pt", "cpt", "metadata", "cpy", "py", "xml", "zcml" } },
-    { "powershell", { "ps1" } },
    { "proto", { "proto" } },
-    { "ps1", { "ps1" } },
-    { "pug", { "pug" } },
    { "puppet", { "pp" } },
    { "python", { "py" } },
    { "qml", { "qml" } },
    { "racket", { "rkt", "ss", "scm" } },
    { "rake", { "Rakefile" } },
-    { "razor", { "cshtml" } },
    { "restructuredtext", { "rst" } },
    { "rs", { "rs" } },
    { "r", { "r", "R", "Rmd", "Rnw", "Rtex", "Rrst" } },
@ -117,13 +105,10 @@ lang_spec_t langs[] = {
    { "smalltalk", { "st" } },
    { "sml", { "sml", "fun", "mlb", "sig" } },
    { "sql", { "sql", "ctl" } },
-    { "stata", { "do", "ado" } },
    { "stylus", { "styl" } },
    { "swift", { "swift" } },
    { "tcl", { "tcl", "itcl", "itk" } },
-    { "terraform", { "tf", "tfvars" } },
    { "tex", { "tex", "cls", "sty" } },
-    { "thrift", { "thrift" } },
    { "tla", { "tla" } },
    { "tt", { "tt", "tt2", "ttml" } },
    { "toml", { "toml" } },
@ -132,17 +117,14 @@ lang_spec_t langs[] = {
    { "vala", { "vala", "vapi" } },
    { "vb", { "bas", "cls", "frm", "ctl", "vb", "resx" } },
    { "velocity", { "vm", "vtl", "vsl" } },
-    { "verilog", { "v", "vh", "sv", "svh" } },
+    { "verilog", { "v", "vh", "sv" } },
    { "vhdl", { "vhd", "vhdl" } },
    { "vim", { "vim" } },
-    { "vue", { "vue" } },
    { "wix", { "wxi", "wxs" } },
    { "wsdl", { "wsdl" } },
    { "wadl", { "wadl" } },
-    { "xml", { "xml", "dtd", "xsl", "xslt", "xsd", "ent", "tld", "plist", "wsdl" } },
-    { "yaml", { "yaml", "yml" } },
-    { "zeek", { "zeek", "bro", "bif" } },
-    { "zephir", { "zep" } }
+    { "xml", { "xml", "dtd", "xsl", "xslt", "ent", "tld", "plist" } },
+    { "yaml", { "yaml", "yml" } }
 };

 size_t get_lang_count() {
--- a/src/log.c
+++ b/src/log.c
@ -4,7 +4,6 @@
 #include "log.h"
 #include "util.h"

-pthread_mutex_t print_mtx = PTHREAD_MUTEX_INITIALIZER;
 static enum log_level log_threshold = LOG_LEVEL_ERR;

 void set_log_level(enum log_level threshold) {
--- a/src/log.h
+++ b/src/log.h
@ -9,7 +9,7 @@
 #include <pthread.h>
 #endif

-extern pthread_mutex_t print_mtx;
+pthread_mutex_t print_mtx;

 enum log_level {
    LOG_LEVEL_DEBUG = 10,
--- a/src/main.c
+++ b/src/main.c
@ -154,7 +154,7 @@ int main(int argc, char **argv) {
            }
 #if defined(HAVE_PTHREAD_SETAFFINITY_NP) && (defined(USE_CPU_SET) || defined(HAVE_SYS_CPUSET_H))
            if (opts.use_thread_affinity) {
-#if defined(__linux__) || defined(__midipix__)
+#ifdef __linux__
                cpu_set_t cpu_set;
 #elif __FreeBSD__
                cpuset_t cpu_set;
@ -185,7 +185,7 @@ int main(int argc, char **argv) {
            log_debug("searching path %s for %s", paths[i], opts.query);
            symhash = NULL;
            ignores *ig = init_ignore(root_ignores, "", 0);
-            struct stat s = { .st_dev = 0 };
+            struct stat s = {.st_dev = 0 };
 #ifndef _WIN32
            /* The device is ignored if opts.one_dev is false, so it's fine
             * to leave it at the default 0
@ -213,7 +213,7 @@ int main(int argc, char **argv) {
        double time_diff = ((long)stats.time_end.tv_sec * 1000000 + stats.time_end.tv_usec) -
                           ((long)stats.time_start.tv_sec * 1000000 + stats.time_start.tv_usec);
        time_diff /= 1000000;
-        printf("%zu matches\n%zu files contained matches\n%zu files searched\n%zu bytes searched\n%f seconds\n",
+        printf("%ld matches\n%ld files contained matches\n%ld files searched\n%ld bytes searched\n%f seconds\n",
               stats.total_matches, stats.total_file_matches, stats.total_files, stats.total_bytes, time_diff);
        pthread_mutex_destroy(&stats_mtx);
    }
--- a/src/options.c
+++ b/src/options.c
@ -20,8 +20,6 @@ const char *color_line_number = "\033[1;33m"; /* bold yellow */
 const char *color_match = "\033[30;43m";      /* black with yellow background */
 const char *color_path = "\033[1;32m";        /* bold green */

-cli_options opts;
-
 /* TODO: try to obey out_fd? */
 void usage(void) {
    printf("\n");
@ -145,14 +143,9 @@ void print_version(void) {
 }

 void init_options(void) {
-    char *term = getenv("TERM");
-
    memset(&opts, 0, sizeof(opts));
    opts.casing = CASE_DEFAULT;
    opts.color = TRUE;
-    if (term && !strcmp(term, "dumb")) {
-        opts.color = FALSE;
-    }
    opts.color_win_ansi = FALSE;
    opts.max_matches_per_file = 0;
    opts.max_search_depth = DEFAULT_MAX_SEARCH_DEPTH;
@ -453,9 +446,8 @@ void parse_options(int argc, char **argv, char **base_paths[], char **paths[]) {
                opts.casing = CASE_INSENSITIVE;
                break;
            case 'L':
-                opts.print_nonmatching_files = 1;
-                opts.print_path = PATH_PRINT_TOP;
-                break;
+                opts.invert_match = 1;
+            /* fall through */
            case 'l':
                needs_query = 0;
                opts.print_filename_only = 1;
@ -713,10 +705,8 @@ void parse_options(int argc, char **argv, char **base_paths[], char **paths[]) {
                const char *config_home = getenv("XDG_CONFIG_HOME");
                if (config_home) {
                    ag_asprintf(&gitconfig_res, "%s/%s", config_home, "git/ignore");
-                } else if (home_dir) {
-                    ag_asprintf(&gitconfig_res, "%s/%s", home_dir, ".config/git/ignore");
                } else {
-                    gitconfig_res = ag_strdup("");
+                    ag_asprintf(&gitconfig_res, "%s/%s", home_dir, ".config/git/ignore");
                }
            }
            log_debug("global core.excludesfile: %s", gitconfig_res);
--- a/src/options.h
+++ b/src/options.h
@ -60,7 +60,6 @@ typedef struct {
    int print_break;
    int print_count;
    int print_filename_only;
-    int print_nonmatching_files;
    int print_path;
    int print_all_paths;
    int print_line_numbers;
@ -92,7 +91,7 @@ typedef struct {
 } cli_options;

 /* global options. parse_options gives it sane values, everything else reads from it */
-extern cli_options opts;
+cli_options opts;

 typedef struct option option_t;

--- a/src/search.c
+++ b/src/search.c
@ -2,32 +2,18 @@
 #include "print.h"
 #include "scandir.h"

-size_t alpha_skip_lookup[256];
-size_t *find_skip_lookup;
-uint8_t h_table[H_SIZE] __attribute__((aligned(64)));
-
-work_queue_t *work_queue = NULL;
-work_queue_t *work_queue_tail = NULL;
-int done_adding_files = 0;
-pthread_cond_t files_ready = PTHREAD_COND_INITIALIZER;
-pthread_mutex_t stats_mtx = PTHREAD_MUTEX_INITIALIZER;
-pthread_mutex_t work_queue_mtx = PTHREAD_MUTEX_INITIALIZER;
-
-symdir_t *symhash = NULL;
-
-/* Returns: -1 if skipped, otherwise # of matches */
-ssize_t search_buf(const char *buf, const size_t buf_len,
-                   const char *dir_full_path) {
+void search_buf(const char *buf, const size_t buf_len,
+                const char *dir_full_path) {
    int binary = -1; /* 1 = yes, 0 = no, -1 = don't know */
    size_t buf_offset = 0;

    if (opts.search_stream) {
        binary = 0;
-    } else if (!opts.search_binary_files && opts.mmap) { /* if not using mmap, binary files have already been skipped */
+    } else if (!opts.search_binary_files) {
        binary = is_binary((const void *)buf, buf_len);
        if (binary) {
            log_debug("File %s is binary. Skipping...", dir_full_path);
-            return -1;
+            return;
        }
    }

@ -113,9 +99,10 @@ ssize_t search_buf(const char *buf, const size_t buf_len,
        }
    } else {
        int offset_vector[3];
+        int wspace[20];
        if (opts.multiline) {
            while (buf_offset < buf_len &&
-                   (pcre_exec(opts.re, opts.re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) {
+                   (pcre_dfa_exec(opts.re, opts.re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3, wspace, 20)) >= 0) {
                log_debug("Regex match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]);
                buf_offset = offset_vector[1];
                if (offset_vector[0] == offset_vector[1]) {
@ -143,7 +130,7 @@ ssize_t search_buf(const char *buf, const size_t buf_len,
                }
                size_t line_offset = 0;
                while (line_offset < line_len) {
-                    int rv = pcre_exec(opts.re, opts.re_extra, line, line_len, line_offset, 0, offset_vector, 3);
+                    int rv = pcre_dfa_exec(opts.re, opts.re_extra, line, line_len, line_offset, 0, offset_vector, 3, wspace, 20);
                    if (rv < 0) {
                        break;
                    }
@ -188,16 +175,25 @@ multiline_done:
        pthread_mutex_unlock(&stats_mtx);
    }

-    if (!opts.print_nonmatching_files && (matches_len > 0 || opts.print_all_paths)) {
+    if (matches_len > 0 || opts.print_all_paths) {
        if (binary == -1 && !opts.print_filename_only) {
            binary = is_binary((const void *)buf, buf_len);
        }
        pthread_mutex_lock(&print_mtx);
        if (opts.print_filename_only) {
-            if (opts.print_count) {
-                print_path_count(dir_full_path, opts.path_sep, (size_t)matches_len);
-            } else {
-                print_path(dir_full_path, opts.path_sep);
+            /* If the --files-without-matches or -L option is passed we should
+             * not print a matching line. This option currently sets
+             * opts.print_filename_only and opts.invert_match. Unfortunately
+             * setting the latter has the side effect of making matches_len = 1
+             * on a file-without-matches which is not desired behaviour. See
+             * GitHub issue 206 for the consequences if this behaviour is not
+             * checked. */
+            if (!opts.invert_match || matches_len < 2) {
+                if (opts.print_count) {
+                    print_path_count(dir_full_path, opts.path_sep, (size_t)matches_len);
+                } else {
+                    print_path(dir_full_path, opts.path_sep);
+                }
            }
        } else if (binary) {
            print_binary_file_matches(dir_full_path);
@ -219,16 +215,11 @@ multiline_done:
    if (matches_size > 0) {
        free(matches);
    }
-
-    /* FIXME: handle case where matches_len > SSIZE_MAX */
-    return (ssize_t)matches_len;
 }

-/* Return value: -1 if skipped, otherwise # of matches */
 /* TODO: this will only match single lines. multi-line regexes silently don't match */
-ssize_t search_stream(FILE *stream, const char *path) {
+void search_stream(FILE *stream, const char *path) {
    char *line = NULL;
-    ssize_t matches_count = 0;
    ssize_t line_len = 0;
    size_t line_cap = 0;
    size_t i;
@ -236,17 +227,8 @@ ssize_t search_stream(FILE *stream, const char *path) {
    print_init_context();

    for (i = 1; (line_len = getline(&line, &line_cap, stream)) > 0; i++) {
-        ssize_t result;
        opts.stream_line_num = i;
-        result = search_buf(line, line_len, path);
-        if (result > 0) {
-            if (matches_count == -1) {
-                matches_count = 0;
-            }
-            matches_count += result;
-        } else if (matches_count <= 0 && result == -1) {
-            matches_count = -1;
-        }
+        search_buf(line, line_len, path);
        if (line[line_len - 1] == '\n') {
            line_len--;
        }
@ -255,7 +237,6 @@ ssize_t search_stream(FILE *stream, const char *path) {

    free(line);
    print_cleanup_context();
-    return matches_count;
 }

 void search_file(const char *file_full_path) {
@ -264,7 +245,6 @@ void search_file(const char *file_full_path) {
    char *buf = NULL;
    struct stat statbuf;
    int rv = 0;
-    int matches_count = -1;
    FILE *fp = NULL;

    rv = stat(file_full_path, &statbuf);
@ -314,7 +294,7 @@ void search_file(const char *file_full_path) {
    if (statbuf.st_mode & S_IFIFO) {
        log_debug("%s is a named pipe. stream searching", file_full_path);
        fp = fdopen(fd, "r");
-        matches_count = search_stream(fp, file_full_path);
+        search_stream(fp, file_full_path);
        fclose(fp);
        goto cleanup;
    }
@ -323,7 +303,7 @@ void search_file(const char *file_full_path) {

    if (f_len == 0) {
        if (opts.query[0] == '.' && opts.query_len == 1 && !opts.literal && opts.search_all_files) {
-            matches_count = search_buf(buf, f_len, file_full_path);
+            search_buf(buf, f_len, file_full_path);
        } else {
            log_debug("Skipping %s: file is empty.", file_full_path);
        }
@ -331,7 +311,7 @@ void search_file(const char *file_full_path) {
    }

    if (!opts.literal && f_len > INT_MAX) {
-        log_err("Skipping %s: pcre_exec() can't handle files larger than %i bytes.", file_full_path, INT_MAX);
+        log_err("Skipping %s: pcre_dfa_exec() can't handle files larger than %i bytes.", file_full_path, INT_MAX);
        goto cleanup;
    }

@ -368,23 +348,9 @@ void search_file(const char *file_full_path) {
 #endif
    } else {
        buf = ag_malloc(f_len);
-
-        ssize_t bytes_read = 0;
-
-        if (!opts.search_binary_files) {
-            bytes_read += read(fd, buf, ag_min(f_len, 512));
-            // Optimization: If skipping binary files, don't read the whole buffer before checking if binary or not.
-            if (is_binary(buf, f_len)) {
-                log_debug("File %s is binary. Skipping...", file_full_path);
-                goto cleanup;
-            }
-        }
-
-        while (bytes_read < f_len) {
-            bytes_read += read(fd, buf + bytes_read, f_len);
-        }
-        if (bytes_read != f_len) {
-            die("File %s read(): expected to read %u bytes but read %u", file_full_path, f_len, bytes_read);
+        size_t bytes_read = read(fd, buf, f_len);
+        if ((off_t)bytes_read != f_len) {
+            die("expected to read %u bytes but read %u", f_len, bytes_read);
        }
    }
 #endif
@ -395,7 +361,7 @@ void search_file(const char *file_full_path) {
 #if HAVE_FOPENCOOKIE
            log_debug("%s is a compressed file. stream searching", file_full_path);
            fp = decompress_open(fd, "r", zip_type);
-            matches_count = search_stream(fp, file_full_path);
+            search_stream(fp, file_full_path);
            fclose(fp);
 #else
            int _buf_len = (int)f_len;
@ -404,24 +370,17 @@ void search_file(const char *file_full_path) {
                log_err("Cannot decompress zipped file %s", file_full_path);
                goto cleanup;
            }
-            matches_count = search_buf(_buf, _buf_len, file_full_path);
+            search_buf(_buf, _buf_len, file_full_path);
            free(_buf);
 #endif
            goto cleanup;
        }
    }

-    matches_count = search_buf(buf, f_len, file_full_path);
+    search_buf(buf, f_len, file_full_path);

 cleanup:

-    if (opts.print_nonmatching_files && matches_count == 0) {
-        pthread_mutex_lock(&print_mtx);
-        print_path(file_full_path, opts.path_sep);
-        pthread_mutex_unlock(&print_mtx);
-        opts.match_found = 1;
-    }
-
    print_cleanup_context();
    if (buf != NULL) {
 #ifdef _WIN32
@ -626,8 +585,9 @@ void search_dir(ignores *ig, const char *base_path, const char *path, const int

        if (!is_directory(path, dir)) {
            if (opts.file_search_regex) {
-                rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path),
-                               0, 0, offset_vector, 3);
+                int wspace[20];
+                rc = pcre_dfa_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path),
+                               0, 0, offset_vector, 3, wspace, 20);
                if (rc < 0) { /* no match */
                    log_debug("Skipping %s due to file_search_regex.", dir_full_path);
                    goto cleanup;
--- a/src/search.h
+++ b/src/search.h
@ -31,9 +31,9 @@
 #include "uthash.h"
 #include "util.h"

-extern size_t alpha_skip_lookup[256];
-extern size_t *find_skip_lookup;
-extern uint8_t h_table[H_SIZE] __attribute__((aligned(64)));
+size_t alpha_skip_lookup[256];
+size_t *find_skip_lookup;
+uint8_t h_table[H_SIZE] __attribute__((aligned(64)));

 struct work_queue_t {
    char *path;
@ -41,12 +41,12 @@ struct work_queue_t {
 };
 typedef struct work_queue_t work_queue_t;

-extern work_queue_t *work_queue;
-extern work_queue_t *work_queue_tail;
-extern int done_adding_files;
-extern pthread_cond_t files_ready;
-extern pthread_mutex_t stats_mtx;
-extern pthread_mutex_t work_queue_mtx;
+work_queue_t *work_queue;
+work_queue_t *work_queue_tail;
+int done_adding_files;
+pthread_cond_t files_ready;
+pthread_mutex_t stats_mtx;
+pthread_mutex_t work_queue_mtx;


 /* For symlink loop detection */
@ -64,11 +64,11 @@ typedef struct {
    UT_hash_handle hh;
 } symdir_t;

-extern symdir_t *symhash;
+symdir_t *symhash;

-ssize_t search_buf(const char *buf, const size_t buf_len,
-                   const char *dir_full_path);
-ssize_t search_stream(FILE *stream, const char *path);
+void search_buf(const char *buf, const size_t buf_len,
+                const char *dir_full_path);
+void search_stream(FILE *stream, const char *path);
 void search_file(const char *file_full_path);

 void *search_file_worker(void *i);
--- a/src/util.c
+++ b/src/util.c
@ -21,8 +21,6 @@
    }                                     \
    return ptr;

-FILE *out_fd = NULL;
-ag_stats stats;
 void *ag_malloc(size_t size) {
    void *ptr = malloc(size);
    CHECK_AND_RETURN(ptr)
@ -150,13 +148,6 @@ size_t ag_max(size_t a, size_t b) {
    return a;
 }

-size_t ag_min(size_t a, size_t b) {
-    if (b < a) {
-        return b;
-    }
-    return a;
-}
-
 void generate_hash(const char *find, const size_t f_len, uint8_t *h_table, const int case_sensitive) {
    int i;
    for (i = f_len - sizeof(uint16_t); i >= 0; i--) {
@ -518,7 +509,7 @@ int is_symlink(const char *path, const struct dirent *d) {

 int is_named_pipe(const char *path, const struct dirent *d) {
 #ifdef HAVE_DIRENT_DTYPE
-    if (d->d_type != DT_UNKNOWN && d->d_type != DT_LNK) {
+    if (d->d_type != DT_UNKNOWN) {
        return d->d_type == DT_FIFO || d->d_type == DT_SOCK;
    }
 #endif
@ -626,7 +617,7 @@ ssize_t getline(char **lineptr, size_t *n, FILE *stream) {
 ssize_t buf_getline(const char **line, const char *buf, const size_t buf_len, const size_t buf_offset) {
    const char *cur = buf + buf_offset;
    ssize_t i;
-    for (i = 0; (buf_offset + i < buf_len) && cur[i] != '\n'; i++) {
+    for (i = 0; cur[i] != '\n' && (buf_offset + i < buf_len); i++) {
    }
    *line = cur;
    return i;
--- a/src/util.h
+++ b/src/util.h
@ -12,7 +12,7 @@
 #include "log.h"
 #include "options.h"

-extern FILE *out_fd;
+FILE *out_fd;

 #ifndef TRUE
 #define TRUE 1
@ -42,16 +42,16 @@ typedef struct {
 } match_t;

 typedef struct {
-    size_t total_bytes;
-    size_t total_files;
-    size_t total_matches;
-    size_t total_file_matches;
+    long total_bytes;
+    long total_files;
+    long total_matches;
+    long total_file_matches;
    struct timeval time_start;
    struct timeval time_end;
 } ag_stats;


-extern ag_stats stats;
+ag_stats stats;

 /* Union to translate between chars and words without violating strict aliasing */
 typedef union {
@ -69,7 +69,6 @@ void generate_hash(const char *find, const size_t f_len, uint8_t *H, const int c

 /* max is already defined on spec-violating compilers such as MinGW */
 size_t ag_max(size_t a, size_t b);
-size_t ag_min(size_t a, size_t b);

 const char *boyer_moore_strnstr(const char *s, const char *find, const size_t s_len, const size_t f_len,
                                const size_t alpha_skip_lookup[], const size_t *find_skip_lookup, const int case_insensitive);
--- a/tests/empty_environment.t
+++ b/tests/empty_environment.t
@ -1,9 +0,0 @@
-Setup:
-
-  $ . $TESTDIR/setup.sh
-  $ printf "hello world\n" >test.txt
-
-Verify ag runs with an empty environment:
-
-  $ env -i $TESTDIR/../ag --noaffinity --nocolor --workers=1 --parallel hello
-  test.txt:1:hello world
--- a/tests/files_with_matches.t
+++ b/tests/files_with_matches.t
@ -3,10 +3,6 @@ Setup:
  $ . $TESTDIR/setup.sh
  $ printf 'foo\n' > ./foo.txt
  $ printf 'bar\n' > ./bar.txt
-  $ printf 'foo\nbar\nbaz\n' > ./baz.txt
-  $ printf 'duck\nanother duck\nyet another duck\n' > ./duck.txt
-  $ cp duck.txt goose.txt
-  $ echo "GOOSE!!!" >> ./goose.txt

 Files with matches:

@ -16,17 +12,8 @@ Files with matches:
  foo.txt
  $ ag --files-with-matches foo bar.txt
  [1]
-  $ ag --files-with-matches foo foo.txt bar.txt baz.txt
-  foo.txt
-  baz.txt
-  $ ag --files-with-matches bar foo.txt bar.txt baz.txt
-  bar.txt
-  baz.txt
-  $ ag --files-with-matches foo bar.txt baz.txt
-  baz.txt

 Files without matches:
-(Prints names of files in which no line matches query)

  $ ag --files-without-matches bar foo.txt
  foo.txt
@ -34,30 +21,3 @@ Files without matches:
  foo.txt
  $ ag --files-without-matches bar bar.txt
  [1]
-  $ ag --files-without-matches foo foo.txt bar.txt baz.txt
-  bar.txt
-  $ ag --files-without-matches bar foo.txt bar.txt baz.txt
-  foo.txt
-
-Files with inverted matches:
-(Prints names of files in which some line doesn't match query)
-
-  $ ag --files-with-matches --invert-match bar bar.txt
-  [1]
-  $ ag --files-with-matches --invert-match foo foo.txt bar.txt baz.txt
-  bar.txt
-  baz.txt
-  $ ag --files-with-matches --invert-match bar foo.txt bar.txt baz.txt
-  foo.txt
-  baz.txt
-
-Files without inverted matches:
-(Prints names of files in which no line doesn't match query,
- i.e. where every line matches query)
-
-  $ ag --files-without-matches --invert-match duck duck.txt
-  duck.txt
-  $ ag --files-without-matches --invert-match duck goose.txt
-  [1]
-  $ ag --files-without-matches --invert-match duck duck.txt goose.txt
-  duck.txt
--- a/tests/ignore_slash_in_subdir.t
+++ b/tests/ignore_slash_in_subdir.t
@ -1,19 +0,0 @@
-Setup:
-
-  $ . $TESTDIR/setup.sh
-  $ mkdir -p subdir/ignoredir
-  $ mkdir ignoredir
-  $ printf 'match1\n' > subdir/ignoredir/file1.txt
-  $ printf 'match1\n' > ignoredir/file1.txt
-  $ printf '/ignoredir\n' > subdir/.ignore
-
-Ignore file in subdir/ignoredir, but not in ignoredir:
-
-  $ ag match
-  ignoredir/file1.txt:1:match1
-
-From subdir, ignore file in subdir/ignoredir:
-
-  $ cd subdir
-  $ ag match
-  [1]
--- a/tests/list_file_types.t
+++ b/tests/list_file_types.t
@ -21,21 +21,15 @@ Language types are output:
    --asm
        .asm  .s
  
-    --asp
-        .asp  .asa  .aspx  .asax  .ashx  .ascx  .asmx
-  
-    --aspx
-        .asp  .asa  .aspx  .asax  .ashx  .ascx  .asmx
-  
    --batch
        .bat  .cmd
  
-    --bazel
-        .bazel
-  
    --bitbake
        .bb  .bbappend  .bbclass  .inc
  
+    --bro
+        .bro  .bif
+  
    --cc
        .c  .h  .xs
  
@ -46,14 +40,11 @@ Language types are output:
        .chpl
  
    --clojure
-        .clj  .cljs  .cljc  .cljx  .edn
+        .clj  .cljs  .cljc  .cljx
  
    --coffee
        .coffee  .cjsx
  
-    --config
-        .config
-  
    --coq
        .coq  .g  .v
  
@ -66,9 +57,6 @@ Language types are output:
    --csharp
        .cs
  
-    --cshtml
-        .cshtml
-  
    --css
        .css
  
@ -84,9 +72,6 @@ Language types are output:
    --dot
        .dot  .gv
  
-    --dts
-        .dts  .dtsi
-  
    --ebuild
        .ebuild  .eclass
  
@ -106,7 +91,7 @@ Language types are output:
        .factor
  
    --fortran
-        .f  .F  .f77  .f90  .F90  .f95  .f03  .for  .ftn  .fpp  .FPP
+        .f  .f77  .f90  .f95  .f03  .for  .ftn  .fpp
  
    --fsharp
        .fs  .fsi  .fsx
@ -120,9 +105,6 @@ Language types are output:
    --go
        .go
  
-    --gradle
-        .gradle
-  
    --groovy
        .groovy  .gtmpl  .gpp  .grunit  .gradle
  
@ -133,7 +115,7 @@ Language types are output:
        .hbs
  
    --haskell
-        .hs  .hsig  .lhs
+        .hs  .lhs
  
    --haxe
        .hx
@ -225,9 +207,6 @@ Language types are output:
    --mercury
        .m  .moo
  
-    --naccess
-        .asa  .rsa
-  
    --nim
        .nim
  
@ -252,9 +231,6 @@ Language types are output:
    --parrot
        .pir  .pasm  .pmc  .ops  .pod  .pg  .tg
  
-    --pdb
-        .pdb
-  
    --perl
        .pl  .pm  .pm6  .pod  .t
  
@ -270,18 +246,9 @@ Language types are output:
    --plone
        .pt  .cpt  .metadata  .cpy  .py  .xml  .zcml
  
-    --powershell
-        .ps1
-  
    --proto
        .proto
  
-    --ps1
-        .ps1
-  
-    --pug
-        .pug
-  
    --puppet
        .pp
  
@ -297,9 +264,6 @@ Language types are output:
    --rake
        .Rakefile
  
-    --razor
-        .cshtml
-  
    --restructuredtext
        .rst
  
@ -342,9 +306,6 @@ Language types are output:
    --sql
        .sql  .ctl
  
-    --stata
-        .do  .ado
-  
    --stylus
        .styl
  
@ -354,15 +315,9 @@ Language types are output:
    --tcl
        .tcl  .itcl  .itk
  
-    --terraform
-        .tf  .tfvars
-  
    --tex
        .tex  .cls  .sty
  
-    --thrift
-        .thrift
-  
    --tla
        .tla
  
@ -388,7 +343,7 @@ Language types are output:
        .vm  .vtl  .vsl
  
    --verilog
-        .v  .vh  .sv  .svh
+        .v  .vh  .sv
  
    --vhdl
        .vhd  .vhdl
@ -396,9 +351,6 @@ Language types are output:
    --vim
        .vim
  
-    --vue
-        .vue
-  
    --wix
        .wxi  .wxs
  
@ -409,14 +361,8 @@ Language types are output:
        .wadl
  
    --xml
-        .xml  .dtd  .xsl  .xslt  .xsd  .ent  .tld  .plist  .wsdl
+        .xml  .dtd  .xsl  .xslt  .ent  .tld  .plist
  
    --yaml
        .yaml  .yml
  
-    --zeek
-        .zeek  .bro  .bif
-  
-    --zephir
-        .zep
-  
--- a/the_silver_searcher.spec.in
+++ b/the_silver_searcher.spec.in
@ -1,5 +1,5 @@
 %define _bashcompdir %_sysconfdir/bash_completion.d
-%define _zshcompdir %{_datadir}/zsh/site-functions
+

 Name:		the_silver_searcher
 Version:	@VERSION@
@ -62,7 +62,7 @@ rm -rf ${RPM_BUILD_ROOT}
 %{_mandir}/*
 %config %{_bashcompdir}/ag.bashcomp.sh
 %config %{_datadir}/%{name}/completions/ag.bashcomp.sh
-%config %{_datadir}/zsh/site-functions/_the_silver_searcher
+

 %changelog
 * Thu Dec 5 2013 Emily Strickland <code@emily.st> - 0.18.1-1