Compare commits

...

9 commits

7 changed files with 191 additions and 94 deletions

View file

@ -316,3 +316,39 @@ const char *normalize_path(const char *path) {
}
return path;
}
/*
 * Print the outcome of searching one buffer.
 *
 *   buf, buf_len    the data that was searched and its length in bytes
 *   dir_full_path   path used in log messages and in the printed output
 *   sr              search results for buf; may be NULL
 *
 * If sr is NULL or contains no matches, a debug message is logged and
 * nothing is printed, except when both opts.search_stream and
 * opts.passthrough are set: then buf is copied to out_fd unchanged.
 *
 * Otherwise the results are printed while holding print_mtx and
 * opts.match_found is set to 1. If sr->binary is still AG_BINARY_UNKNOWN
 * (and file names are not the only thing being printed), it is resolved
 * with is_binary() and stored back into sr.
 */
void print_results(const char *buf, const size_t buf_len, const char *dir_full_path, search_results_t *sr) {
    if (sr == NULL || sr->matches_len == 0) {
        log_debug("No match in %s", dir_full_path);
        if (opts.search_stream && opts.passthrough) {
            /* Emit exactly buf_len bytes. Formatting with "%s" would stop at
             * the first NUL byte in the data and would depend on buf being
             * NUL-terminated, which the (buf, buf_len) interface does not
             * promise. */
            fwrite(buf, 1, buf_len, out_fd);
        }
        return;
    }

    /* Only classify the buffer when the answer can influence the output. */
    if (sr->binary == AG_BINARY_UNKNOWN && !opts.print_filename_only) {
        sr->binary = is_binary((const void *)buf, buf_len) ? AG_BINARY_TRUE : AG_BINARY_FALSE;
    }

    pthread_mutex_lock(&print_mtx);
    if (opts.print_filename_only) {
        /* If the --files-without-matches or -L option is passed we should
         * not print a matching line. This option currently sets
         * opts.print_filename_only and opts.invert_match. Unfortunately
         * setting the latter has the side effect of making matches.len = 1
         * on a file-without-matches which is not desired behaviour. See
         * GitHub issue 206 for the consequences if this behaviour is not
         * checked. */
        if (!opts.invert_match || sr->matches_len < 2) {
            if (opts.print_count) {
                print_path_count(dir_full_path, opts.path_sep, sr->matches_len);
            } else {
                print_path(dir_full_path, opts.path_sep);
            }
        }
    } else if (sr->binary == AG_BINARY_TRUE) {
        print_binary_file_matches(dir_full_path);
    } else {
        print_file_matches(dir_full_path, buf, buf_len, sr->matches, sr->matches_len);
    }
    pthread_mutex_unlock(&print_mtx);

    /* NOTE(review): this flag is stored after print_mtx has been released. */
    opts.match_found = 1;
}

View file

@ -3,16 +3,25 @@
#include "util.h"
#ifdef HAVE_PTHREAD_H
#include <pthread.h>
#endif
/* Mutex that print_results() locks around the calls that print a buffer's
 * results. */
pthread_mutex_t print_mtx;
const char *normalize_path(const char *path);
void print_path(const char *path, const char sep);
void print_path_count(const char *path, const char sep, const size_t count);
void print_line(const char *buf, size_t buf_pos, size_t prev_line_offset);
void print_binary_file_matches(const char *path);
void print_file_matches(const char *path, const char *buf, const size_t buf_len, const match_t matches[], const size_t matches_len);
void print_line_number(size_t line, const char sep);
void print_column_number(const match_t matches[], size_t last_printed_match,
size_t prev_line_offset, const char sep);
void print_line(const char *buf, size_t buf_pos, size_t prev_line_offset);
void print_file_separator(void);
const char *normalize_path(const char *path);
void print_binary_file_matches(const char *path);
void print_file_matches(const char *path, const char *buf, const size_t buf_len, const match_t matches[], const size_t matches_len);
/* Print the results sr of searching buf (buf_len bytes) for the path
 * dir_full_path. sr may be NULL, which is handled like "no matches". */
void print_results(const char *buf, const size_t buf_len, const char *dir_full_path, search_results_t *sr);
#ifdef _WIN32
void windows_use_ansi(int use_ansi);

View file

@ -1,24 +1,24 @@
#include "search.h"
#include "decompress.h"
#include "scandir.h"
void search_buf(const char *buf, const size_t buf_len,
const char *dir_full_path) {
int binary = -1; /* 1 = yes, 0 = no, -1 = don't know */
search_results_t *search_buf(const char *buf, const size_t buf_len, const char *dir_full_path) {
size_t buf_offset = 0;
search_results_t sr;
sr.binary = AG_BINARY_UNKNOWN;
if (opts.search_stream) {
binary = 0;
sr.binary = AG_BINARY_FALSE;
} else if (!opts.search_binary_files) {
binary = is_binary((const void *)buf, buf_len);
int binary = is_binary((const void *)buf, buf_len);
if (binary) {
log_debug("File %s is binary. Skipping...", dir_full_path);
return;
return NULL;
}
sr.binary = binary ? AG_BINARY_TRUE : AG_BINARY_FALSE;
}
size_t matches_len = 0;
match_t *matches;
size_t matches_size;
sr.matches_len = 0;
size_t matches_spare;
if (opts.invert_match) {
@ -27,21 +27,22 @@ void search_buf(const char *buf, const size_t buf_len,
* sure we have a nonempty array; and make sure we always have spare
* capacity for one extra.
*/
matches_size = 100;
matches = ag_malloc(matches_size * sizeof(match_t));
sr.matches_size = 100;
sr.matches = ag_malloc(sr.matches_size * sizeof(match_t));
matches_spare = 1;
} else {
matches_size = 0;
matches = NULL;
sr.matches_size = 0;
sr.matches = NULL;
matches_spare = 0;
}
if (!opts.literal && opts.query_len == 1 && opts.query[0] == '.') {
matches_size = 1;
matches = matches == NULL ? ag_malloc(matches_size * sizeof(match_t)) : matches;
matches[0].start = 0;
matches[0].end = buf_len;
matches_len = 1;
/* Don't even PCRE, just match everything */
sr.matches_size = 1;
sr.matches = sr.matches == NULL ? ag_malloc(sr.matches_size * sizeof(match_t)) : sr.matches;
sr.matches[0].start = 0;
sr.matches[0].end = buf_len;
sr.matches_len = 1;
} else if (opts.literal) {
const char *match_ptr = buf;
strncmp_fp ag_strnstr_fp = get_strstr(opts.casing);
@ -72,16 +73,15 @@ void search_buf(const char *buf, const size_t buf_len,
}
}
realloc_matches(&matches, &matches_size, matches_len + matches_spare);
matches[matches_len].start = match_ptr - buf;
matches[matches_len].end = matches[matches_len].start + opts.query_len;
buf_offset = matches[matches_len].end;
log_debug("Match found. File %s, offset %lu bytes.", dir_full_path, matches[matches_len].start);
matches_len++;
realloc_matches(&sr, matches_spare);
sr.matches[sr.matches_len].start = match_ptr - buf;
sr.matches[sr.matches_len].end = sr.matches[sr.matches_len].start + opts.query_len;
buf_offset = sr.matches[sr.matches_len].end;
log_debug("Match found. File %s, offset %lu bytes.", dir_full_path, sr.matches[sr.matches_len].start);
sr.matches_len++;
match_ptr += opts.query_len;
if (opts.max_matches_per_file > 0 && matches_len >= opts.max_matches_per_file) {
if (opts.max_matches_per_file > 0 && sr.matches_len >= opts.max_matches_per_file) {
log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
break;
}
@ -98,13 +98,12 @@ void search_buf(const char *buf, const size_t buf_len,
log_debug("Regex match is of length zero. Advancing offset one byte.");
}
realloc_matches(&matches, &matches_size, matches_len + matches_spare);
realloc_matches(&sr, matches_spare);
sr.matches[sr.matches_len].start = offset_vector[0];
sr.matches[sr.matches_len].end = offset_vector[1];
sr.matches_len++;
matches[matches_len].start = offset_vector[0];
matches[matches_len].end = offset_vector[1];
matches_len++;
if (opts.max_matches_per_file > 0 && matches_len >= opts.max_matches_per_file) {
if (opts.max_matches_per_file > 0 && sr.matches_len >= opts.max_matches_per_file) {
log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
break;
}
@ -130,13 +129,12 @@ void search_buf(const char *buf, const size_t buf_len,
log_debug("Regex match is of length zero. Advancing offset one byte.");
}
realloc_matches(&matches, &matches_size, matches_len + matches_spare);
realloc_matches(&sr, matches_spare);
sr.matches[sr.matches_len].start = offset_vector[0] + line_to_buf;
sr.matches[sr.matches_len].end = offset_vector[1] + line_to_buf;
sr.matches_len++;
matches[matches_len].start = offset_vector[0] + line_to_buf;
matches[matches_len].end = offset_vector[1] + line_to_buf;
matches_len++;
if (opts.max_matches_per_file > 0 && matches_len >= opts.max_matches_per_file) {
if (opts.max_matches_per_file > 0 && sr.matches_len >= opts.max_matches_per_file) {
log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
goto multiline_done;
}
@ -149,56 +147,26 @@ void search_buf(const char *buf, const size_t buf_len,
multiline_done:
if (opts.invert_match) {
matches_len = invert_matches(buf, buf_len, matches, matches_len);
sr.matches_len = invert_matches(buf, buf_len, sr.matches, sr.matches_len);
}
if (opts.stats) {
pthread_mutex_lock(&stats_mtx);
stats.total_bytes += buf_len;
stats.total_files++;
stats.total_matches += matches_len;
if (matches_len > 0) {
stats.total_matches += sr.matches_len;
if (sr.matches_len > 0) {
stats.total_file_matches++;
}
pthread_mutex_unlock(&stats_mtx);
}
if (matches_len > 0) {
if (binary == -1 && !opts.print_filename_only) {
binary = is_binary((const void *)buf, buf_len);
}
pthread_mutex_lock(&print_mtx);
if (opts.print_filename_only) {
/* If the --files-without-matches or -L option is passed we should
* not print a matching line. This option currently sets
* opts.print_filename_only and opts.invert_match. Unfortunately
* setting the latter has the side effect of making matches.len = 1
* on a file-without-matches which is not desired behaviour. See
* GitHub issue 206 for the consequences if this behaviour is not
* checked. */
if (!opts.invert_match || matches_len < 2) {
if (opts.print_count) {
print_path_count(dir_full_path, opts.path_sep, (size_t)matches_len);
} else {
print_path(dir_full_path, opts.path_sep);
}
}
} else if (binary) {
print_binary_file_matches(dir_full_path);
} else {
print_file_matches(dir_full_path, buf, buf_len, matches, matches_len);
}
pthread_mutex_unlock(&print_mtx);
opts.match_found = 1;
} else if (opts.search_stream && opts.passthrough) {
fprintf(out_fd, "%s", buf);
} else {
log_debug("No match in %s", dir_full_path);
}
if (matches_size > 0) {
free(matches);
}
search_results_t *sr_ptr = ag_malloc(sizeof(search_results_t));
sr_ptr->matches = sr.matches;
sr_ptr->matches_len = sr.matches_len;
sr_ptr->matches_size = sr.matches_size;
sr_ptr->binary = sr.binary;
return sr_ptr;
}
/* TODO: this will only match single lines. multi-line regexes silently don't match */
@ -207,13 +175,43 @@ void search_stream(FILE *stream, const char *path) {
ssize_t line_len = 0;
size_t line_cap = 0;
size_t i;
// search_results_t sr;
char **context_lines = NULL;
size_t context_lines_len = opts.before + opts.after;
if (context_lines_len) {
context_lines = ag_calloc(sizeof(char *), context_lines_len + 1);
}
for (i = 1; (line_len = getline(&line, &line_cap, stream)) > 0; i++) {
opts.stream_line_num = i;
search_buf(line, line_len, path);
search_results_t *line_results = search_buf(line, line_len, path);
if (context_lines_len == 0) {
print_results(line, line_len, path, line_results);
continue;
}
context_lines[i] = line;
/* getline will malloc/realloc this */
line = NULL;
char *lines;
size_t lines_len = 0;
size_t j;
for (j = 0; j < context_lines_len + 1; j++) {
ag_asprintf(&(lines), "%s%s", lines, context_lines[j]);
lines_len += strlen(context_lines[j]);
}
/* TODO: munge results */
print_results(lines, lines_len, path, line_results);
}
free(line);
if (context_lines_len) {
/* TODO */
for (i = 0; i < context_lines_len + 1; i++) {
free(context_lines[i]);
}
} else {
free(line);
}
}
void search_file(const char *file_full_path) {
@ -223,6 +221,7 @@ void search_file(const char *file_full_path) {
struct stat statbuf;
int rv = 0;
FILE *fp = NULL;
search_results_t *sr = NULL;
fd = open(file_full_path, O_RDONLY);
if (fd < 0) {
@ -307,13 +306,15 @@ void search_file(const char *file_full_path) {
log_err("Cannot decompress zipped file %s", file_full_path);
goto cleanup;
}
search_buf(_buf, _buf_len, file_full_path);
sr = search_buf(_buf, _buf_len, file_full_path);
print_results(_buf, _buf_len, file_full_path, sr);
free(_buf);
goto cleanup;
}
}
search_buf(buf, f_len, file_full_path);
sr = search_buf(buf, f_len, file_full_path);
print_results(buf, f_len, file_full_path, sr);
cleanup:
@ -324,6 +325,12 @@ cleanup:
munmap(buf, f_len);
#endif
}
if (sr != NULL) {
if (sr->matches_size > 0) {
free(sr->matches);
}
free(sr);
}
if (fd != -1) {
close(fd);
}

View file

@ -44,7 +44,6 @@ work_queue_t *work_queue;
work_queue_t *work_queue_tail;
int done_adding_files;
pthread_cond_t files_ready;
pthread_mutex_t print_mtx;
pthread_mutex_t stats_mtx;
pthread_mutex_t work_queue_mtx;
@ -66,8 +65,8 @@ typedef struct {
symdir_t *symhash;
void search_buf(const char *buf, const size_t buf_len,
const char *dir_full_path);
search_results_t *search_buf(const char *buf, const size_t buf_len, const char *dir_full_path);
void search_stream(FILE *stream, const char *path);
void search_file(const char *file_full_path);

View file

@ -259,13 +259,13 @@ size_t invert_matches(const char *buf, const size_t buf_len, match_t matches[],
return inverted_match_count;
}
void realloc_matches(match_t **matches, size_t *matches_size, size_t matches_len) {
if (matches_len < *matches_size) {
void realloc_matches(search_results_t *sr, size_t matches_spare) {
if (sr->matches_len + matches_spare < sr->matches_size) {
return;
}
/* TODO: benchmark initial size of matches. 100 may be too small/big */
*matches_size = *matches ? *matches_size * 2 : 100;
*matches = ag_realloc(*matches, *matches_size * sizeof(match_t));
sr->matches_size = sr->matches ? sr->matches_size * 2 : 100;
sr->matches = ag_realloc(sr->matches, sr->matches_size * sizeof(match_t));
}
void compile_study(pcre **re, pcre_extra **re_extra, char *q, const int pcre_opts, const int study_opts) {

View file

@ -28,11 +28,24 @@ void *ag_calloc(size_t nelem, size_t elsize);
char *ag_strdup(const char *s);
char *ag_strndup(const char *s, size_t size);
/*
 * Tri-state answer to "is the searched buffer binary?".
 * The numeric values are the ones the compiler assigns by default.
 */
typedef enum {
    AG_BINARY_UNKNOWN = 0, /* not determined yet */
    AG_BINARY_FALSE = 1,   /* determined: not binary */
    AG_BINARY_TRUE = 2     /* determined: binary */
} ag_binary_type;
/* Byte range of a single match, expressed as offsets into the searched
 * buffer. */
typedef struct {
size_t start; /* Offset of the first byte of the match */
size_t end; /* Offset just past the last byte of the match (exclusive) */
} match_t;
/* Results of searching one buffer: the matches found plus what is known
 * about whether the buffer is binary. */
typedef struct {
match_t *matches; /* Array of matches; NULL when nothing has been allocated */
size_t matches_len; /* Number of entries of matches that are in use */
size_t matches_size; /* Capacity of matches, counted in match_t entries */
ag_binary_type binary; /* Binary classification of the buffer, if known */
} search_results_t;
typedef struct {
long total_bytes;
long total_files;
@ -42,7 +55,6 @@ typedef struct {
struct timeval time_end;
} ag_stats;
ag_stats stats;
typedef const char *(*strncmp_fp)(const char *, const char *, const size_t, const size_t, const size_t[], const size_t *);
@ -65,7 +77,7 @@ const char *boyer_moore_strncasestr(const char *s, const char *find, const size_
strncmp_fp get_strstr(enum case_behavior opts);
size_t invert_matches(const char *buf, const size_t buf_len, match_t matches[], size_t matches_len);
void realloc_matches(match_t **matches, size_t *matches_size, size_t matches_len);
void realloc_matches(search_results_t *sr, size_t matches_spare);
void compile_study(pcre **re, pcre_extra **re_extra, char *q, const int pcre_opts, const int study_opts);

34
tests/stream_context.t Normal file
View file

@ -0,0 +1,34 @@
Setup:
$ . $TESTDIR/setup.sh
$ unalias ag
$ alias ag="$TESTDIR/../ag --nocolor --workers=1"
Input is produced with printf because the handling of "\n" by echo differs between shells.
Print context when searching stdin:
$ printf 'before1\nbefore2\nblah\nafter1\nafter2\n' | ag -C blah
before1
before2
blah
after1
after2
$ printf 'before1\nbefore2\nbefore3\nblah\nbetween1\nblah\nafter1\n' | ag -C blah
before2
before3
blah
between1
blah
after1
Print only a line before when searching stdin:
$ printf 'before1\nbefore2\nblah\nafter1\nafter2\n' | ag -B1 blah
before2
blah
Print only a line after when searching stdin:
$ printf 'before1\nbefore2\nblah\nafter1\nafter2\n' | ag -A1 blah
blah
after1