Compare commits
9 commits
master
...
stream_con
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4304379038 | ||
|
|
fa35a7f86e | ||
|
|
f6170cc557 | ||
|
|
1987c8c8bd | ||
|
|
6aa9c80e25 | ||
|
|
614afe2cab | ||
|
|
119c9c1111 | ||
|
|
d19e6f8498 | ||
|
|
21291ffbe3 |
7 changed files with 191 additions and 94 deletions
36
src/print.c
36
src/print.c
|
|
@ -316,3 +316,39 @@ const char *normalize_path(const char *path) {
|
|||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
/*
 * Print the outcome of searching one buffer.
 *
 *   buf, buf_len    the bytes that were searched
 *   dir_full_path   path used in log messages and in printed output
 *   sr              search results for buf; may be NULL
 *
 * When there is nothing to report (sr is NULL or holds zero matches) a debug
 * message is logged and, in stream passthrough mode, the buffer is echoed
 * unchanged to out_fd.
 *
 * Otherwise the path, a binary-file notice, or the matching lines are printed
 * while holding print_mtx, and opts.match_found is set. If sr->binary is
 * still AG_BINARY_UNKNOWN and full match output is wanted, it is resolved
 * with is_binary() and stored back into sr.
 */
void print_results(const char *buf, const size_t buf_len, const char *dir_full_path, search_results_t *sr) {
    if (sr == NULL || sr->matches_len == 0) {
        log_debug("No match in %s", dir_full_path);
        if (opts.search_stream && opts.passthrough) {
            /* Emit exactly buf_len bytes. A "%s" format would ignore buf_len
             * and cut the line short at the first embedded NUL byte. */
            fwrite(buf, 1, buf_len, out_fd);
        }
        return;
    }

    /* The binary check is only needed when match lines may be printed. */
    if (sr->binary == AG_BINARY_UNKNOWN && !opts.print_filename_only) {
        sr->binary = is_binary((const void *)buf, buf_len) ? AG_BINARY_TRUE : AG_BINARY_FALSE;
    }

    pthread_mutex_lock(&print_mtx);
    if (opts.print_filename_only) {
        /* If the --files-without-matches or -L option is passed we should
         * not print a matching line. This option currently sets
         * opts.print_filename_only and opts.invert_match. Unfortunately
         * setting the latter has the side effect of making matches_len = 1
         * on a file-without-matches which is not desired behaviour. See
         * GitHub issue 206 for the consequences if this behaviour is not
         * checked. */
        if (!opts.invert_match || sr->matches_len < 2) {
            if (opts.print_count) {
                print_path_count(dir_full_path, opts.path_sep, sr->matches_len);
            } else {
                print_path(dir_full_path, opts.path_sep);
            }
        }
    } else if (sr->binary == AG_BINARY_TRUE) {
        print_binary_file_matches(dir_full_path);
    } else {
        print_file_matches(dir_full_path, buf, buf_len, sr->matches, sr->matches_len);
    }
    /* Set the shared flag before releasing the lock so that concurrent
     * callers do not write it unsynchronised. */
    opts.match_found = 1;
    pthread_mutex_unlock(&print_mtx);
}
|
||||
|
|
|
|||
17
src/print.h
17
src/print.h
|
|
@ -3,16 +3,25 @@
|
|||
|
||||
#include "util.h"
|
||||
|
||||
#ifdef HAVE_PTHREAD_H
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
||||
pthread_mutex_t print_mtx;
|
||||
|
||||
const char *normalize_path(const char *path);
|
||||
|
||||
void print_path(const char *path, const char sep);
|
||||
void print_path_count(const char *path, const char sep, const size_t count);
|
||||
void print_line(const char *buf, size_t buf_pos, size_t prev_line_offset);
|
||||
void print_binary_file_matches(const char *path);
|
||||
void print_file_matches(const char *path, const char *buf, const size_t buf_len, const match_t matches[], const size_t matches_len);
|
||||
void print_line_number(size_t line, const char sep);
|
||||
void print_column_number(const match_t matches[], size_t last_printed_match,
|
||||
size_t prev_line_offset, const char sep);
|
||||
void print_line(const char *buf, size_t buf_pos, size_t prev_line_offset);
|
||||
void print_file_separator(void);
|
||||
const char *normalize_path(const char *path);
|
||||
void print_binary_file_matches(const char *path);
|
||||
void print_file_matches(const char *path, const char *buf, const size_t buf_len, const match_t matches[], const size_t matches_len);
|
||||
|
||||
void print_results(const char *buf, const size_t buf_len, const char *dir_full_path, search_results_t *sr);
|
||||
|
||||
#ifdef _WIN32
|
||||
void windows_use_ansi(int use_ansi);
|
||||
|
|
|
|||
169
src/search.c
169
src/search.c
|
|
@ -1,24 +1,24 @@
|
|||
#include "search.h"
|
||||
#include "decompress.h"
|
||||
#include "scandir.h"
|
||||
|
||||
void search_buf(const char *buf, const size_t buf_len,
|
||||
const char *dir_full_path) {
|
||||
int binary = -1; /* 1 = yes, 0 = no, -1 = don't know */
|
||||
search_results_t *search_buf(const char *buf, const size_t buf_len, const char *dir_full_path) {
|
||||
size_t buf_offset = 0;
|
||||
search_results_t sr;
|
||||
sr.binary = AG_BINARY_UNKNOWN;
|
||||
|
||||
if (opts.search_stream) {
|
||||
binary = 0;
|
||||
sr.binary = AG_BINARY_FALSE;
|
||||
} else if (!opts.search_binary_files) {
|
||||
binary = is_binary((const void *)buf, buf_len);
|
||||
int binary = is_binary((const void *)buf, buf_len);
|
||||
if (binary) {
|
||||
log_debug("File %s is binary. Skipping...", dir_full_path);
|
||||
return;
|
||||
return NULL;
|
||||
}
|
||||
sr.binary = binary ? AG_BINARY_TRUE : AG_BINARY_FALSE;
|
||||
}
|
||||
|
||||
size_t matches_len = 0;
|
||||
match_t *matches;
|
||||
size_t matches_size;
|
||||
sr.matches_len = 0;
|
||||
size_t matches_spare;
|
||||
|
||||
if (opts.invert_match) {
|
||||
|
|
@ -27,21 +27,22 @@ void search_buf(const char *buf, const size_t buf_len,
|
|||
* sure we have a nonempty array; and make sure we always have spare
|
||||
* capacity for one extra.
|
||||
*/
|
||||
matches_size = 100;
|
||||
matches = ag_malloc(matches_size * sizeof(match_t));
|
||||
sr.matches_size = 100;
|
||||
sr.matches = ag_malloc(sr.matches_size * sizeof(match_t));
|
||||
matches_spare = 1;
|
||||
} else {
|
||||
matches_size = 0;
|
||||
matches = NULL;
|
||||
sr.matches_size = 0;
|
||||
sr.matches = NULL;
|
||||
matches_spare = 0;
|
||||
}
|
||||
|
||||
if (!opts.literal && opts.query_len == 1 && opts.query[0] == '.') {
|
||||
matches_size = 1;
|
||||
matches = matches == NULL ? ag_malloc(matches_size * sizeof(match_t)) : matches;
|
||||
matches[0].start = 0;
|
||||
matches[0].end = buf_len;
|
||||
matches_len = 1;
|
||||
/* Don't even PCRE, just match everything */
|
||||
sr.matches_size = 1;
|
||||
sr.matches = sr.matches == NULL ? ag_malloc(sr.matches_size * sizeof(match_t)) : sr.matches;
|
||||
sr.matches[0].start = 0;
|
||||
sr.matches[0].end = buf_len;
|
||||
sr.matches_len = 1;
|
||||
} else if (opts.literal) {
|
||||
const char *match_ptr = buf;
|
||||
strncmp_fp ag_strnstr_fp = get_strstr(opts.casing);
|
||||
|
|
@ -72,16 +73,15 @@ void search_buf(const char *buf, const size_t buf_len,
|
|||
}
|
||||
}
|
||||
|
||||
realloc_matches(&matches, &matches_size, matches_len + matches_spare);
|
||||
|
||||
matches[matches_len].start = match_ptr - buf;
|
||||
matches[matches_len].end = matches[matches_len].start + opts.query_len;
|
||||
buf_offset = matches[matches_len].end;
|
||||
log_debug("Match found. File %s, offset %lu bytes.", dir_full_path, matches[matches_len].start);
|
||||
matches_len++;
|
||||
realloc_matches(&sr, matches_spare);
|
||||
sr.matches[sr.matches_len].start = match_ptr - buf;
|
||||
sr.matches[sr.matches_len].end = sr.matches[sr.matches_len].start + opts.query_len;
|
||||
buf_offset = sr.matches[sr.matches_len].end;
|
||||
log_debug("Match found. File %s, offset %lu bytes.", dir_full_path, sr.matches[sr.matches_len].start);
|
||||
sr.matches_len++;
|
||||
match_ptr += opts.query_len;
|
||||
|
||||
if (opts.max_matches_per_file > 0 && matches_len >= opts.max_matches_per_file) {
|
||||
if (opts.max_matches_per_file > 0 && sr.matches_len >= opts.max_matches_per_file) {
|
||||
log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
|
||||
break;
|
||||
}
|
||||
|
|
@ -98,13 +98,12 @@ void search_buf(const char *buf, const size_t buf_len,
|
|||
log_debug("Regex match is of length zero. Advancing offset one byte.");
|
||||
}
|
||||
|
||||
realloc_matches(&matches, &matches_size, matches_len + matches_spare);
|
||||
realloc_matches(&sr, matches_spare);
|
||||
sr.matches[sr.matches_len].start = offset_vector[0];
|
||||
sr.matches[sr.matches_len].end = offset_vector[1];
|
||||
sr.matches_len++;
|
||||
|
||||
matches[matches_len].start = offset_vector[0];
|
||||
matches[matches_len].end = offset_vector[1];
|
||||
matches_len++;
|
||||
|
||||
if (opts.max_matches_per_file > 0 && matches_len >= opts.max_matches_per_file) {
|
||||
if (opts.max_matches_per_file > 0 && sr.matches_len >= opts.max_matches_per_file) {
|
||||
log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
|
||||
break;
|
||||
}
|
||||
|
|
@ -130,13 +129,12 @@ void search_buf(const char *buf, const size_t buf_len,
|
|||
log_debug("Regex match is of length zero. Advancing offset one byte.");
|
||||
}
|
||||
|
||||
realloc_matches(&matches, &matches_size, matches_len + matches_spare);
|
||||
realloc_matches(&sr, matches_spare);
|
||||
sr.matches[sr.matches_len].start = offset_vector[0] + line_to_buf;
|
||||
sr.matches[sr.matches_len].end = offset_vector[1] + line_to_buf;
|
||||
sr.matches_len++;
|
||||
|
||||
matches[matches_len].start = offset_vector[0] + line_to_buf;
|
||||
matches[matches_len].end = offset_vector[1] + line_to_buf;
|
||||
matches_len++;
|
||||
|
||||
if (opts.max_matches_per_file > 0 && matches_len >= opts.max_matches_per_file) {
|
||||
if (opts.max_matches_per_file > 0 && sr.matches_len >= opts.max_matches_per_file) {
|
||||
log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
|
||||
goto multiline_done;
|
||||
}
|
||||
|
|
@ -149,56 +147,26 @@ void search_buf(const char *buf, const size_t buf_len,
|
|||
multiline_done:
|
||||
|
||||
if (opts.invert_match) {
|
||||
matches_len = invert_matches(buf, buf_len, matches, matches_len);
|
||||
sr.matches_len = invert_matches(buf, buf_len, sr.matches, sr.matches_len);
|
||||
}
|
||||
|
||||
if (opts.stats) {
|
||||
pthread_mutex_lock(&stats_mtx);
|
||||
stats.total_bytes += buf_len;
|
||||
stats.total_files++;
|
||||
stats.total_matches += matches_len;
|
||||
if (matches_len > 0) {
|
||||
stats.total_matches += sr.matches_len;
|
||||
if (sr.matches_len > 0) {
|
||||
stats.total_file_matches++;
|
||||
}
|
||||
pthread_mutex_unlock(&stats_mtx);
|
||||
}
|
||||
|
||||
if (matches_len > 0) {
|
||||
if (binary == -1 && !opts.print_filename_only) {
|
||||
binary = is_binary((const void *)buf, buf_len);
|
||||
}
|
||||
pthread_mutex_lock(&print_mtx);
|
||||
if (opts.print_filename_only) {
|
||||
/* If the --files-without-matches or -L option is passed we should
|
||||
* not print a matching line. This option currently sets
|
||||
* opts.print_filename_only and opts.invert_match. Unfortunately
|
||||
* setting the latter has the side effect of making matches.len = 1
|
||||
* on a file-without-matches which is not desired behaviour. See
|
||||
* GitHub issue 206 for the consequences if this behaviour is not
|
||||
* checked. */
|
||||
if (!opts.invert_match || matches_len < 2) {
|
||||
if (opts.print_count) {
|
||||
print_path_count(dir_full_path, opts.path_sep, (size_t)matches_len);
|
||||
} else {
|
||||
print_path(dir_full_path, opts.path_sep);
|
||||
}
|
||||
}
|
||||
} else if (binary) {
|
||||
print_binary_file_matches(dir_full_path);
|
||||
} else {
|
||||
print_file_matches(dir_full_path, buf, buf_len, matches, matches_len);
|
||||
}
|
||||
pthread_mutex_unlock(&print_mtx);
|
||||
opts.match_found = 1;
|
||||
} else if (opts.search_stream && opts.passthrough) {
|
||||
fprintf(out_fd, "%s", buf);
|
||||
} else {
|
||||
log_debug("No match in %s", dir_full_path);
|
||||
}
|
||||
|
||||
if (matches_size > 0) {
|
||||
free(matches);
|
||||
}
|
||||
search_results_t *sr_ptr = ag_malloc(sizeof(search_results_t));
|
||||
sr_ptr->matches = sr.matches;
|
||||
sr_ptr->matches_len = sr.matches_len;
|
||||
sr_ptr->matches_size = sr.matches_size;
|
||||
sr_ptr->binary = sr.binary;
|
||||
return sr_ptr;
|
||||
}
|
||||
|
||||
/* TODO: this will only match single lines. multi-line regexes silently don't match */
|
||||
|
|
@ -207,13 +175,43 @@ void search_stream(FILE *stream, const char *path) {
|
|||
ssize_t line_len = 0;
|
||||
size_t line_cap = 0;
|
||||
size_t i;
|
||||
// search_results_t sr;
|
||||
char **context_lines = NULL;
|
||||
size_t context_lines_len = opts.before + opts.after;
|
||||
|
||||
if (context_lines_len) {
|
||||
context_lines = ag_calloc(sizeof(char *), context_lines_len + 1);
|
||||
}
|
||||
|
||||
for (i = 1; (line_len = getline(&line, &line_cap, stream)) > 0; i++) {
|
||||
opts.stream_line_num = i;
|
||||
search_buf(line, line_len, path);
|
||||
search_results_t *line_results = search_buf(line, line_len, path);
|
||||
if (context_lines_len == 0) {
|
||||
print_results(line, line_len, path, line_results);
|
||||
continue;
|
||||
}
|
||||
context_lines[i] = line;
|
||||
/* getline will malloc/realloc this */
|
||||
line = NULL;
|
||||
char *lines;
|
||||
size_t lines_len = 0;
|
||||
size_t j;
|
||||
for (j = 0; j < context_lines_len + 1; j++) {
|
||||
ag_asprintf(&(lines), "%s%s", lines, context_lines[j]);
|
||||
lines_len += strlen(context_lines[j]);
|
||||
}
|
||||
/* TODO: munge results */
|
||||
print_results(lines, lines_len, path, line_results);
|
||||
}
|
||||
|
||||
free(line);
|
||||
if (context_lines_len) {
|
||||
/* TODO */
|
||||
for (i = 0; i < context_lines_len + 1; i++) {
|
||||
free(context_lines[i]);
|
||||
}
|
||||
} else {
|
||||
free(line);
|
||||
}
|
||||
}
|
||||
|
||||
void search_file(const char *file_full_path) {
|
||||
|
|
@ -223,6 +221,7 @@ void search_file(const char *file_full_path) {
|
|||
struct stat statbuf;
|
||||
int rv = 0;
|
||||
FILE *fp = NULL;
|
||||
search_results_t *sr = NULL;
|
||||
|
||||
fd = open(file_full_path, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
|
|
@ -307,13 +306,15 @@ void search_file(const char *file_full_path) {
|
|||
log_err("Cannot decompress zipped file %s", file_full_path);
|
||||
goto cleanup;
|
||||
}
|
||||
search_buf(_buf, _buf_len, file_full_path);
|
||||
sr = search_buf(_buf, _buf_len, file_full_path);
|
||||
print_results(_buf, _buf_len, file_full_path, sr);
|
||||
free(_buf);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
search_buf(buf, f_len, file_full_path);
|
||||
sr = search_buf(buf, f_len, file_full_path);
|
||||
print_results(buf, f_len, file_full_path, sr);
|
||||
|
||||
cleanup:
|
||||
|
||||
|
|
@ -324,6 +325,12 @@ cleanup:
|
|||
munmap(buf, f_len);
|
||||
#endif
|
||||
}
|
||||
if (sr != NULL) {
|
||||
if (sr->matches_size > 0) {
|
||||
free(sr->matches);
|
||||
}
|
||||
free(sr);
|
||||
}
|
||||
if (fd != -1) {
|
||||
close(fd);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -44,7 +44,6 @@ work_queue_t *work_queue;
|
|||
work_queue_t *work_queue_tail;
|
||||
int done_adding_files;
|
||||
pthread_cond_t files_ready;
|
||||
pthread_mutex_t print_mtx;
|
||||
pthread_mutex_t stats_mtx;
|
||||
pthread_mutex_t work_queue_mtx;
|
||||
|
||||
|
|
@ -66,8 +65,8 @@ typedef struct {
|
|||
|
||||
symdir_t *symhash;
|
||||
|
||||
void search_buf(const char *buf, const size_t buf_len,
|
||||
const char *dir_full_path);
|
||||
search_results_t *search_buf(const char *buf, const size_t buf_len, const char *dir_full_path);
|
||||
|
||||
void search_stream(FILE *stream, const char *path);
|
||||
void search_file(const char *file_full_path);
|
||||
|
||||
|
|
|
|||
|
|
@ -259,13 +259,13 @@ size_t invert_matches(const char *buf, const size_t buf_len, match_t matches[],
|
|||
return inverted_match_count;
|
||||
}
|
||||
|
||||
void realloc_matches(match_t **matches, size_t *matches_size, size_t matches_len) {
|
||||
if (matches_len < *matches_size) {
|
||||
void realloc_matches(search_results_t *sr, size_t matches_spare) {
|
||||
if (sr->matches_len + matches_spare < sr->matches_size) {
|
||||
return;
|
||||
}
|
||||
/* TODO: benchmark initial size of matches. 100 may be too small/big */
|
||||
*matches_size = *matches ? *matches_size * 2 : 100;
|
||||
*matches = ag_realloc(*matches, *matches_size * sizeof(match_t));
|
||||
sr->matches_size = sr->matches ? sr->matches_size * 2 : 100;
|
||||
sr->matches = ag_realloc(sr->matches, sr->matches_size * sizeof(match_t));
|
||||
}
|
||||
|
||||
void compile_study(pcre **re, pcre_extra **re_extra, char *q, const int pcre_opts, const int study_opts) {
|
||||
|
|
|
|||
16
src/util.h
16
src/util.h
|
|
@ -28,11 +28,24 @@ void *ag_calloc(size_t nelem, size_t elsize);
|
|||
char *ag_strdup(const char *s);
|
||||
char *ag_strndup(const char *s, size_t size);
|
||||
|
||||
/* Tri-state result of the binary-content check for a searched buffer. */
typedef enum {
|
||||
AG_BINARY_UNKNOWN, /* is_binary() has not been run on the buffer yet */
|
||||
AG_BINARY_FALSE, /* treated as text; search_buf() assumes this for streams */
|
||||
AG_BINARY_TRUE /* is_binary() reported binary content */
|
||||
} ag_binary_type;
|
||||
|
||||
/* Byte range of a single match, as offsets into the searched buffer. */
typedef struct {
|
||||
size_t start; /* Byte at which the match starts */
|
||||
size_t end; /* Offset just past the last matched byte (start + match length) */
|
||||
} match_t;
|
||||
|
||||
/*
 * Matches found in one buffer, as produced by search_buf().
 * search_buf() returns a heap-allocated instance; search_file() frees
 * `matches` (when matches_size > 0) and then the struct itself.
 */
typedef struct {
|
||||
match_t *matches; /* Array of matches; NULL until the first allocation */
|
||||
size_t matches_len; /* Number of entries of `matches` in use */
|
||||
size_t matches_size; /* Allocated capacity of `matches`, in elements */
|
||||
ag_binary_type binary; /* Binary classification of the searched buffer */
|
||||
} search_results_t;
|
||||
|
||||
typedef struct {
|
||||
long total_bytes;
|
||||
long total_files;
|
||||
|
|
@ -42,7 +55,6 @@ typedef struct {
|
|||
struct timeval time_end;
|
||||
} ag_stats;
|
||||
|
||||
|
||||
ag_stats stats;
|
||||
|
||||
typedef const char *(*strncmp_fp)(const char *, const char *, const size_t, const size_t, const size_t[], const size_t *);
|
||||
|
|
@ -65,7 +77,7 @@ const char *boyer_moore_strncasestr(const char *s, const char *find, const size_
|
|||
strncmp_fp get_strstr(enum case_behavior opts);
|
||||
|
||||
size_t invert_matches(const char *buf, const size_t buf_len, match_t matches[], size_t matches_len);
|
||||
void realloc_matches(match_t **matches, size_t *matches_size, size_t matches_len);
|
||||
void realloc_matches(search_results_t *sr, size_t matches_spare);
|
||||
void compile_study(pcre **re, pcre_extra **re_extra, char *q, const int pcre_opts, const int study_opts);
|
||||
|
||||
|
||||
|
|
|
|||
34
tests/stream_context.t
Normal file
34
tests/stream_context.t
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
Setup:
|
||||
|
||||
$ . $TESTDIR/setup.sh
|
||||
$ unalias ag
|
||||
$ alias ag="$TESTDIR/../ag --nocolor --workers=1"
|
||||
|
||||
Print context when searching stdin:
|
||||
|
||||
$ echo "before1\nbefore2\nblah\nafter1\nafter2" | ag -C blah
|
||||
before1
|
||||
before2
|
||||
blah
|
||||
after1
|
||||
after2
|
||||
|
||||
$ echo "before1\nbefore2\nbefore3\nblah\nbetween1\nblah\nafter1" | ag -C blah
|
||||
before2
|
||||
before3
|
||||
blah
|
||||
between1
|
||||
blah
|
||||
after1
|
||||
|
||||
Print only a line before when searching stdin:
|
||||
|
||||
$ echo "before1\nbefore2\nblah\nafter1\nafter2" | ag -B1 blah
|
||||
before2
|
||||
blah
|
||||
|
||||
Print only a line after when searching stdin:
|
||||
|
||||
$ echo "before1\nbefore2\nblah\nafter1\nafter2" | ag -A1 blah
|
||||
blah
|
||||
after1
|
||||
Loading…
Reference in a new issue