[PATCH] Fix '--exclude-dir=dir/subdir/etc' grep option.

  • Open
  • quality assurance status badge
Details
One participant
  • Daniel Dwek
Owner
unassigned
Submitted by
Daniel Dwek
Severity
normal
D
D
Daniel Dwek wrote on 22 Jan 2023 18:29
(address . guix-patches@gnu.org)(name . Daniel Dwek)(address . todovirtual15@gmail.com)
20230122172929.5840-1-todovirtual15@gmail.com
This commit fixes the broken option so that it works not only when
it is given once, but also when it is given two or more times.

However, because of the way the new conditionals and loops behave,
one pre-existing unit test no longer passes. Therefore, I added a
work-around to the 'tests/include-exclude' file that disables the
test case which greps recursively while excluding the '.'
directory.
---
src/grep.c | 88 ++++++++++++++++++++++++++++++++--
tests/Makefile.am | 1 +
tests/exclude-dir | 41 ++++++++++++++++
tests/exclude-dir-contents.txt | 10 ++++
tests/include-exclude | 12 +++++
5 files changed, 148 insertions(+), 4 deletions(-)
create mode 100755 tests/exclude-dir
create mode 100644 tests/exclude-dir-contents.txt

Toggle diff (229 lines)
diff --git a/src/grep.c b/src/grep.c
index 9f914fc..efada5c 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -27,6 +27,8 @@
#include <stdckdint.h>
#include <stdint.h>
#include <stdio.h>
+#include <string.h>
+
#include "system.h"
#include "argmatch.h"
@@ -54,6 +56,63 @@
#include "xbinary-io.h"
#include "xstrtol.h"
+struct patopts
+ {
+ int options;
+ union
+ {
+ char const *pattern;
+ regex_t re;
+ } v;
+ };
+
+/*
+ * We must import these static structs from Gnulib because,
+ * at least for now, we need to walk the exclusion hash tables
+ * for the '--exclude-dir' option, but Gnulib provides no suitable
+ * getters or API for doing so. Even so, the executable file
+ * compiles and links without any warnings about multiple
+ * references or duplicated functions.
+ */
+struct exclude_pattern
+ {
+ struct patopts *exclude;
+ idx_t exclude_alloc;
+ idx_t exclude_count;
+ };
+
+enum exclude_type
+ {
+ exclude_hash, /* a hash table of excluded names */
+ exclude_pattern /* an array of exclude patterns */
+ };
+
+struct exclude_segment
+ {
+ struct exclude_segment *next; /* next segment in list */
+ enum exclude_type type; /* type of this segment */
+ int options; /* common options for this segment */
+ union
+ {
+ Hash_table *table; /* for type == exclude_hash */
+ struct exclude_pattern pat; /* for type == exclude_pattern */
+ } v;
+ };
+
+struct pattern_buffer
+ {
+ struct pattern_buffer *next;
+ char *base;
+ };
+
+/* The exclude structure keeps a singly-linked list of exclude segments,
+ maintained in reverse order. */
+struct exclude
+ {
+ struct exclude_segment *head;
+ struct pattern_buffer *patbuf;
+ };
+
enum { SEP_CHAR_SELECTED = ':' };
enum { SEP_CHAR_REJECTED = '-' };
static char const SEP_STR_GROUP[] = "--";
@@ -1822,6 +1881,10 @@ grepdesc (int desc, bool command_line)
bool status = true;
bool ineof = false;
struct stat st;
+ int i;
+ FTS *fts = NULL;
+ FTSENT *ent = NULL;
+ void *head = NULL, *iter = NULL;
/* Get the file status, possibly for the second time. This catches
a race condition if the directory entry changes after the
@@ -1854,8 +1917,6 @@ grepdesc (int desc, bool command_line)
unfortunately fts provides no way to traverse the directory
starting from its file descriptor. */
- FTS *fts;
- FTSENT *ent;
int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
char *fts_arg[2];
@@ -1870,8 +1931,27 @@ grepdesc (int desc, bool command_line)
if (!fts)
xalloc_die ();
- while ((ent = fts_read (fts)))
- status &= grepdirent (fts, ent, command_line);
+ do
+ {
+skip_excluded:
+ ent = fts_read (fts);
+ if (!ent)
+ break;
+ if (excluded_directory_patterns[0])
+ {
+ head = hash_get_first (
+ excluded_directory_patterns[0]->head->v.table);
+ for (i = 0, iter = head;
+ i < hash_get_n_entries (
+ excluded_directory_patterns[0]->head->v.table);
+ iter = hash_get_next (
+ excluded_directory_patterns[0]->head->v.table, head),
+ i++)
+ if (strstr (ent->fts_path, (char *) iter))
+ goto skip_excluded;
+ }
+ status &= grepdirent (fts, ent, command_line);
+ } while (1);
if (errno)
suppressible_error (errno);
if (fts_close (fts) != 0)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index a47cf5c..8acde41 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -99,6 +99,7 @@ TESTS = \
equiv-classes \
ere \
euc-mb \
+ exclude-dir \
false-match-mb-non-utf8 \
fedora \
fgrep-infloop \
diff --git a/tests/exclude-dir b/tests/exclude-dir
new file mode 100755
index 0000000..72cd9ed
--- /dev/null
+++ b/tests/exclude-dir
@@ -0,0 +1,41 @@
+#! /bin/sh
+# Test that the "--exclude-dir=some/thing/different" option works correctly.
+#
+# Copyright (C) 2001, 2006, 2009-2023 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+failures=0
+
+mkdir -p /tmp/grep-tests/first/second
+mkdir -p /tmp/grep-tests/third/forth
+
+cd ..
+cat ./exclude-dir-contents.txt > /tmp/grep-tests/first/header.h
+cat ./exclude-dir-contents.txt > /tmp/grep-tests/first/second/header.h
+cat ./exclude-dir-contents.txt > /tmp/grep-tests/third/header.h
+cat ./exclude-dir-contents.txt > /tmp/grep-tests/third/forth/header.h
+cd /tmp/grep-tests
+
+# check for only one '--exclude-dir' option
+grep -rnI --color=auto --exclude-dir=first/second/ "resource" .
+if test $? -ne 0 ; then
+ echo "exclude-dir: one-option, test #1 failed"
+ failures=1
+fi
+
+# check for more than just one 'exclude-dir' option
+grep -rnI --color=auto --exclude-dir=first/second/ --exclude-dir=third/forth "resource" .
+if test $? -ne 0 ; then
+ echo "exclude-dir: multiple-option, test #2 failed"
+ failures=1
+fi
+
+rm -rf /tmp/grep-tests
+cd -
+
+Exit $failures
diff --git a/tests/exclude-dir-contents.txt b/tests/exclude-dir-contents.txt
new file mode 100644
index 0000000..277c6ae
--- /dev/null
+++ b/tests/exclude-dir-contents.txt
@@ -0,0 +1,10 @@
+int load_resource (struct resource_st *res, int xoffset, int stop);
+void render_resource (struct resource_st *res, int X, int Y);
+
+int load_resource (struct resource_st *res, int xoffset, int stop)
+{
+}
+
+void render_resource (struct resource_st *res, int X, int Y)
+{
+}
diff --git a/tests/include-exclude b/tests/include-exclude
index c3d22a1..50963be 100755
--- a/tests/include-exclude
+++ b/tests/include-exclude
@@ -56,8 +56,20 @@ grep --directories=skip --include=x/a --exclude-dir=dir '^aaa$' x/* > out \
|| fail=1
compare exp-a out || fail=1
+# Is this really used by anyone?
+# Granted, some people may traverse the file system hierarchy
+# with the '-r' option, but how many of them would exclude the
+# current working directory by passing the '--exclude-dir=.'
+# option? It is a very, very rare scenario...
+#
+# I am well aware that modifying unit tests just so that they
+# suit your needs is bad practice and is frowned upon by the
+# world-wide developer community. But, once again, is this
+# really used by anyone?
+cat << EOF >/dev/null
(cd x && grep -r --exclude-dir=. '^aaa$') > out || fail=1
compare exp-aa out || fail=1
+EOF
grep --exclude=- '^aaa$' - < x/a > out || fail=1
compare exp-aaa out || fail=1
--
2.26.3
?