diff options
| -rw-r--r-- | README.md | 12 | ||||
| -rw-r--r-- | main.c | 69 | ||||
| -rw-r--r--[-rwxr-xr-x] | tests.sh | 4 |
3 files changed, 71 insertions, 14 deletions
@@ -35,9 +35,12 @@ make all ## Usage ```bash -./crep <search_term> [path] +./crep [-c|--case-sensitive] [-l|--levenshtein <dist>] [-d|--depth <level>] <search_term> [path] ``` +- `-c, --case-sensitive`: Enable case-sensitive matching (default is case-insensitive). +- `-l, --levenshtein <dist>`: Enable fuzzy matching with a maximum Levenshtein distance of `<dist>`. +- `-d, --depth <level>`: Set the maximum recursion depth for directory traversal. - `<search_term>`: The string to search for within function/method names. - `[path]`: Optional. The directory or file to search (defaults to current directory). @@ -48,6 +51,7 @@ make all ### Examples Search for all functions containing "init" in the current directory: + ```bash ./crep init . ``` @@ -62,6 +66,12 @@ Run with debug logging enabled: DEBUG=1 ./crep init . ``` +Search for "main" allowing for 2 typos (e.g. "mian"): + +```bash +./crep -l 2 "mian" main.c +``` + ## How It Works `crep` works by: @@ -1,7 +1,3 @@ -// TODO: -// - Add Levenshtein distance for matching and expose distance as arg with -// something like -d5 which would allow distance of 5 on a match. - // FIXME: // - Truncate longer argument list. @@ -38,6 +34,30 @@ TSLanguage *tree_sitter_php(void); TSLanguage *tree_sitter_rust(void); TSLanguage *tree_sitter_javascript(void); +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +int levenshtein_distance(const char *s1, const char *s2) { + unsigned int len1 = strlen(s1); + unsigned int len2 = strlen(s2); + unsigned int distances[len1 + 1][len2 + 1]; + + for (unsigned int i = 0; i <= len1; i++) { + distances[i][0] = i; + } + for (unsigned int j = 0; j <= len2; j++) { + distances[0][j] = j; + } + + for (unsigned int i = 1; i <= len1; i++) { + for (unsigned int j = 1; j <= len2; j++) { + int cost = (s1[i - 1] == s2[j - 1]) ? 0 : 1; + distances[i][j] = MIN(MIN(distances[i - 1][j] + 1, distances[i][j - 1] + 1), distances[i - 1][j - 1] + cost); + } + } + + return distances[len1][len2]; +} + typedef struct { const char *fname; const char *ftype; @@ -91,6 +111,7 @@ struct ThreadArgs { uint32_t query_len; const char *cfname; int case_sensitive; + int max_distance; }; // void parse_source_file(const char *file_path, const char *source_code, @@ -103,6 +124,7 @@ void parse_source_file(void *arg) { TSLanguage *language = args->language; const char *cfname = args->cfname; int case_sensitive = args->case_sensitive; + int max_distance = args->max_distance; TSParser *parser = ts_parser_new(); ts_parser_set_language(parser, language); @@ -169,18 +191,33 @@ void parse_source_file(void *arg) { } // Substring matching. - // FIXME: Add Levenshtein distance. if (fn.fname != NULL) { - char *result; - if (case_sensitive) { - result = strstr(fn.fname, cfname); + char *result = NULL; + int distance = -1; + + if (max_distance > 0) { + distance = levenshtein_distance(fn.fname, cfname); + if (distance <= max_distance) { + // We treat it as a match, but result pointer logic is different + // For printing purposes effectively a match. + // We'll just set result to non-null to trigger the print. + result = (char *)fn.fname; + } } else { - result = strcasestr(fn.fname, cfname); + if (case_sensitive) { + result = strstr(fn.fname, cfname); + } else { + result = strcasestr(fn.fname, cfname); + } } if (result != NULL) { char *fparams_formatted = remove_newlines(fn.fparams); - printf("%s:%zu: %s %s %s\n", file_path, fn.lineno, fn.ftype ? fn.ftype : "", fn.fname, fparams_formatted ? fparams_formatted : ""); + if (max_distance > 0) { + printf("%s:%zu: %s %s %s (dist: %d)\n", file_path, fn.lineno, fn.ftype ? fn.ftype : "", fn.fname, fparams_formatted ? fparams_formatted : "", distance); + } else { + printf("%s:%zu: %s %s %s\n", file_path, fn.lineno, fn.ftype ? fn.ftype : "", fn.fname, fparams_formatted ? fparams_formatted : ""); + } free(fparams_formatted); } } @@ -211,29 +248,34 @@ const char *get_file_extension(const char *file_path) { int main(int argc, char *argv[]) { int case_sensitive = 0; + int max_distance = 0; int max_depth = -1; int opt; struct option long_options[] = { {"case-sensitive", no_argument, 0, 'c'}, + {"levenshtein", required_argument, 0, 'l'}, {"depth", required_argument, 0, 'd'}, {0, 0, 0, 0}}; - while ((opt = getopt_long(argc, argv, "cd:", long_options, NULL)) != -1) { + while ((opt = getopt_long(argc, argv, "cl:d:", long_options, NULL)) != -1) { switch (opt) { case 'c': case_sensitive = 1; break; + case 'l': + max_distance = atoi(optarg); + break; case 'd': max_depth = atoi(optarg); break; default: - fprintf(stderr, "Usage: %s [-c|--case-sensitive] [-d|--depth <level>] <search term> [directory|file]\n", argv[0]); + fprintf(stderr, "Usage: %s [-c|--case-sensitive] [-l|--levenshtein <dist>] [-d|--depth <level>] <search term> [directory|file]\n", argv[0]); return 1; } } if (optind >= argc) { - fprintf(stderr, "Usage: %s [-c|--case-sensitive] [-d|--depth <level>] <search term> [directory|file]\n", argv[0]); + fprintf(stderr, "Usage: %s [-c|--case-sensitive] [-l|--levenshtein <dist>] [-d|--depth <level>] <search term> [directory|file]\n", argv[0]); return 1; } @@ -317,6 +359,7 @@ int main(int argc, char *argv[]) { thread_args->query_len = query_len; thread_args->cfname = cfname; thread_args->case_sensitive = case_sensitive; + thread_args->max_distance = max_distance; tp_add_job(pool, (thread_func_t)parse_source_file, thread_args); } else { @@ -105,6 +105,10 @@ run_test_with_flags "Case Sensitive -c" "-c" "foobar" "tests/test.c" "void fooba run_test_with_flags "Depth 0 (root)" "-d 0" "level" "tests/depth_test" "void level0" run_test_with_flags "Depth 1 (recursive)" "-d 1" "level" "tests/depth_test" "void level1" +# Levenshtein Distance Tests +run_test_with_flags "Levenshtein -l 1 match" "-l 1" "heelo" "$TEST_DIR/test.c" "void hello ()" +run_test_with_flags "Levenshtein -l 2 match" "-l 2" "heloo" "$TEST_DIR/test.c" "void hello ()" + echo "----------------" if [ $failed -eq 0 ]; then echo "All tests passed!" |
