summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-01-22 02:35:46 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-01-22 02:35:46 +0100
commit5ed59329be62c0b3a59fb47a89fd8c00a74bed5d (patch)
treee3aea8013c7ef1aa54d1beb36f0ea7d10004241d
parent069301a603ad07338179bafed0c35b6fe3b372f2 (diff)
downloadcrep-5ed59329be62c0b3a59fb47a89fd8c00a74bed5d.tar.gz
Add Levenshtein distance
-rw-r--r--README.md12
-rw-r--r--main.c69
-rw-r--r--[-rwxr-xr-x]tests.sh4
3 files changed, 71 insertions, 14 deletions
diff --git a/README.md b/README.md
index 6401b69..c001382 100644
--- a/README.md
+++ b/README.md
@@ -35,9 +35,12 @@ make all
## Usage
```bash
-./crep <search_term> [path]
+./crep [-c|--case-sensitive] [-l|--levenshtein <dist>] [-d|--depth <level>] <search_term> [path]
```
+- `-c, --case-sensitive`: Enable case-sensitive matching (default is case-insensitive).
+- `-l, --levenshtein <dist>`: Enable fuzzy matching with a maximum Levenshtein distance of `<dist>`.
+- `-d, --depth <level>`: Set the maximum recursion depth for directory traversal.
- `<search_term>`: The string to search for within function/method names.
- `[path]`: Optional. The directory or file to search (defaults to current directory).
@@ -48,6 +51,7 @@ make all
### Examples
Search for all functions containing "init" in the current directory:
+
```bash
./crep init .
```
@@ -62,6 +66,12 @@ Run with debug logging enabled:
DEBUG=1 ./crep init .
```
+Search for "main" allowing for 2 typos (e.g. "mian"):
+
+```bash
+./crep -l 2 "mian" main.c
+```
+
## How It Works
`crep` works by:
diff --git a/main.c b/main.c
index 49fe5a3..5c0fae6 100644
--- a/main.c
+++ b/main.c
@@ -1,7 +1,3 @@
-// TODO:
-// - Add Levenshtein distance for matching and expose distance as arg with
-// something like -d5 which would allow distance of 5 on a match.
-
// FIXME:
// - Truncate longer argument list.
@@ -38,6 +34,30 @@ TSLanguage *tree_sitter_php(void);
TSLanguage *tree_sitter_rust(void);
TSLanguage *tree_sitter_javascript(void);
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+int levenshtein_distance(const char *s1, const char *s2) {
+ unsigned int len1 = strlen(s1);
+ unsigned int len2 = strlen(s2);
+ unsigned int distances[len1 + 1][len2 + 1];
+
+ for (unsigned int i = 0; i <= len1; i++) {
+ distances[i][0] = i;
+ }
+ for (unsigned int j = 0; j <= len2; j++) {
+ distances[0][j] = j;
+ }
+
+ for (unsigned int i = 1; i <= len1; i++) {
+ for (unsigned int j = 1; j <= len2; j++) {
+ int cost = (s1[i - 1] == s2[j - 1]) ? 0 : 1;
+ distances[i][j] = MIN(MIN(distances[i - 1][j] + 1, distances[i][j - 1] + 1), distances[i - 1][j - 1] + cost);
+ }
+ }
+
+ return distances[len1][len2];
+}
+
typedef struct {
const char *fname;
const char *ftype;
@@ -91,6 +111,7 @@ struct ThreadArgs {
uint32_t query_len;
const char *cfname;
int case_sensitive;
+ int max_distance;
};
// void parse_source_file(const char *file_path, const char *source_code,
@@ -103,6 +124,7 @@ void parse_source_file(void *arg) {
TSLanguage *language = args->language;
const char *cfname = args->cfname;
int case_sensitive = args->case_sensitive;
+ int max_distance = args->max_distance;
TSParser *parser = ts_parser_new();
ts_parser_set_language(parser, language);
@@ -169,18 +191,33 @@ void parse_source_file(void *arg) {
}
// Substring matching.
- // FIXME: Add Levenshtein distance.
if (fn.fname != NULL) {
- char *result;
- if (case_sensitive) {
- result = strstr(fn.fname, cfname);
+ char *result = NULL;
+ int distance = -1;
+
+ if (max_distance > 0) {
+ distance = levenshtein_distance(fn.fname, cfname);
+ if (distance <= max_distance) {
+ // We treat it as a match, but result pointer logic is different
+ // For printing purposes effectively a match.
+ // We'll just set result to non-null to trigger the print.
+ result = (char *)fn.fname;
+ }
} else {
- result = strcasestr(fn.fname, cfname);
+ if (case_sensitive) {
+ result = strstr(fn.fname, cfname);
+ } else {
+ result = strcasestr(fn.fname, cfname);
+ }
}
if (result != NULL) {
char *fparams_formatted = remove_newlines(fn.fparams);
- printf("%s:%zu: %s %s %s\n", file_path, fn.lineno, fn.ftype ? fn.ftype : "", fn.fname, fparams_formatted ? fparams_formatted : "");
+ if (max_distance > 0) {
+ printf("%s:%zu: %s %s %s (dist: %d)\n", file_path, fn.lineno, fn.ftype ? fn.ftype : "", fn.fname, fparams_formatted ? fparams_formatted : "", distance);
+ } else {
+ printf("%s:%zu: %s %s %s\n", file_path, fn.lineno, fn.ftype ? fn.ftype : "", fn.fname, fparams_formatted ? fparams_formatted : "");
+ }
free(fparams_formatted);
}
}
@@ -211,29 +248,34 @@ const char *get_file_extension(const char *file_path) {
int main(int argc, char *argv[]) {
int case_sensitive = 0;
+ int max_distance = 0;
int max_depth = -1;
int opt;
struct option long_options[] = {
{"case-sensitive", no_argument, 0, 'c'},
+ {"levenshtein", required_argument, 0, 'l'},
{"depth", required_argument, 0, 'd'},
{0, 0, 0, 0}};
- while ((opt = getopt_long(argc, argv, "cd:", long_options, NULL)) != -1) {
+ while ((opt = getopt_long(argc, argv, "cl:d:", long_options, NULL)) != -1) {
switch (opt) {
case 'c':
case_sensitive = 1;
break;
+ case 'l':
+ max_distance = atoi(optarg);
+ break;
case 'd':
max_depth = atoi(optarg);
break;
default:
- fprintf(stderr, "Usage: %s [-c|--case-sensitive] [-d|--depth <level>] <search term> [directory|file]\n", argv[0]);
+ fprintf(stderr, "Usage: %s [-c|--case-sensitive] [-l|--levenshtein <dist>] [-d|--depth <level>] <search term> [directory|file]\n", argv[0]);
return 1;
}
}
if (optind >= argc) {
- fprintf(stderr, "Usage: %s [-c|--case-sensitive] [-d|--depth <level>] <search term> [directory|file]\n", argv[0]);
+ fprintf(stderr, "Usage: %s [-c|--case-sensitive] [-l|--levenshtein <dist>] [-d|--depth <level>] <search term> [directory|file]\n", argv[0]);
return 1;
}
@@ -317,6 +359,7 @@ int main(int argc, char *argv[]) {
thread_args->query_len = query_len;
thread_args->cfname = cfname;
thread_args->case_sensitive = case_sensitive;
+ thread_args->max_distance = max_distance;
tp_add_job(pool, (thread_func_t)parse_source_file, thread_args);
} else {
diff --git a/tests.sh b/tests.sh
index f191e87..9ed8e9e 100755..100644
--- a/tests.sh
+++ b/tests.sh
@@ -105,6 +105,10 @@ run_test_with_flags "Case Sensitive -c" "-c" "foobar" "tests/test.c" "void fooba
run_test_with_flags "Depth 0 (root)" "-d 0" "level" "tests/depth_test" "void level0"
run_test_with_flags "Depth 1 (recursive)" "-d 1" "level" "tests/depth_test" "void level1"
+# Levenshtein Distance Tests
+run_test_with_flags "Levenshtein -l 1 match" "-l 1" "heelo" "$TEST_DIR/test.c" "void hello ()"
+run_test_with_flags "Levenshtein -l 2 match" "-l 2" "heloo" "$TEST_DIR/test.c" "void hello ()"
+
echo "----------------"
if [ $failed -eq 0 ]; then
echo "All tests passed!"