Add Levenshtein distance

Author Mitja Felicijan <mitja.felicijan@gmail.com> 2026-01-22 02:35:46 +0100
Committer Mitja Felicijan <mitja.felicijan@gmail.com> 2026-01-22 02:35:46 +0100
Commit 5ed59329be62c0b3a59fb47a89fd8c00a74bed5d (patch)
-rw-r--r-- README.md 12
-rw-r--r-- main.c 69
-rw-r--r-- tests.sh 4
3 files changed, 71 insertions, 14 deletions
diff --git a/README.md b/README.md
...
35
## Usage
35
## Usage
36
  
36
  
37
```bash
37
```bash
38
./crep <search_term> [path]
38
./crep [-c|--case-sensitive] [-l|--levenshtein <dist>] [-d|--depth <level>] <search_term> [path]
39
```
39
```
40
  
40
  
  
41
- `-c, --case-sensitive`: Enable case-sensitive matching (default is case-insensitive).
  
42
- `-l, --levenshtein <dist>`: Enable fuzzy matching with a maximum Levenshtein distance of `<dist>`.
  
43
- `-d, --depth <level>`: Set the maximum recursion depth for directory traversal.
41
- `<search_term>`: The string to search for within function/method names.
44
- `<search_term>`: The string to search for within function/method names.
42
- `[path]`: Optional. The directory or file to search (defaults to current directory).
45
- `[path]`: Optional. The directory or file to search (defaults to current directory).
43
  
46
  
...
48
### Examples
51
### Examples
49
  
52
  
50
Search for all functions containing "init" in the current directory:
53
Search for all functions containing "init" in the current directory:
  
54
  
51
```bash
55
```bash
52
./crep init .
56
./crep init .
53
```
57
```
...
60
Run with debug logging enabled:
64
Run with debug logging enabled:
61
```bash
65
```bash
62
DEBUG=1 ./crep init .
66
DEBUG=1 ./crep init .
  
67
```
  
68
  
  
69
Search for "main" allowing for 2 typos (e.g. "mian"):
  
70
  
  
71
```bash
  
72
./crep -l 2 "mian" main.c
63
```
73
```
64
  
74
  
65
## How It Works
75
## How It Works
...
diff --git a/main.c b/main.c
1
// TODO:
  
2
//  - Add Levenshtein distance for matching and expose distance as arg with
  
3
//    something like -d5 which would allow distance of 5 on a match.
  
4
  
  
5
// FIXME:
1
// FIXME:
6
//  - Truncate longer argument list.
2
//  - Truncate longer argument list.
7
  
3
  
...
38
TSLanguage *tree_sitter_rust(void);
34
TSLanguage *tree_sitter_rust(void);
39
TSLanguage *tree_sitter_javascript(void);
35
TSLanguage *tree_sitter_javascript(void);
40
  
36
  
  
37
#define MIN(a, b) ((a) < (b) ? (a) : (b))
  
38
  
  
39
int levenshtein_distance(const char *s1, const char *s2) {
  
40
	unsigned int len1 = strlen(s1);
  
41
	unsigned int len2 = strlen(s2);
  
42
	unsigned int distances[len1 + 1][len2 + 1];
  
43
  
  
44
	for (unsigned int i = 0; i <= len1; i++) {
  
45
		distances[i][0] = i;
  
46
	}
  
47
	for (unsigned int j = 0; j <= len2; j++) {
  
48
		distances[0][j] = j;
  
49
	}
  
50
  
  
51
	for (unsigned int i = 1; i <= len1; i++) {
  
52
		for (unsigned int j = 1; j <= len2; j++) {
  
53
			int cost = (s1[i - 1] == s2[j - 1]) ? 0 : 1;
  
54
			distances[i][j] = MIN(MIN(distances[i - 1][j] + 1, distances[i][j - 1] + 1), distances[i - 1][j - 1] + cost);
  
55
		}
  
56
	}
  
57
  
  
58
	return distances[len1][len2];
  
59
}
  
60
  
41
typedef struct {
61
typedef struct {
42
	const char *fname;
62
	const char *fname;
43
	const char *ftype;
63
	const char *ftype;
...
91
	uint32_t query_len;
111
	uint32_t query_len;
92
	const char *cfname;
112
	const char *cfname;
93
	int case_sensitive;
113
	int case_sensitive;
  
114
	int max_distance;
94
};
115
};
95
  
116
  
96
// void parse_source_file(const char *file_path, const char *source_code,
117
// void parse_source_file(const char *file_path, const char *source_code,
...
103
	TSLanguage *language = args->language;
124
	TSLanguage *language = args->language;
104
	const char *cfname = args->cfname;
125
	const char *cfname = args->cfname;
105
	int case_sensitive = args->case_sensitive;
126
	int case_sensitive = args->case_sensitive;
  
127
	int max_distance = args->max_distance;
106
  
128
  
107
	TSParser *parser = ts_parser_new();
129
	TSParser *parser = ts_parser_new();
108
	ts_parser_set_language(parser, language);
130
	ts_parser_set_language(parser, language);
...
169
		}
191
		}
170
  
192
  
171
		// Substring matching.
193
		// Substring matching.
172
		// FIXME: Add Levenshtein distance.
  
173
		if (fn.fname != NULL) {
194
		if (fn.fname != NULL) {
174
			char *result;
195
			char *result = NULL;
175
			if (case_sensitive) {
196
			int distance = -1;
176
				result = strstr(fn.fname, cfname);
197
  
  
198
			if (max_distance > 0) {
  
199
				distance = levenshtein_distance(fn.fname, cfname);
  
200
				if (distance <= max_distance) {
  
201
					// We treat it as a match, but result pointer logic is different
  
202
					// For printing purposes effectively a match.
  
203
					// We'll just set result to non-null to trigger the print.
  
204
					result = (char *)fn.fname; 
  
205
				}
177
			} else {
206
			} else {
178
				result = strcasestr(fn.fname, cfname);
207
				if (case_sensitive) {
  
208
					result = strstr(fn.fname, cfname);
  
209
				} else {
  
210
					result = strcasestr(fn.fname, cfname);
  
211
				}
179
			}
212
			}
180
  
213
  
181
			if (result != NULL) {
214
			if (result != NULL) {
182
				char *fparams_formatted = remove_newlines(fn.fparams);
215
				char *fparams_formatted = remove_newlines(fn.fparams);
183
				printf("%s:%zu: %s %s %s\n", file_path, fn.lineno, fn.ftype ? fn.ftype : "", fn.fname, fparams_formatted ? fparams_formatted : "");
216
				if (max_distance > 0) {
  
217
					printf("%s:%zu: %s %s %s (dist: %d)\n", file_path, fn.lineno, fn.ftype ? fn.ftype : "", fn.fname, fparams_formatted ? fparams_formatted : "", distance);
  
218
				} else {
  
219
					printf("%s:%zu: %s %s %s\n", file_path, fn.lineno, fn.ftype ? fn.ftype : "", fn.fname, fparams_formatted ? fparams_formatted : "");
  
220
				}
184
				free(fparams_formatted);
221
				free(fparams_formatted);
185
			}
222
			}
186
		}
223
		}
...
211
  
248
  
212
int main(int argc, char *argv[]) {
249
int main(int argc, char *argv[]) {
213
	int case_sensitive = 0;
250
	int case_sensitive = 0;
  
251
	int max_distance = 0;
214
	int max_depth = -1;
252
	int max_depth = -1;
215
	int opt;
253
	int opt;
216
	struct option long_options[] = {
254
	struct option long_options[] = {
217
		{"case-sensitive", no_argument, 0, 'c'},
255
		{"case-sensitive", no_argument, 0, 'c'},
  
256
		{"levenshtein", required_argument, 0, 'l'},
218
		{"depth", required_argument, 0, 'd'},
257
		{"depth", required_argument, 0, 'd'},
219
		{0, 0, 0, 0}};
258
		{0, 0, 0, 0}};
220
  
259
  
221
	while ((opt = getopt_long(argc, argv, "cd:", long_options, NULL)) != -1) {
260
	while ((opt = getopt_long(argc, argv, "cl:d:", long_options, NULL)) != -1) {
222
		switch (opt) {
261
		switch (opt) {
223
		case 'c':
262
		case 'c':
224
			case_sensitive = 1;
263
			case_sensitive = 1;
  
264
			break;
  
265
		case 'l':
  
266
			max_distance = atoi(optarg);
225
			break;
267
			break;
226
		case 'd':
268
		case 'd':
227
			max_depth = atoi(optarg);
269
			max_depth = atoi(optarg);
228
			break;
270
			break;
229
		default:
271
		default:
230
			fprintf(stderr, "Usage: %s [-c|--case-sensitive] [-d|--depth <level>] <search term> [directory|file]\n", argv[0]);
272
			fprintf(stderr, "Usage: %s [-c|--case-sensitive] [-l|--levenshtein <dist>] [-d|--depth <level>] <search term> [directory|file]\n", argv[0]);
231
			return 1;
273
			return 1;
232
		}
274
		}
233
	}
275
	}
234
  
276
  
235
	if (optind >= argc) {
277
	if (optind >= argc) {
236
		fprintf(stderr, "Usage: %s [-c|--case-sensitive] [-d|--depth <level>] <search term> [directory|file]\n", argv[0]);
278
		fprintf(stderr, "Usage: %s [-c|--case-sensitive] [-l|--levenshtein <dist>] [-d|--depth <level>] <search term> [directory|file]\n", argv[0]);
237
		return 1;
279
		return 1;
238
	}
280
	}
239
  
281
  
...
317
				thread_args->query_len = query_len;
359
				thread_args->query_len = query_len;
318
				thread_args->cfname = cfname;
360
				thread_args->cfname = cfname;
319
				thread_args->case_sensitive = case_sensitive;
361
				thread_args->case_sensitive = case_sensitive;
  
362
				thread_args->max_distance = max_distance;
320
  
363
  
321
				tp_add_job(pool, (thread_func_t)parse_source_file, thread_args);
364
				tp_add_job(pool, (thread_func_t)parse_source_file, thread_args);
322
			} else {
365
			} else {
...
diff --git a/tests.sh b/tests.sh
...
105
run_test_with_flags "Depth 0 (root)" "-d 0" "level" "tests/depth_test" "void level0"
105
run_test_with_flags "Depth 0 (root)" "-d 0" "level" "tests/depth_test" "void level0"
106
run_test_with_flags "Depth 1 (recursive)" "-d 1" "level" "tests/depth_test" "void level1"
106
run_test_with_flags "Depth 1 (recursive)" "-d 1" "level" "tests/depth_test" "void level1"
107
  
107
  
  
108
# Levenshtein Distance Tests
  
109
run_test_with_flags "Levenshtein -l 1 match" "-l 1" "heelo" "$TEST_DIR/test.c" "void hello ()"
  
110
run_test_with_flags "Levenshtein -l 2 match" "-l 2" "heloo" "$TEST_DIR/test.c" "void hello ()"
  
111
  
108
echo "----------------"
112
echo "----------------"
109
if [ $failed -eq 0 ]; then
113
if [ $failed -eq 0 ]; then
110
    echo "All tests passed!"
114
    echo "All tests passed!"
...