diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:22:16 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:22:16 +0100 |
| commit | c7ab12bba64d9c20ccd79b132dac475f7bc3923e (patch) | |
| tree | abf2891f9bd1bfa549ed460b288e2c19348bc230 | |
| parent | 2b3d92e401f0065e440b51da9a6532695b37ef84 (diff) | |
| download | crep-c7ab12bba64d9c20ccd79b132dac475f7bc3923e.tar.gz | |
Re-enable multi-threading
| -rw-r--r-- | .clang-format | 4 | ||||
| -rw-r--r-- | .editorconfig | 23 | ||||
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | file.c | 81 | ||||
| -rw-r--r-- | file.h | 6 | ||||
| -rw-r--r-- | list.c | 98 | ||||
| -rw-r--r-- | list.h | 12 | ||||
| -rw-r--r-- | main.c | 392 | ||||
| -rw-r--r-- | tpool.c | 141 | ||||
| -rw-r--r-- | tpool.h | 21 |
10 files changed, 472 insertions, 308 deletions
diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..de3693a --- /dev/null +++ b/.clang-format @@ -0,0 +1,4 @@ +UseTab: Always +IndentWidth: 4 +TabWidth: 4 +ColumnLimit: 0 diff --git a/.editorconfig b/.editorconfig deleted file mode 100644 index 364f1f3..0000000 --- a/.editorconfig +++ /dev/null @@ -1,23 +0,0 @@ -root = true - -[*] -charset = utf-8 -trim_trailing_whitespace = true -insert_final_newline = true -end_of_line = lf - -[Makefile] -indent_style = tab -indent_size = 4 - -[*.c] -indent_style = space -indent_size = 2 - -[*.{css,html,js,django}] -indent_style = space -indent_size = 2 - -[*.go] -indent_style = tab -indent_size = 4 @@ -1,5 +1,5 @@ TARGET = crep -SOURCES = $(wildcard *.c) +SOURCES = $(wildcard *.c *.h) TS_ALIBS = $(shell find vendor -name "*.a" -print) VENDOR_DIRS = $(wildcard vendor/*) CFLAGS = $(EXTRA_FLAGS) -Wall -Wextra -std=gnu99 -pedantic -ggdb -O3 @@ -3,42 +3,47 @@ #include "file.h" -struct FileContent read_entire_file(const char* file_path) { - struct FileContent file_data; - file_data.content = NULL; - file_data.count = 0; - - FILE* file = fopen(file_path, "rb"); - if (file == NULL) { - perror("Error opening file"); - return file_data; - } - - fseek(file, 0, SEEK_END); - long file_size = ftell(file); - fseek(file, 0, SEEK_SET); - - if (file_size == -1) { - perror("Error getting file size"); - return file_data; - } - - file_data.content = (const char*)malloc(file_size); - if (file_data.content == NULL) { - perror("Error allocating memory"); - return file_data; - } - - size_t bytes_read = fread((void*)file_data.content, 1, file_size, file); - if (bytes_read != (size_t)file_size) { - perror("Error reading file"); - free((void*)file_data.content); - file_data.content = NULL; - return file_data; - } - - file_data.count = bytes_read; - - fclose(file); - return file_data; +struct FileContent read_entire_file(const char *file_path) { + struct FileContent file_data; + file_data.content = NULL; + file_data.count = 0; + + FILE *file = fopen(file_path, "rb"); + if (file == NULL) { + perror("Error opening file"); + return file_data; + } + + fseek(file, 0, SEEK_END); + long raw_size = ftell(file); + fseek(file, 0, SEEK_SET); + + if (raw_size == -1) { + perror("Error getting file size"); + fclose(file); + return file_data; + } + + size_t file_size = (size_t)raw_size; + char *content = (char *)malloc(file_size + 1); + if (content == NULL) { + perror("Error allocating memory"); + fclose(file); + return file_data; + } + + size_t bytes_read = fread(content, 1, file_size, file); + if (bytes_read != file_size) { + perror("Error reading file"); + free(content); + fclose(file); + return file_data; + } + + content[file_size] = '\0'; + file_data.content = content; + file_data.count = file_size; + + fclose(file); + return file_data; } @@ -4,10 +4,10 @@ #include <stdio.h> struct FileContent { - const char* content; - size_t count; + const char *content; + size_t count; }; -struct FileContent read_entire_file(const char* file_path); +struct FileContent read_entire_file(const char *file_path); #endif @@ -6,60 +6,72 @@ #include "list.h" -void add_file_path(Node** head, char* file_path) { - Node* new = (Node*)malloc(sizeof(Node)); - new->file_path = strdup(file_path); - new->next = *head; - *head = new; +void add_file_path(Node **head, char *file_path) { + Node *new = (Node *)malloc(sizeof(Node)); + if (new == NULL) { + perror("malloc"); + exit(EXIT_FAILURE); + } + new->file_path = strdup(file_path); + if (new->file_path == NULL) { + perror("strdup"); + free(new); + exit(EXIT_FAILURE); + } + new->next = *head; + *head = new; } -void list_files_recursively(char* base_path, Node** head) { - char path[1000]; - struct dirent* dp; - DIR* dir = opendir(base_path); +void list_files_recursively(char *base_path, Node **head) { + char path[2048]; + struct dirent *dp; + DIR *dir = opendir(base_path); - if (!dir) - return; + if (!dir) + return; - while ((dp = readdir(dir)) != NULL) { - if (strcmp(dp->d_name, ".") != 0 && strcmp(dp->d_name, "..") != 0) { - strcpy(path, base_path); - strcat(path, "/"); - strcat(path, dp->d_name); + while ((dp = readdir(dir)) != NULL) { + if (strcmp(dp->d_name, ".") != 0 && strcmp(dp->d_name, "..") != 0) { + int ret = snprintf(path, sizeof(path), "%s%s%s", base_path, (base_path[strlen(base_path) - 1] == '/' ? "" : "/"), dp->d_name); - struct stat statbuf; - if (stat(path, &statbuf) != -1) { - if (S_ISDIR(statbuf.st_mode)) { - list_files_recursively(path, head); - } else { - add_file_path(head, path); - } - } - } - } + if (ret >= (int)sizeof(path)) { + fprintf(stderr, "Path too long: %s/%s\n", base_path, dp->d_name); + continue; + } - closedir(dir); + struct stat statbuf; + if (stat(path, &statbuf) != -1) { + if (S_ISDIR(statbuf.st_mode)) { + list_files_recursively(path, head); + } else { + add_file_path(head, path); + } + } + } + } + + closedir(dir); } -void free_file_list(Node* head) { - Node* tmp; +void free_file_list(Node *head) { + Node *tmp; - while (head != NULL) { - tmp = head; - head = head->next; - free(tmp->file_path); - free(tmp); - } + while (head != NULL) { + tmp = head; + head = head->next; + free(tmp->file_path); + free(tmp); + } } -int size_of_file_list(Node* head) { - int count = 0; +int size_of_file_list(Node *head) { + int count = 0; - Node* current = head; - while (current != NULL) { - count++; - current = current->next; - } + Node *current = head; + while (current != NULL) { + count++; + current = current->next; + } - return count; + return count; } @@ -2,13 +2,13 @@ #define LIST_H typedef struct node { - char* file_path; - struct node* next; + char *file_path; + struct node *next; } Node; -void add_file_path(Node** head, char* file_path); -void list_files_recursively(char* base_path, Node** head); -void free_file_list(Node* head); -int size_of_file_list(Node* head); +void add_file_path(Node **head, char *file_path); +void list_files_recursively(char *base_path, Node **head); +void free_file_list(Node *head); +int size_of_file_list(Node *head); #endif @@ -19,216 +19,220 @@ #include "file.h" #include "list.h" +#include "tpool.h" -#define DEBUG 1 +#define DEBUG 0 typedef struct { - const char* fname; - const char* ftype; - const char* fparams; - size_t lineno; + const char *fname; + const char *ftype; + const char *fparams; + size_t lineno; } Function; -const char* extract_value(TSNode captured_node, const char* source_code) { - size_t start = ts_node_start_byte(captured_node); - size_t end = ts_node_end_byte(captured_node); - size_t length = end - start; - char* buffer = malloc(length + 1); // +1 for the null terminator - - if (buffer != NULL) { - snprintf(buffer, length + 1, "%.*s", (int)length, &source_code[start]); - return buffer; - } - - return NULL; +const char *extract_value(TSNode captured_node, const char *source_code) { + size_t start = ts_node_start_byte(captured_node); + size_t end = ts_node_end_byte(captured_node); + size_t length = end - start; + char *buffer = malloc(length + 1); // +1 for the null terminator + + if (buffer != NULL) { + snprintf(buffer, length + 1, "%.*s", (int)length, &source_code[start]); + return buffer; + } else { + perror("malloc"); + exit(EXIT_FAILURE); + } + + return NULL; } -char* remove_newlines(const char* str) { - size_t length = strlen(str); - char* result = (char*)malloc(length + 1); // +1 for the null terminator - if (result == NULL) { - fprintf(stderr, "Memory allocation failed\n"); - exit(1); - } - - size_t j = 0; - for (size_t i = 0; i < length; i++) { - if (str[i] != '\n') { - result[j++] = str[i]; - } - } - - result[j] = '\0'; - return result; +char *remove_newlines(const char *str) { + if (str == NULL) + return NULL; + size_t length = strlen(str); + char *result = (char *)malloc(length + 1); // +1 for the null terminator + if (result == NULL) { + fprintf(stderr, "Memory allocation failed\n"); + exit(1); + } + + size_t j = 0; + for (size_t i = 0; i < length; i++) { + if (str[i] != '\n') { + result[j++] = str[i]; + } + } + + result[j] = '\0'; + return result; } struct ThreadArgs { - const char* file_path; - const char* source_code; - TSLanguage* language; - const char* cfname; + const char *file_path; + const char *source_code; + TSLanguage *language; + const char *cfname; }; // void parse_source_file(const char *file_path, const char *source_code, // TSLanguage *language, const char *cfname) { -void* parse_source_file(void* arg) { - struct ThreadArgs* args = (struct ThreadArgs*)arg; - - const char* file_path = args->file_path; - const char* source_code = args->source_code; - TSLanguage* language = args->language; - const char* cfname = args->cfname; - - TSParser* parser = ts_parser_new(); - ts_parser_set_language(parser, language); - - TSTree* tree = - ts_parser_parse_string(parser, NULL, source_code, strlen(source_code)); - TSNode root_node = ts_tree_root_node(tree); - - const char* query_string = - "(function_definition type: (primitive_type) @ftype declarator: " - "(function_declarator declarator: (identifier) @fname parameters: " - "(parameter_list) @fparams))"; - - uint32_t error_offset; - TSQueryError error_type; - TSQuery* query = ts_query_new(language, query_string, strlen(query_string), - &error_offset, &error_type); - - TSQueryCursor* query_cursor = ts_query_cursor_new(); - ts_query_cursor_exec(query_cursor, query, root_node); - - if (query != NULL) { - TSQueryMatch match; - while (ts_query_cursor_next_match(query_cursor, &match)) { - Function fn = {0}; - - for (unsigned i = 0; i < match.capture_count; i++) { - TSQueryCapture capture = match.captures[i]; - TSNode captured_node = capture.node; - - uint32_t capture_name_length; - const char* capture_name = ts_query_capture_name_for_id( - query, capture.index, &capture_name_length); - - if (strcmp(capture_name, "fname") == 0) { - fn.fname = extract_value(captured_node, source_code); - - TSPoint start_point = ts_node_start_point(captured_node); - fn.lineno = start_point.row; - } - - if (strcmp(capture_name, "ftype") == 0) { - fn.ftype = extract_value(captured_node, source_code); - } - - if (strcmp(capture_name, "fparams") == 0) { - fn.fparams = extract_value(captured_node, source_code); - } - } - - // Substring matching. - // FIXME: Add Levenshtein distance. - char* result = strstr(fn.fname, cfname); - if (result != NULL) { - char* fparams_formatted = remove_newlines(fn.fparams); - printf("%s:%zu:\t%s %s %s\n", file_path, fn.lineno, fn.ftype, fn.fname, - fparams_formatted); - } - } - } else { - if (DEBUG) { - printf("Query creation failed at offset %u with error type %d\n", - error_offset, error_type); - } - } - - ts_query_cursor_delete(query_cursor); - ts_query_delete(query); - ts_tree_delete(tree); - ts_parser_delete(parser); - - return NULL; +void parse_source_file(void *arg) { + struct ThreadArgs *args = (struct ThreadArgs *)arg; + + const char *file_path = args->file_path; + const char *source_code = args->source_code; + TSLanguage *language = args->language; + const char *cfname = args->cfname; + + TSParser *parser = ts_parser_new(); + ts_parser_set_language(parser, language); + + TSTree *tree = + ts_parser_parse_string(parser, NULL, source_code, strlen(source_code)); + TSNode root_node = ts_tree_root_node(tree); + + const char *query_string = + "(function_definition type: (_) @ftype declarator: (function_declarator declarator: (identifier) @fname parameters: (parameter_list) @fparams))" + "(function_definition type: (_) @ftype declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @fname parameters: (parameter_list) @fparams)))" + "(declaration type: (_) @ftype declarator: (function_declarator declarator: (identifier) @fname parameters: (parameter_list) @fparams))" + "(declaration type: (_) @ftype declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @fname parameters: (parameter_list) @fparams)))"; + + uint32_t error_offset; + TSQueryError error_type; + TSQuery *query = ts_query_new(language, query_string, strlen(query_string), &error_offset, &error_type); + + TSQueryCursor *query_cursor = ts_query_cursor_new(); + ts_query_cursor_exec(query_cursor, query, root_node); + + if (query != NULL) { + TSQueryMatch match; + while (ts_query_cursor_next_match(query_cursor, &match)) { + Function fn = {0}; + + for (unsigned i = 0; i < match.capture_count; i++) { + TSQueryCapture capture = match.captures[i]; + TSNode captured_node = capture.node; + + uint32_t capture_name_length; + const char *capture_name = ts_query_capture_name_for_id( + query, capture.index, &capture_name_length); + + if (strcmp(capture_name, "fname") == 0) { + fn.fname = extract_value(captured_node, source_code); + + TSPoint start_point = ts_node_start_point(captured_node); + fn.lineno = start_point.row; + } + + if (strcmp(capture_name, "ftype") == 0) { + fn.ftype = extract_value(captured_node, source_code); + } + + if (strcmp(capture_name, "fparams") == 0) { + fn.fparams = extract_value(captured_node, source_code); + } + } + + // Substring matching. + // FIXME: Add Levenshtein distance. + if (fn.fname != NULL) { + char *result = strstr(fn.fname, cfname); + if (result != NULL) { + char *fparams_formatted = remove_newlines(fn.fparams); + printf("%s:%zu:\t%s %s %s\n", file_path, fn.lineno, fn.ftype, fn.fname, fparams_formatted); + free(fparams_formatted); + } + } + + // Free captured values + free((void *)fn.fname); + free((void *)fn.ftype); + free((void *)fn.fparams); + } + } else { + if (DEBUG) { + printf("Query creation failed at offset %u with error type %d\n", error_offset, error_type); + } + } + + ts_query_cursor_delete(query_cursor); + ts_query_delete(query); + ts_tree_delete(tree); + ts_parser_delete(parser); + + // Cleanup thread arguments + free((void *)source_code); + free(args); } -const char* get_file_extension(const char* file_path) { - const char* extension = strrchr(file_path, '.'); - if (extension != NULL) { - return extension + 1; - } - return NULL; +const char *get_file_extension(const char *file_path) { + const char *extension = strrchr(file_path, '.'); + if (extension != NULL) { + return extension + 1; + } + return NULL; } -int main(int argc, char* argv[]) { - if (argc < 3) { - printf("Usage: %s <search term> <directory>\n", argv[0]); - return 1; - } - - char* cfname = argv[1]; - char* directory = argv[2]; - - TSLanguage* tree_sitter_c(void); - TSLanguage* tree_sitter_python(void); - - Node* head = NULL; - list_files_recursively(directory, &head); - int list_size = size_of_file_list(head); - /* pthread_t threads[list_size]; */ - - if (DEBUG) { - printf("Scanning %d files\n", list_size); - } - - Node* current = head; - // int thread_index = 0; - while (current != NULL) { - const char* file_path = current->file_path; - const char* extension = get_file_extension(file_path); - struct FileContent source_file = read_entire_file(file_path); - - if (source_file.content != NULL) { - if (extension != NULL) { - if (strcmp(extension, "c") == 0 || strcmp(extension, "h") == 0) { - /* parse_source_file(file_path, source_file.content, tree_sitter_c(), - * cfname); */ - - struct ThreadArgs thread_args; - thread_args.file_path = file_path; - thread_args.source_code = source_file.content; - thread_args.language = tree_sitter_c(); - thread_args.cfname = cfname; - - parse_source_file(&thread_args); - - /* printf("> creating thread #%d\n", thread_index); */ - /* if (pthread_create(&threads[thread_index], NULL, parse_source_file, - * &thread_args) != 0) { */ - /* fprintf(stderr, "Error creating thread %d\n", thread_index); */ - /* return 1; */ - /* } */ - } - } - free((void*)source_file.content); - } else { - if (DEBUG) { - fprintf(stderr, "Failed to read file.\n"); - } - } - current = current->next; - // thread_index++; - } - - // Collecting threads. - /* for (int i = 0; i < list_size; i++) { */ - /* printf("> collecting thread #%d\n", thread_index); */ - /* if (pthread_join(threads[i], NULL) != 0) { */ - /* fprintf(stderr, "Error joining thread %d\n", i); */ - /* return 1; */ - /* } */ - /* } */ - - free_file_list(head); - return 0; +int main(int argc, char *argv[]) { + if (argc < 3) { + printf("Usage: %s <search term> <directory>\n", argv[0]); + return 1; + } + + const char *cfname = argv[1]; + char *directory = argv[2]; + + TSLanguage *tree_sitter_c(void); + TSLanguage *tree_sitter_python(void); + + Node *head = NULL; + list_files_recursively(directory, &head); + int list_size = size_of_file_list(head); + + if (DEBUG) { + printf("Scanning %d files\n", list_size); + } + + ThreadPool *pool = tp_create(8); + if (!pool) { + perror("Failed to create thread pool"); + return 1; + } + + Node *current = head; + while (current != NULL) { + const char *file_path = current->file_path; + const char *extension = get_file_extension(file_path); + + if (extension != NULL && (strcmp(extension, "c") == 0 || strcmp(extension, "h") == 0)) { + struct FileContent source_file = read_entire_file(file_path); + if (source_file.content != NULL) { + struct ThreadArgs *thread_args = malloc(sizeof(struct ThreadArgs)); + if (!thread_args) { + perror("Failed to allocate thread args"); + free((void *)source_file.content); + continue; + } + + thread_args->file_path = file_path; + thread_args->source_code = source_file.content; + thread_args->language = tree_sitter_c(); + thread_args->cfname = cfname; + + tp_add_job(pool, (thread_func_t)parse_source_file, thread_args); + } else { + if (DEBUG) { + fprintf(stderr, "Failed to read file: %s\n", file_path); + } + } + } + + current = current->next; + } + + tp_wait(pool); + tp_destroy(pool); + free_file_list(head); + return 0; } @@ -0,0 +1,141 @@ +#include "tpool.h" +#include <stdio.h> +#include <stdlib.h> + +typedef struct ThreadPoolJobNode { + ThreadPoolJob job; + struct ThreadPoolJobNode *next; +} ThreadPoolJobNode; + +struct ThreadPool { + pthread_mutex_t lock; + pthread_cond_t notify; + pthread_cond_t working_cond; + + pthread_t *threads; + int num_threads; + + ThreadPoolJobNode *queue_head; + ThreadPoolJobNode *queue_tail; + + int active_jobs; // Jobs currently running + int queued_jobs; // Jobs waiting in queue + bool stop; +}; + +static void *tp_worker(void *arg) { + ThreadPool *pool = (ThreadPool *)arg; + + while (1) { + pthread_mutex_lock(&pool->lock); + + while (pool->queue_head == NULL && !pool->stop) { + pthread_cond_wait(&pool->notify, &pool->lock); + } + + if (pool->stop && pool->queue_head == NULL) { + pthread_mutex_unlock(&pool->lock); + break; + } + + ThreadPoolJobNode *node = pool->queue_head; + pool->queue_head = node->next; + if (pool->queue_head == NULL) { + pool->queue_tail = NULL; + } + + pool->queued_jobs--; + pool->active_jobs++; + + pthread_mutex_unlock(&pool->lock); + + // Execute job + if (node->job.function) { + node->job.function(node->job.arg); + } + free(node); + + pthread_mutex_lock(&pool->lock); + pool->active_jobs--; + if (pool->active_jobs == 0 && pool->queue_head == NULL) { + pthread_cond_signal(&pool->working_cond); + } + pthread_mutex_unlock(&pool->lock); + } + + return NULL; +} + +ThreadPool *tp_create(int num_threads) { + ThreadPool *pool = (ThreadPool *)malloc(sizeof(ThreadPool)); + if (pool == NULL) + return NULL; + + pool->num_threads = num_threads; + pool->queue_head = NULL; + pool->queue_tail = NULL; + pool->active_jobs = 0; + pool->queued_jobs = 0; + pool->stop = false; + + pthread_mutex_init(&pool->lock, NULL); + pthread_cond_init(&pool->notify, NULL); + pthread_cond_init(&pool->working_cond, NULL); + + pool->threads = (pthread_t *)malloc(sizeof(pthread_t) * num_threads); + for (int i = 0; i < num_threads; i++) { + pthread_create(&pool->threads[i], NULL, tp_worker, pool); + } + + return pool; +} + +void tp_add_job(ThreadPool *pool, thread_func_t function, void *arg) { + ThreadPoolJobNode *node = (ThreadPoolJobNode *)malloc(sizeof(ThreadPoolJobNode)); + if (node == NULL) { + perror("malloc"); + exit(EXIT_FAILURE); + } + node->job.function = function; + node->job.arg = arg; + node->next = NULL; + + pthread_mutex_lock(&pool->lock); + + if (pool->queue_tail) { + pool->queue_tail->next = node; + } else { + pool->queue_head = node; + } + pool->queue_tail = node; + + pool->queued_jobs++; + pthread_cond_signal(&pool->notify); + + pthread_mutex_unlock(&pool->lock); +} + +void tp_wait(ThreadPool *pool) { + pthread_mutex_lock(&pool->lock); + while (pool->active_jobs > 0 || pool->queue_head != NULL) { + pthread_cond_wait(&pool->working_cond, &pool->lock); + } + pthread_mutex_unlock(&pool->lock); +} + +void tp_destroy(ThreadPool *pool) { + pthread_mutex_lock(&pool->lock); + pool->stop = true; + pthread_cond_broadcast(&pool->notify); + pthread_mutex_unlock(&pool->lock); + + for (int i = 0; i < pool->num_threads; i++) { + pthread_join(pool->threads[i], NULL); + } + + free(pool->threads); + pthread_mutex_destroy(&pool->lock); + pthread_cond_destroy(&pool->notify); + pthread_cond_destroy(&pool->working_cond); + free(pool); +} @@ -0,0 +1,21 @@ +#ifndef THREAD_POOL_H +#define THREAD_POOL_H + +#include <pthread.h> +#include <stdbool.h> + +typedef void (*thread_func_t)(void *arg); + +typedef struct { + thread_func_t function; + void *arg; +} ThreadPoolJob; + +typedef struct ThreadPool ThreadPool; + +ThreadPool *tp_create(int num_threads); +void tp_add_job(ThreadPool *pool, thread_func_t function, void *arg); +void tp_wait(ThreadPool *pool); +void tp_destroy(ThreadPool *pool); + +#endif |
