diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000000000000000000000000000000000..de3693a989da3de2e37492af4c3f1bec886f3675 --- /dev/null +++ b/.clang-format @@ -0,0 +1,4 @@ +UseTab: Always +IndentWidth: 4 +TabWidth: 4 +ColumnLimit: 0 diff --git a/.editorconfig b/.editorconfig deleted file mode 100644 index 364f1f378780de23016ff4eb65143e6284901474..0000000000000000000000000000000000000000 --- a/.editorconfig +++ /dev/null @@ -1,23 +0,0 @@ -root = true - -[*] -charset = utf-8 -trim_trailing_whitespace = true -insert_final_newline = true -end_of_line = lf - -[Makefile] -indent_style = tab -indent_size = 4 - -[*.c] -indent_style = space -indent_size = 2 - -[*.{css,html,js,django}] -indent_style = space -indent_size = 2 - -[*.go] -indent_style = tab -indent_size = 4 diff --git a/Makefile b/Makefile index 10c5f5ee4fa58b8f60fc407d4ec6bf2eb829d3f1..54dcd92c682dc4c400c78d01e765888f7fc0528a 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ TARGET = crep -SOURCES = $(wildcard *.c) +SOURCES = $(wildcard *.c *.h) TS_ALIBS = $(shell find vendor -name "*.a" -print) VENDOR_DIRS = $(wildcard vendor/*) CFLAGS = $(EXTRA_FLAGS) -Wall -Wextra -std=gnu99 -pedantic -ggdb -O3 diff --git a/file.c b/file.c index 1909946212a832e2efbe2acde8498b855498aa4f..2c04ff6017ad09575937097c12ceb0f962dd6b37 100644 --- a/file.c +++ b/file.c @@ -3,42 +3,47 @@ #include #include "file.h" -struct FileContent read_entire_file(const char* file_path) { - struct FileContent file_data; - file_data.content = NULL; - file_data.count = 0; +struct FileContent read_entire_file(const char *file_path) { + struct FileContent file_data; + file_data.content = NULL; + file_data.count = 0; - FILE* file = fopen(file_path, "rb"); - if (file == NULL) { - perror("Error opening file"); - return file_data; - } + FILE *file = fopen(file_path, "rb"); + if (file == NULL) { + perror("Error opening file"); + return file_data; + } - fseek(file, 0, SEEK_END); - long file_size = ftell(file); - fseek(file, 0, SEEK_SET); + fseek(file, 0, SEEK_END); + long raw_size = ftell(file); + fseek(file, 0, SEEK_SET); - if (file_size == -1) { - perror("Error getting file size"); - return file_data; - } + if (raw_size == -1) { + perror("Error getting file size"); + fclose(file); + return file_data; + } - file_data.content = (const char*)malloc(file_size); - if (file_data.content == NULL) { - perror("Error allocating memory"); - return file_data; - } + size_t file_size = (size_t)raw_size; + char *content = (char *)malloc(file_size + 1); + if (content == NULL) { + perror("Error allocating memory"); + fclose(file); + return file_data; + } - size_t bytes_read = fread((void*)file_data.content, 1, file_size, file); - if (bytes_read != (size_t)file_size) { - perror("Error reading file"); - free((void*)file_data.content); - file_data.content = NULL; - return file_data; - } + size_t bytes_read = fread(content, 1, file_size, file); + if (bytes_read != file_size) { + perror("Error reading file"); + free(content); + fclose(file); + return file_data; + } - file_data.count = bytes_read; + content[file_size] = '\0'; + file_data.content = content; + file_data.count = file_size; - fclose(file); - return file_data; + fclose(file); + return file_data; } diff --git a/file.h b/file.h index 97843b2de69549a7bca5059a3d9a2a8d9483d04b..f923ce04a90e02a82b37f01c26818da0f9d82eae 100644 --- a/file.h +++ b/file.h @@ -4,10 +4,10 @@ #include struct FileContent { - const char* content; - size_t count; + const char *content; + size_t count; }; -struct FileContent read_entire_file(const char* file_path); +struct FileContent read_entire_file(const char *file_path); #endif diff --git a/list.c b/list.c index f958ea25d71f93260f7b62824a5eab1d6fe0ae3b..8c5b478303df65b412df0868929f2d78b9b6f277 100644 --- a/list.c +++ b/list.c @@ -6,60 +6,72 @@ #include #include "list.h" -void add_file_path(Node** head, char* file_path) { - Node* new = (Node*)malloc(sizeof(Node)); - new->file_path = strdup(file_path); - new->next = *head; - *head = new; +void add_file_path(Node **head, char *file_path) { + Node *new = (Node *)malloc(sizeof(Node)); + if (new == NULL) { + perror("malloc"); + exit(EXIT_FAILURE); + } + new->file_path = strdup(file_path); + if (new->file_path == NULL) { + perror("strdup"); + free(new); + exit(EXIT_FAILURE); + } + new->next = *head; + *head = new; } -void list_files_recursively(char* base_path, Node** head) { - char path[1000]; - struct dirent* dp; - DIR* dir = opendir(base_path); +void list_files_recursively(char *base_path, Node **head) { + char path[2048]; + struct dirent *dp; + DIR *dir = opendir(base_path); - if (!dir) - return; + if (!dir) + return; - while ((dp = readdir(dir)) != NULL) { - if (strcmp(dp->d_name, ".") != 0 && strcmp(dp->d_name, "..") != 0) { - strcpy(path, base_path); - strcat(path, "/"); - strcat(path, dp->d_name); + while ((dp = readdir(dir)) != NULL) { + if (strcmp(dp->d_name, ".") != 0 && strcmp(dp->d_name, "..") != 0) { + int ret = snprintf(path, sizeof(path), "%s%s%s", base_path, (base_path[strlen(base_path) - 1] == '/' ? "" : "/"), dp->d_name); - struct stat statbuf; - if (stat(path, &statbuf) != -1) { - if (S_ISDIR(statbuf.st_mode)) { - list_files_recursively(path, head); - } else { - add_file_path(head, path); - } - } - } - } + if (ret >= (int)sizeof(path)) { + fprintf(stderr, "Path too long: %s/%s\n", base_path, dp->d_name); + continue; + } - closedir(dir); + struct stat statbuf; + if (stat(path, &statbuf) != -1) { + if (S_ISDIR(statbuf.st_mode)) { + list_files_recursively(path, head); + } else { + add_file_path(head, path); + } + } + } + } + + closedir(dir); } -void free_file_list(Node* head) { - Node* tmp; +void free_file_list(Node *head) { + Node *tmp; - while (head != NULL) { - tmp = head; - head = head->next; - free(tmp->file_path); - free(tmp); - } + while (head != NULL) { + tmp = head; + head = head->next; + free(tmp->file_path); + free(tmp); + } } -int size_of_file_list(Node* head) { - int count = 0; +int size_of_file_list(Node *head) { + int count = 0; - Node* current = head; - while (current != NULL) { - count++; - current = current->next; - } + Node *current = head; + while (current != NULL) { + count++; + current = current->next; + } - return count; + return count; } diff --git a/list.h b/list.h index 24705e14e51309376f782424b4f93504f02afcbe..c958836baa9c2be576a317a4633f668a650122a9 100644 --- a/list.h +++ b/list.h @@ -2,13 +2,13 @@ #ifndef LIST_H #define LIST_H typedef struct node { - char* file_path; - struct node* next; + char *file_path; + struct node *next; } Node; -void add_file_path(Node** head, char* file_path); -void list_files_recursively(char* base_path, Node** head); -void free_file_list(Node* head); -int size_of_file_list(Node* head); +void add_file_path(Node **head, char *file_path); +void list_files_recursively(char *base_path, Node **head); +void free_file_list(Node *head); +int size_of_file_list(Node *head); #endif diff --git a/main.c b/main.c index 0a6b2315fcc10270ed33023a8413bdb27a979d86..f4eaa8e5c1e3ec69cb29e8a18a7af24ab1003a81 100644 --- a/main.c +++ b/main.c @@ -19,216 +19,220 @@ #include #include "file.h" #include "list.h" +#include "tpool.h" -#define DEBUG 1 +#define DEBUG 0 typedef struct { - const char* fname; - const char* ftype; - const char* fparams; - size_t lineno; + const char *fname; + const char *ftype; + const char *fparams; + size_t lineno; } Function; -const char* extract_value(TSNode captured_node, const char* source_code) { - size_t start = ts_node_start_byte(captured_node); - size_t end = ts_node_end_byte(captured_node); - size_t length = end - start; - char* buffer = malloc(length + 1); // +1 for the null terminator +const char *extract_value(TSNode captured_node, const char *source_code) { + size_t start = ts_node_start_byte(captured_node); + size_t end = ts_node_end_byte(captured_node); + size_t length = end - start; + char *buffer = malloc(length + 1); // +1 for the null terminator - if (buffer != NULL) { - snprintf(buffer, length + 1, "%.*s", (int)length, &source_code[start]); - return buffer; - } + if (buffer != NULL) { + snprintf(buffer, length + 1, "%.*s", (int)length, &source_code[start]); + return buffer; + } else { + perror("malloc"); + exit(EXIT_FAILURE); + } - return NULL; + return NULL; } -char* remove_newlines(const char* str) { - size_t length = strlen(str); - char* result = (char*)malloc(length + 1); // +1 for the null terminator - if (result == NULL) { - fprintf(stderr, "Memory allocation failed\n"); - exit(1); - } +char *remove_newlines(const char *str) { + if (str == NULL) + return NULL; + size_t length = strlen(str); + char *result = (char *)malloc(length + 1); // +1 for the null terminator + if (result == NULL) { + fprintf(stderr, "Memory allocation failed\n"); + exit(1); + } - size_t j = 0; - for (size_t i = 0; i < length; i++) { - if (str[i] != '\n') { - result[j++] = str[i]; - } - } + size_t j = 0; + for (size_t i = 0; i < length; i++) { + if (str[i] != '\n') { + result[j++] = str[i]; + } + } - result[j] = '\0'; - return result; + result[j] = '\0'; + return result; } struct ThreadArgs { - const char* file_path; - const char* source_code; - TSLanguage* language; - const char* cfname; + const char *file_path; + const char *source_code; + TSLanguage *language; + const char *cfname; }; // void parse_source_file(const char *file_path, const char *source_code, // TSLanguage *language, const char *cfname) { -void* parse_source_file(void* arg) { - struct ThreadArgs* args = (struct ThreadArgs*)arg; +void parse_source_file(void *arg) { + struct ThreadArgs *args = (struct ThreadArgs *)arg; - const char* file_path = args->file_path; - const char* source_code = args->source_code; - TSLanguage* language = args->language; - const char* cfname = args->cfname; + const char *file_path = args->file_path; + const char *source_code = args->source_code; + TSLanguage *language = args->language; + const char *cfname = args->cfname; - TSParser* parser = ts_parser_new(); - ts_parser_set_language(parser, language); + TSParser *parser = ts_parser_new(); + ts_parser_set_language(parser, language); - TSTree* tree = - ts_parser_parse_string(parser, NULL, source_code, strlen(source_code)); - TSNode root_node = ts_tree_root_node(tree); + TSTree *tree = + ts_parser_parse_string(parser, NULL, source_code, strlen(source_code)); + TSNode root_node = ts_tree_root_node(tree); - const char* query_string = - "(function_definition type: (primitive_type) @ftype declarator: " - "(function_declarator declarator: (identifier) @fname parameters: " - "(parameter_list) @fparams))"; + const char *query_string = + "(function_definition type: (_) @ftype declarator: (function_declarator declarator: (identifier) @fname parameters: (parameter_list) @fparams))" + "(function_definition type: (_) @ftype declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @fname parameters: (parameter_list) @fparams)))" + "(declaration type: (_) @ftype declarator: (function_declarator declarator: (identifier) @fname parameters: (parameter_list) @fparams))" + "(declaration type: (_) @ftype declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @fname parameters: (parameter_list) @fparams)))"; - uint32_t error_offset; - TSQueryError error_type; - TSQuery* query = ts_query_new(language, query_string, strlen(query_string), - &error_offset, &error_type); + uint32_t error_offset; + TSQueryError error_type; + TSQuery *query = ts_query_new(language, query_string, strlen(query_string), &error_offset, &error_type); - TSQueryCursor* query_cursor = ts_query_cursor_new(); - ts_query_cursor_exec(query_cursor, query, root_node); + TSQueryCursor *query_cursor = ts_query_cursor_new(); + ts_query_cursor_exec(query_cursor, query, root_node); - if (query != NULL) { - TSQueryMatch match; - while (ts_query_cursor_next_match(query_cursor, &match)) { - Function fn = {0}; + if (query != NULL) { + TSQueryMatch match; + while (ts_query_cursor_next_match(query_cursor, &match)) { + Function fn = {0}; - for (unsigned i = 0; i < match.capture_count; i++) { - TSQueryCapture capture = match.captures[i]; - TSNode captured_node = capture.node; + for (unsigned i = 0; i < match.capture_count; i++) { + TSQueryCapture capture = match.captures[i]; + TSNode captured_node = capture.node; - uint32_t capture_name_length; - const char* capture_name = ts_query_capture_name_for_id( - query, capture.index, &capture_name_length); + uint32_t capture_name_length; + const char *capture_name = ts_query_capture_name_for_id( + query, capture.index, &capture_name_length); - if (strcmp(capture_name, "fname") == 0) { - fn.fname = extract_value(captured_node, source_code); + if (strcmp(capture_name, "fname") == 0) { + fn.fname = extract_value(captured_node, source_code); - TSPoint start_point = ts_node_start_point(captured_node); - fn.lineno = start_point.row; - } + TSPoint start_point = ts_node_start_point(captured_node); + fn.lineno = start_point.row; + } - if (strcmp(capture_name, "ftype") == 0) { - fn.ftype = extract_value(captured_node, source_code); - } + if (strcmp(capture_name, "ftype") == 0) { + fn.ftype = extract_value(captured_node, source_code); + } - if (strcmp(capture_name, "fparams") == 0) { - fn.fparams = extract_value(captured_node, source_code); - } - } + if (strcmp(capture_name, "fparams") == 0) { + fn.fparams = extract_value(captured_node, source_code); + } + } - // Substring matching. - // FIXME: Add Levenshtein distance. - char* result = strstr(fn.fname, cfname); - if (result != NULL) { - char* fparams_formatted = remove_newlines(fn.fparams); - printf("%s:%zu:\t%s %s %s\n", file_path, fn.lineno, fn.ftype, fn.fname, - fparams_formatted); - } - } - } else { - if (DEBUG) { - printf("Query creation failed at offset %u with error type %d\n", - error_offset, error_type); - } - } + // Substring matching. + // FIXME: Add Levenshtein distance. + if (fn.fname != NULL) { + char *result = strstr(fn.fname, cfname); + if (result != NULL) { + char *fparams_formatted = remove_newlines(fn.fparams); + printf("%s:%zu:\t%s %s %s\n", file_path, fn.lineno, fn.ftype, fn.fname, fparams_formatted); + free(fparams_formatted); + } + } - ts_query_cursor_delete(query_cursor); - ts_query_delete(query); - ts_tree_delete(tree); - ts_parser_delete(parser); + // Free captured values + free((void *)fn.fname); + free((void *)fn.ftype); + free((void *)fn.fparams); + } + } else { + if (DEBUG) { + printf("Query creation failed at offset %u with error type %d\n", error_offset, error_type); + } + } + + ts_query_cursor_delete(query_cursor); + ts_query_delete(query); + ts_tree_delete(tree); + ts_parser_delete(parser); - return NULL; + // Cleanup thread arguments + free((void *)source_code); + free(args); } -const char* get_file_extension(const char* file_path) { - const char* extension = strrchr(file_path, '.'); - if (extension != NULL) { - return extension + 1; - } - return NULL; +const char *get_file_extension(const char *file_path) { + const char *extension = strrchr(file_path, '.'); + if (extension != NULL) { + return extension + 1; + } + return NULL; } -int main(int argc, char* argv[]) { - if (argc < 3) { - printf("Usage: %s \n", argv[0]); - return 1; - } +int main(int argc, char *argv[]) { + if (argc < 3) { + printf("Usage: %s \n", argv[0]); + return 1; + } - char* cfname = argv[1]; - char* directory = argv[2]; + const char *cfname = argv[1]; + char *directory = argv[2]; - TSLanguage* tree_sitter_c(void); - TSLanguage* tree_sitter_python(void); + TSLanguage *tree_sitter_c(void); + TSLanguage *tree_sitter_python(void); - Node* head = NULL; - list_files_recursively(directory, &head); - int list_size = size_of_file_list(head); - /* pthread_t threads[list_size]; */ + Node *head = NULL; + list_files_recursively(directory, &head); + int list_size = size_of_file_list(head); - if (DEBUG) { - printf("Scanning %d files\n", list_size); - } + if (DEBUG) { + printf("Scanning %d files\n", list_size); + } - Node* current = head; - // int thread_index = 0; - while (current != NULL) { - const char* file_path = current->file_path; - const char* extension = get_file_extension(file_path); - struct FileContent source_file = read_entire_file(file_path); + ThreadPool *pool = tp_create(8); + if (!pool) { + perror("Failed to create thread pool"); + return 1; + } - if (source_file.content != NULL) { - if (extension != NULL) { - if (strcmp(extension, "c") == 0 || strcmp(extension, "h") == 0) { - /* parse_source_file(file_path, source_file.content, tree_sitter_c(), - * cfname); */ + Node *current = head; + while (current != NULL) { + const char *file_path = current->file_path; + const char *extension = get_file_extension(file_path); - struct ThreadArgs thread_args; - thread_args.file_path = file_path; - thread_args.source_code = source_file.content; - thread_args.language = tree_sitter_c(); - thread_args.cfname = cfname; + if (extension != NULL && (strcmp(extension, "c") == 0 || strcmp(extension, "h") == 0)) { + struct FileContent source_file = read_entire_file(file_path); + if (source_file.content != NULL) { + struct ThreadArgs *thread_args = malloc(sizeof(struct ThreadArgs)); + if (!thread_args) { + perror("Failed to allocate thread args"); + free((void *)source_file.content); + continue; + } - parse_source_file(&thread_args); + thread_args->file_path = file_path; + thread_args->source_code = source_file.content; + thread_args->language = tree_sitter_c(); + thread_args->cfname = cfname; - /* printf("> creating thread #%d\n", thread_index); */ - /* if (pthread_create(&threads[thread_index], NULL, parse_source_file, - * &thread_args) != 0) { */ - /* fprintf(stderr, "Error creating thread %d\n", thread_index); */ - /* return 1; */ - /* } */ - } - } - free((void*)source_file.content); - } else { - if (DEBUG) { - fprintf(stderr, "Failed to read file.\n"); - } - } - current = current->next; - // thread_index++; - } + tp_add_job(pool, (thread_func_t)parse_source_file, thread_args); + } else { + if (DEBUG) { + fprintf(stderr, "Failed to read file: %s\n", file_path); + } + } + } - // Collecting threads. - /* for (int i = 0; i < list_size; i++) { */ - /* printf("> collecting thread #%d\n", thread_index); */ - /* if (pthread_join(threads[i], NULL) != 0) { */ - /* fprintf(stderr, "Error joining thread %d\n", i); */ - /* return 1; */ - /* } */ - /* } */ + current = current->next; + } - free_file_list(head); - return 0; + tp_wait(pool); + tp_destroy(pool); + free_file_list(head); + return 0; } diff --git a/tpool.c b/tpool.c new file mode 100644 index 0000000000000000000000000000000000000000..382a9c744ea5c12927ca806ea0f4a4c077d86c15 --- /dev/null +++ b/tpool.c @@ -0,0 +1,141 @@ +#include "tpool.h" +#include +#include + +typedef struct ThreadPoolJobNode { + ThreadPoolJob job; + struct ThreadPoolJobNode *next; +} ThreadPoolJobNode; + +struct ThreadPool { + pthread_mutex_t lock; + pthread_cond_t notify; + pthread_cond_t working_cond; + + pthread_t *threads; + int num_threads; + + ThreadPoolJobNode *queue_head; + ThreadPoolJobNode *queue_tail; + + int active_jobs; // Jobs currently running + int queued_jobs; // Jobs waiting in queue + bool stop; +}; + +static void *tp_worker(void *arg) { + ThreadPool *pool = (ThreadPool *)arg; + + while (1) { + pthread_mutex_lock(&pool->lock); + + while (pool->queue_head == NULL && !pool->stop) { + pthread_cond_wait(&pool->notify, &pool->lock); + } + + if (pool->stop && pool->queue_head == NULL) { + pthread_mutex_unlock(&pool->lock); + break; + } + + ThreadPoolJobNode *node = pool->queue_head; + pool->queue_head = node->next; + if (pool->queue_head == NULL) { + pool->queue_tail = NULL; + } + + pool->queued_jobs--; + pool->active_jobs++; + + pthread_mutex_unlock(&pool->lock); + + // Execute job + if (node->job.function) { + node->job.function(node->job.arg); + } + free(node); + + pthread_mutex_lock(&pool->lock); + pool->active_jobs--; + if (pool->active_jobs == 0 && pool->queue_head == NULL) { + pthread_cond_signal(&pool->working_cond); + } + pthread_mutex_unlock(&pool->lock); + } + + return NULL; +} + +ThreadPool *tp_create(int num_threads) { + ThreadPool *pool = (ThreadPool *)malloc(sizeof(ThreadPool)); + if (pool == NULL) + return NULL; + + pool->num_threads = num_threads; + pool->queue_head = NULL; + pool->queue_tail = NULL; + pool->active_jobs = 0; + pool->queued_jobs = 0; + pool->stop = false; + + pthread_mutex_init(&pool->lock, NULL); + pthread_cond_init(&pool->notify, NULL); + pthread_cond_init(&pool->working_cond, NULL); + + pool->threads = (pthread_t *)malloc(sizeof(pthread_t) * num_threads); + for (int i = 0; i < num_threads; i++) { + pthread_create(&pool->threads[i], NULL, tp_worker, pool); + } + + return pool; +} + +void tp_add_job(ThreadPool *pool, thread_func_t function, void *arg) { + ThreadPoolJobNode *node = (ThreadPoolJobNode *)malloc(sizeof(ThreadPoolJobNode)); + if (node == NULL) { + perror("malloc"); + exit(EXIT_FAILURE); + } + node->job.function = function; + node->job.arg = arg; + node->next = NULL; + + pthread_mutex_lock(&pool->lock); + + if (pool->queue_tail) { + pool->queue_tail->next = node; + } else { + pool->queue_head = node; + } + pool->queue_tail = node; + + pool->queued_jobs++; + pthread_cond_signal(&pool->notify); + + pthread_mutex_unlock(&pool->lock); +} + +void tp_wait(ThreadPool *pool) { + pthread_mutex_lock(&pool->lock); + while (pool->active_jobs > 0 || pool->queue_head != NULL) { + pthread_cond_wait(&pool->working_cond, &pool->lock); + } + pthread_mutex_unlock(&pool->lock); +} + +void tp_destroy(ThreadPool *pool) { + pthread_mutex_lock(&pool->lock); + pool->stop = true; + pthread_cond_broadcast(&pool->notify); + pthread_mutex_unlock(&pool->lock); + + for (int i = 0; i < pool->num_threads; i++) { + pthread_join(pool->threads[i], NULL); + } + + free(pool->threads); + pthread_mutex_destroy(&pool->lock); + pthread_cond_destroy(&pool->notify); + pthread_cond_destroy(&pool->working_cond); + free(pool); +} diff --git a/tpool.h b/tpool.h new file mode 100644 index 0000000000000000000000000000000000000000..d03f70aac82ede1c9470129054f62fc75b837b6c --- /dev/null +++ b/tpool.h @@ -0,0 +1,21 @@ +#ifndef THREAD_POOL_H +#define THREAD_POOL_H + +#include +#include + +typedef void (*thread_func_t)(void *arg); + +typedef struct { + thread_func_t function; + void *arg; +} ThreadPoolJob; + +typedef struct ThreadPool ThreadPool; + +ThreadPool *tp_create(int num_threads); +void tp_add_job(ThreadPool *pool, thread_func_t function, void *arg); +void tp_wait(ThreadPool *pool); +void tp_destroy(ThreadPool *pool); + +#endif