Added basic parsing of C code

Author Mitja Felicijan <mitja.felicijan@gmail.com> 2023-11-08 23:53:57 +0100
Committer Mitja Felicijan <mitja.felicijan@gmail.com> 2023-11-08 23:53:57 +0100
Commit 842f5df169f6e576fa462d414c2645f108bfb9b6 (patch)
-rw-r--r-- main.c 151
1 files changed, 125 insertions, 26 deletions
diff --git a/main.c b/main.c
1
#include <stdio.h>
1
#include <stdio.h>
  
2
#include <stdlib.h>
2
#include <assert.h>
3
#include <assert.h>
3
#include <string.h>
4
#include <string.h>
  
5
  
4
#include <tree_sitter/api.h>
6
#include <tree_sitter/api.h>
5
  
7
  
6
TSLanguage *tree_sitter_json();
8
#include "file.h"
7
  
9
  
8
int main() {
10
#define DEBUG 1
9
  printf("Hi, Mark\n");
  
10
  
  
11
  TSParser *parser = ts_parser_new();
  
12
  ts_parser_set_language(parser, tree_sitter_json());
  
13
  
11
  
14
  const char *source_code = "[1, null, [1,2,3]]";
12
/*
 * Function: one function definition found by the query in
 * parse_source_file().
 *
 *   fname   - text of the node captured as @fname (the function identifier)
 *   ftype   - text of the node captured as @ftype (the return type)
 *   fparams - text of the node captured as @fparams (the parameter list)
 *   lineno  - row of the @fname node's start point
 *
 * NOTE(review): the bare numbers and the ts_parser_parse_string(...) argument
 * fragments interleaved with the members below look like residue from a
 * side-by-side diff rendering rather than part of the struct - confirm
 * against the real file.
 */
typedef struct {
15
  TSTree *tree = ts_parser_parse_string(
13
  const char *fname;
16
    parser,
14
  const char *ftype;
17
    NULL,
15
  const char *fparams;
18
    source_code,
16
  size_t lineno;
19
    strlen(source_code)
17
} Function;
20
  );
  
21
  
18
  
  
19
/*
 * Copy the slice of `source_code` covered by `captured_node` into a freshly
 * allocated, NUL-terminated string.
 *
 * captured_node: node whose start/end byte offsets delimit the slice.
 * source_code:   buffer those byte offsets index into.
 *
 * Returns a malloc()ed string that the caller must free() (cast away the
 * const to do so), or NULL when source_code is NULL, the node reports an end
 * offset before its start offset, or the allocation fails.
 */
const char *extract_value(TSNode captured_node, const char *source_code) {
  if (source_code == NULL) {
    return NULL;
  }

  size_t start = ts_node_start_byte(captured_node);
  size_t end = ts_node_end_byte(captured_node);
  if (end < start) {
    /* Would underflow the unsigned length below. */
    return NULL;
  }

  size_t length = end - start;
  char *buffer = malloc(length + 1);  /* +1 for the NUL terminator */
  if (buffer == NULL) {
    return NULL;
  }

  /* Plain byte copy: avoids narrowing the length to int for a "%.*s"
   * precision and does not run the printf machinery for a memcpy job. */
  memcpy(buffer, source_code + start, length);
  buffer[length] = '\0';
  return buffer;
}
  
32
  
  
33
/*
 * Parse `source_code` with the given tree-sitter `language` and print one
 * line per matched function definition to stdout, formatted as
 *
 *   <file_path>:<row>\t<return type> <name> <parameter list>
 *
 * file_path:   used only as the prefix of every printed line.
 * source_code: NUL-terminated text to parse (its length is taken with
 *              strlen()).
 * language:    grammar handed to both the parser and the query.
 *
 * The query only matches `function_definition` nodes whose type is a
 * `primitive_type` and whose declarator is a `function_declarator` wrapping
 * a plain `identifier`; other definitions are not reported.
 *
 * NOTE(review): the printed row is stored exactly as reported by
 * ts_node_start_point(); if tree-sitter rows are zero-based this is one less
 * than the editor line number - confirm which is intended.
 */
void parse_source_file(const char *file_path, const char *source_code, TSLanguage *language) {
  const char *query_string = "(function_definition type: (primitive_type) @ftype declarator: (function_declarator declarator: (identifier) @fname parameters: (parameter_list) @fparams))";

  TSParser *parser = ts_parser_new();
  ts_parser_set_language(parser, language);

  TSTree *tree = ts_parser_parse_string(parser, NULL, source_code, strlen(source_code));
  if (tree == NULL) {
    /* Nothing to walk; do not hand a NULL tree to ts_tree_root_node(). */
    if (DEBUG) {
      fprintf(stderr, "Failed to parse %s\n", file_path);
    }
    ts_parser_delete(parser);
    return;
  }

  TSNode root_node = ts_tree_root_node(tree);

  uint32_t error_offset;
  TSQueryError error_type;
  TSQuery *query = ts_query_new(language, query_string, strlen(query_string), &error_offset, &error_type);

  if (query != NULL) {
    /* The cursor is only created and run once the query is known to be valid. */
    TSQueryCursor *query_cursor = ts_query_cursor_new();
    ts_query_cursor_exec(query_cursor, query, root_node);

    TSQueryMatch match;
    while (ts_query_cursor_next_match(query_cursor, &match)) {
      Function fn = {0};

      for (unsigned i = 0; i < match.capture_count; i++) {
        TSQueryCapture capture = match.captures[i];
        TSNode captured_node = capture.node;

        uint32_t capture_name_length;
        const char *capture_name = ts_query_capture_name_for_id(query, capture.index, &capture_name_length);

        if (strcmp(capture_name, "fname") == 0) {
          fn.fname = extract_value(captured_node, source_code);

          TSPoint start_point = ts_node_start_point(captured_node);
          fn.lineno = start_point.row;
        } else if (strcmp(capture_name, "ftype") == 0) {
          fn.ftype = extract_value(captured_node, source_code);
        } else if (strcmp(capture_name, "fparams") == 0) {
          fn.fparams = extract_value(captured_node, source_code);
        }
      }

      /* Passing NULL to "%s" is undefined; skip a match whose text could not
       * be extracted (e.g. allocation failure). */
      if (fn.fname != NULL && fn.ftype != NULL && fn.fparams != NULL) {
        printf("%s:%zu\t%s %s %s\n", file_path, fn.lineno, fn.ftype, fn.fname, fn.fparams);
      }

      /* extract_value() returns malloc()ed copies owned by this loop body. */
      free((void *)fn.fname);
      free((void *)fn.ftype);
      free((void *)fn.fparams);
    }

    ts_query_cursor_delete(query_cursor);
  } else {
    if (DEBUG) {
      printf("Query creation failed at offset %u with error type %d\n", error_offset, error_type);
    }
  }

  ts_query_delete(query);
  ts_tree_delete(tree);
  ts_parser_delete(parser);
}
  
93
  
  
94
/*
 * Return the extension of the last path component of `file_path`: the text
 * after its final '.', without the dot.
 *
 * The result points into `file_path` (it is not a copy and must not be
 * freed). Returns NULL when file_path is NULL or when the last component
 * contains no '.'. A trailing dot ("name.") yields the empty string, and a
 * leading-dot name (".bashrc") yields the text after the dot.
 */
const char *get_file_extension(const char *file_path) {
  if (file_path == NULL) {
    return NULL;
  }

  const char *extension = strrchr(file_path, '.');
  if (extension == NULL) {
    return NULL;
  }

  /* A '/' after the last dot means that dot belongs to a directory name
   * (e.g. "build.d/Makefile"), so the file itself has no extension. */
  if (strchr(extension, '/') != NULL) {
    return NULL;
  }

  return extension + 1;
}
  
101
  
  
102
int main(void) {
  
103
  const char *file_path = "examples/cmdline.c";
  
104
  /* const char *file_path = "examples/tabs.py"; */
  
105
  const char *extension = get_file_extension(file_path);
  
106
  
  
107
  TSLanguage *tree_sitter_c(void);
  
108
  TSLanguage *tree_sitter_python(void);
  
109
  
  
110
  struct FileContent source_file = read_entire_file(file_path);
  
111
  if (source_file.content != NULL) {
  
112
    if (DEBUG) {
  
113
      /* fprintf(stdout, "File contents:\n%s\n", source_file.content); */
  
114
      /* fprintf(stdout, "Count of characters: %zu\n", source_file.count); */
  
115
    }
  
116
  
  
117
    if (extension != NULL) {
  
118
      if (DEBUG) {
  
119
        fprintf(stdout, "File extension: %s\n", extension);
  
120
      }
  
121
  
  
122
      if (strcmp(extension, "c") == 0) {
  
123
        parse_source_file(file_path, source_file.content, tree_sitter_c());
  
124
      }
  
125
  
  
126
      if (strcmp(extension, "py") == 0) {
  
127
        parse_source_file(file_path, source_file.content, tree_sitter_python());
  
128
      }
  
129
    } else {
  
130
      if (DEBUG) {
  
131
        fprintf(stderr,"No file extension found.\n");
  
132
      }
  
133
    }
  
134
  
  
135
    free((void *)source_file.content);
  
136
  } else {
  
137
    if (DEBUG) {
  
138
      fprintf(stderr, "Failed to read file.\n");
  
139
    }
  
140
  }
  
141
  
43
  return 0;
142
  return 0;
44
}
143
}