summaryrefslogtreecommitdiff
path: root/examples/dte/filetype.c
diff options
context:
space:
mode:
Diffstat (limited to 'examples/dte/filetype.c')
-rw-r--r--examples/dte/filetype.c333
1 files changed, 0 insertions, 333 deletions
diff --git a/examples/dte/filetype.c b/examples/dte/filetype.c
deleted file mode 100644
index 5a40f7c..0000000
--- a/examples/dte/filetype.c
+++ /dev/null
@@ -1,333 +0,0 @@
1#include <stdint.h>
2#include <stdlib.h>
3#include "filetype.h"
4#include "command/serialize.h"
5#include "regexp.h"
6#include "util/array.h"
7#include "util/ascii.h"
8#include "util/bsearch.h"
9#include "util/debug.h"
10#include "util/path.h"
11#include "util/str-util.h"
12#include "util/xmalloc.h"
13
14static int ft_compare(const void *key, const void *elem)
15{
16 const StringView *sv = key;
17 const char *ext = elem; // Cast to first member of struct
18 int res = memcmp(sv->data, ext, sv->length);
19 if (unlikely(res == 0 && ext[sv->length] != '\0')) {
20 res = -1;
21 }
22 return res;
23}
24
25// Built-in filetypes
26#include "filetype/names.c"
27#include "filetype/basenames.c"
28#include "filetype/directories.c"
29#include "filetype/extensions.c"
30#include "filetype/interpreters.c"
31#include "filetype/ignored-exts.c"
32#include "filetype/signatures.c"
33
34UNITTEST {
35 static_assert(NR_BUILTIN_FILETYPES < 256);
36 CHECK_BSEARCH_ARRAY(basenames, name, strcmp);
37 CHECK_BSEARCH_ARRAY(extensions, ext, strcmp);
38 CHECK_BSEARCH_ARRAY(interpreters, key, strcmp);
39 CHECK_BSEARCH_STR_ARRAY(ignored_extensions, strcmp);
40 CHECK_BSEARCH_STR_ARRAY(builtin_filetype_names, strcmp);
41
42 for (size_t i = 0; i < ARRAYLEN(builtin_filetype_names); i++) {
43 const char *name = builtin_filetype_names[i];
44 if (unlikely(!is_valid_filetype_name(name))) {
45 BUG("invalid name at builtin_filetype_names[%zu]: \"%s\"", i, name);
46 }
47 }
48}
49
// A string stored together with its length in a single allocation,
// using a flexible array member for the character data
typedef struct {
    unsigned int str_len; // Length of `str`, excluding the NUL terminator
    char str[]; // NUL-terminated string data
} FlexArrayStr;
54
55// Filetypes dynamically added via the `ft` command.
56// Not grouped by name to make it possible to order them freely.
57typedef struct {
58 union {
59 FlexArrayStr *str;
60 CachedRegexp *regexp;
61 } u;
62 uint8_t type; // FileDetectionType
63 char name[];
64} UserFileTypeEntry;
65
66static bool ft_uses_regex(FileDetectionType type)
67{
68 return type == FT_CONTENT || type == FT_FILENAME;
69}
70
71bool add_filetype(PointerArray *filetypes, const char *name, const char *str, FileDetectionType type)
72{
73 BUG_ON(!is_valid_filetype_name(name));
74 regex_t re;
75 bool use_re = ft_uses_regex(type);
76 if (use_re) {
77 int err = regcomp(&re, str, DEFAULT_REGEX_FLAGS | REG_NEWLINE | REG_NOSUB);
78 if (unlikely(err)) {
79 return regexp_error_msg(&re, str, err);
80 }
81 }
82
83 size_t name_len = strlen(name);
84 size_t str_len = strlen(str);
85 UserFileTypeEntry *ft = xmalloc(sizeof(*ft) + name_len + 1);
86 ft->type = type;
87
88 char *str_dest;
89 if (use_re) {
90 CachedRegexp *r = xmalloc(sizeof(*r) + str_len + 1);
91 r->re = re;
92 ft->u.regexp = r;
93 str_dest = r->str;
94 } else {
95 FlexArrayStr *s = xmalloc(sizeof(*s) + str_len + 1);
96 s->str_len = str_len;
97 ft->u.str = s;
98 str_dest = s->str;
99 }
100
101 memcpy(ft->name, name, name_len + 1);
102 memcpy(str_dest, str, str_len + 1);
103 ptr_array_append(filetypes, ft);
104 return true;
105}
106
107static StringView path_extension(StringView filename)
108{
109 StringView ext = STRING_VIEW_INIT;
110 ext.data = strview_memrchr(&filename, '.');
111 if (!ext.data || ext.data == filename.data) {
112 return ext;
113 }
114 ext.data++;
115 ext.length = filename.length - (ext.data - filename.data);
116 return ext;
117}
118
119static StringView get_filename_extension(StringView filename)
120{
121 StringView ext = path_extension(filename);
122 if (is_ignored_extension(ext)) {
123 filename.length -= ext.length + 1;
124 ext = path_extension(filename);
125 }
126 if (strview_has_suffix(&ext, "~")) {
127 ext.length--;
128 }
129 return ext;
130}
131
132// Parse hashbang and return interpreter name, without version number.
133// For example, if line is "#!/usr/bin/env python2", "python" is returned.
134static StringView get_interpreter(StringView line)
135{
136 StringView sv = STRING_VIEW_INIT;
137 if (!strview_has_prefix(&line, "#!")) {
138 return sv;
139 }
140
141 strview_remove_prefix(&line, 2);
142 strview_trim_left(&line);
143 if (line.length < 2 || line.data[0] != '/') {
144 return sv;
145 }
146
147 size_t pos = 0;
148 sv = get_delim(line.data, &pos, line.length, ' ');
149 if (pos < line.length && strview_equal_cstring(&sv, "/usr/bin/env")) {
150 while (pos + 1 < line.length && line.data[pos] == ' ') {
151 pos++;
152 }
153 sv = get_delim(line.data, &pos, line.length, ' ');
154 }
155
156 ssize_t last_slash_idx = strview_memrchr_idx(&sv, '/');
157 if (last_slash_idx >= 0) {
158 strview_remove_prefix(&sv, last_slash_idx + 1);
159 }
160
161 while (sv.length && ascii_is_digit_or_dot(sv.data[sv.length - 1])) {
162 sv.length--;
163 }
164
165 return sv;
166}
167
168static bool ft_str_match(const UserFileTypeEntry *ft, const StringView sv)
169{
170 const char *str = ft->u.str->str;
171 const size_t len = ft->u.str->str_len;
172 return sv.length > 0 && strview_equal_strn(&sv, str, len);
173}
174
175static bool ft_regex_match(const UserFileTypeEntry *ft, const StringView sv)
176{
177 const regex_t *re = &ft->u.regexp->re;
178 regmatch_t m;
179 return sv.length > 0 && regexp_exec(re, sv.data, sv.length, 0, &m, 0);
180}
181
182static bool ft_match(const UserFileTypeEntry *ft, const StringView sv)
183{
184 if (ft_uses_regex(ft->type)) {
185 return ft_regex_match(ft, sv);
186 }
187 return ft_str_match(ft, sv);
188}
189
190const char *find_ft(const PointerArray *filetypes, const char *filename, StringView line)
191{
192 const char *b = filename ? path_basename(filename) : NULL;
193 const StringView base = strview_from_cstring(b);
194 const StringView ext = get_filename_extension(base);
195 const StringView path = strview_from_cstring(filename);
196 const StringView interpreter = get_interpreter(line);
197 BUG_ON(path.length == 0 && (base.length != 0 || ext.length != 0));
198 BUG_ON(line.length == 0 && interpreter.length != 0);
199
200 // The order of elements in this array determines the order of
201 // precedence for the lookup() functions (but note that changing
202 // the initializer below makes no difference to the array order)
203 const struct {
204 StringView sv;
205 FileTypeEnum (*lookup)(const StringView sv);
206 } table[] = {
207 [FT_INTERPRETER] = {interpreter, filetype_from_interpreter},
208 [FT_BASENAME] = {base, filetype_from_basename},
209 [FT_CONTENT] = {line, filetype_from_signature},
210 [FT_EXTENSION] = {ext, filetype_from_extension},
211 [FT_FILENAME] = {path, filetype_from_dir_prefix},
212 };
213
214 // Search user `ft` entries
215 for (size_t i = 0, n = filetypes->count; i < n; i++) {
216 const UserFileTypeEntry *ft = filetypes->ptrs[i];
217 if (ft_match(ft, table[ft->type].sv)) {
218 return ft->name;
219 }
220 }
221
222 // Search built-in lookup tables
223 for (FileDetectionType i = 0; i < ARRAYLEN(table); i++) {
224 BUG_ON(!table[i].lookup);
225 FileTypeEnum ft = table[i].lookup(table[i].sv);
226 if (ft != NONE) {
227 return builtin_filetype_names[ft];
228 }
229 }
230
231 // Use "ini" filetype if first line looks like an ini [section]
232 strview_trim_right(&line);
233 if (line.length >= 4) {
234 const char *s = line.data;
235 const size_t n = line.length;
236 if (s[0] == '[' && s[n - 1] == ']' && is_word_byte(s[1])) {
237 if (!strview_contains_char_type(&line, ASCII_CNTRL)) {
238 return builtin_filetype_names[INI];
239 }
240 }
241 }
242
243 if (strview_equal_cstring(&ext, "conf")) {
244 if (strview_has_prefix(&path, "/etc/systemd/")) {
245 return builtin_filetype_names[INI];
246 }
247 BUG_ON(!filename);
248 const StringView dir = path_slice_dirname(filename);
249 if (
250 strview_has_prefix(&path, "/etc/")
251 || strview_has_prefix(&path, "/usr/share/")
252 || strview_has_prefix(&path, "/usr/local/share/")
253 || strview_has_suffix(&dir, "/tmpfiles.d")
254 ) {
255 return builtin_filetype_names[CONFIG];
256 }
257 }
258
259 return NULL;
260}
261
262bool is_ft(const PointerArray *filetypes, const char *name)
263{
264 if (BSEARCH(name, builtin_filetype_names, vstrcmp)) {
265 return true;
266 }
267
268 for (size_t i = 0, n = filetypes->count; i < n; i++) {
269 const UserFileTypeEntry *ft = filetypes->ptrs[i];
270 if (streq(ft->name, name)) {
271 return true;
272 }
273 }
274
275 return false;
276}
277
278void collect_ft(const PointerArray *filetypes, PointerArray *a, const char *prefix)
279{
280 COLLECT_STRINGS(builtin_filetype_names, a, prefix);
281 for (size_t i = 0, n = filetypes->count; i < n; i++) {
282 const UserFileTypeEntry *ft = filetypes->ptrs[i];
283 const char *name = ft->name;
284 if (str_has_prefix(name, prefix)) {
285 ptr_array_append(a, xstrdup(name));
286 }
287 }
288}
289
290static const char *ft_get_str(const UserFileTypeEntry *ft)
291{
292 return ft_uses_regex(ft->type) ? ft->u.regexp->str : ft->u.str->str;
293}
294
295String dump_filetypes(const PointerArray *filetypes)
296{
297 static const char flags[][4] = {
298 [FT_EXTENSION] = "",
299 [FT_FILENAME] = "-f ",
300 [FT_CONTENT] = "-c ",
301 [FT_INTERPRETER] = "-i ",
302 [FT_BASENAME] = "-b ",
303 };
304
305 String s = string_new(4096);
306 for (size_t i = 0, n = filetypes->count; i < n; i++) {
307 const UserFileTypeEntry *ft = filetypes->ptrs[i];
308 BUG_ON(ft->type >= ARRAYLEN(flags));
309 BUG_ON(ft->name[0] == '-');
310 string_append_literal(&s, "ft ");
311 string_append_cstring(&s, flags[ft->type]);
312 string_append_escaped_arg(&s, ft->name, true);
313 string_append_byte(&s, ' ');
314 string_append_escaped_arg(&s, ft_get_str(ft), true);
315 string_append_byte(&s, '\n');
316 }
317 return s;
318}
319
320static void free_filetype_entry(UserFileTypeEntry *ft)
321{
322 if (ft_uses_regex(ft->type)) {
323 free_cached_regexp(ft->u.regexp);
324 } else {
325 free(ft->u.str);
326 }
327 free(ft);
328}
329
330void free_filetypes(PointerArray *filetypes)
331{
332 ptr_array_free_cb(filetypes, FREE_FUNC(free_filetype_entry));
333}