diff options
Diffstat (limited to 'examples/dte/filetype.c')
| -rw-r--r-- | examples/dte/filetype.c | 333 |
1 files changed, 0 insertions, 333 deletions
diff --git a/examples/dte/filetype.c b/examples/dte/filetype.c deleted file mode 100644 index 5a40f7c..0000000 --- a/examples/dte/filetype.c +++ /dev/null | |||
| @@ -1,333 +0,0 @@ | |||
| 1 | #include <stdint.h> | ||
| 2 | #include <stdlib.h> | ||
| 3 | #include "filetype.h" | ||
| 4 | #include "command/serialize.h" | ||
| 5 | #include "regexp.h" | ||
| 6 | #include "util/array.h" | ||
| 7 | #include "util/ascii.h" | ||
| 8 | #include "util/bsearch.h" | ||
| 9 | #include "util/debug.h" | ||
| 10 | #include "util/path.h" | ||
| 11 | #include "util/str-util.h" | ||
| 12 | #include "util/xmalloc.h" | ||
| 13 | |||
| 14 | static int ft_compare(const void *key, const void *elem) | ||
| 15 | { | ||
| 16 | const StringView *sv = key; | ||
| 17 | const char *ext = elem; // Cast to first member of struct | ||
| 18 | int res = memcmp(sv->data, ext, sv->length); | ||
| 19 | if (unlikely(res == 0 && ext[sv->length] != '\0')) { | ||
| 20 | res = -1; | ||
| 21 | } | ||
| 22 | return res; | ||
| 23 | } | ||
| 24 | |||
| 25 | // Built-in filetypes | ||
| 26 | #include "filetype/names.c" | ||
| 27 | #include "filetype/basenames.c" | ||
| 28 | #include "filetype/directories.c" | ||
| 29 | #include "filetype/extensions.c" | ||
| 30 | #include "filetype/interpreters.c" | ||
| 31 | #include "filetype/ignored-exts.c" | ||
| 32 | #include "filetype/signatures.c" | ||
| 33 | |||
| 34 | UNITTEST { | ||
| 35 | static_assert(NR_BUILTIN_FILETYPES < 256); | ||
| 36 | CHECK_BSEARCH_ARRAY(basenames, name, strcmp); | ||
| 37 | CHECK_BSEARCH_ARRAY(extensions, ext, strcmp); | ||
| 38 | CHECK_BSEARCH_ARRAY(interpreters, key, strcmp); | ||
| 39 | CHECK_BSEARCH_STR_ARRAY(ignored_extensions, strcmp); | ||
| 40 | CHECK_BSEARCH_STR_ARRAY(builtin_filetype_names, strcmp); | ||
| 41 | |||
| 42 | for (size_t i = 0; i < ARRAYLEN(builtin_filetype_names); i++) { | ||
| 43 | const char *name = builtin_filetype_names[i]; | ||
| 44 | if (unlikely(!is_valid_filetype_name(name))) { | ||
| 45 | BUG("invalid name at builtin_filetype_names[%zu]: \"%s\"", i, name); | ||
| 46 | } | ||
| 47 | } | ||
| 48 | } | ||
| 49 | |||
// A heap-allocated string that carries its own length. The bytes are
// stored inline, directly after the header, so a single allocation of
// `sizeof(FlexArrayStr) + str_len + 1` holds the whole object.
typedef struct {
    unsigned str_len; // Length of `str`, not counting the NUL terminator
    char str[];       // String bytes, followed by a NUL terminator
} FlexArrayStr;
| 54 | |||
| 55 | // Filetypes dynamically added via the `ft` command. | ||
| 56 | // Not grouped by name to make it possible to order them freely. | ||
| 57 | typedef struct { | ||
| 58 | union { | ||
| 59 | FlexArrayStr *str; | ||
| 60 | CachedRegexp *regexp; | ||
| 61 | } u; | ||
| 62 | uint8_t type; // FileDetectionType | ||
| 63 | char name[]; | ||
| 64 | } UserFileTypeEntry; | ||
| 65 | |||
| 66 | static bool ft_uses_regex(FileDetectionType type) | ||
| 67 | { | ||
| 68 | return type == FT_CONTENT || type == FT_FILENAME; | ||
| 69 | } | ||
| 70 | |||
| 71 | bool add_filetype(PointerArray *filetypes, const char *name, const char *str, FileDetectionType type) | ||
| 72 | { | ||
| 73 | BUG_ON(!is_valid_filetype_name(name)); | ||
| 74 | regex_t re; | ||
| 75 | bool use_re = ft_uses_regex(type); | ||
| 76 | if (use_re) { | ||
| 77 | int err = regcomp(&re, str, DEFAULT_REGEX_FLAGS | REG_NEWLINE | REG_NOSUB); | ||
| 78 | if (unlikely(err)) { | ||
| 79 | return regexp_error_msg(&re, str, err); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | size_t name_len = strlen(name); | ||
| 84 | size_t str_len = strlen(str); | ||
| 85 | UserFileTypeEntry *ft = xmalloc(sizeof(*ft) + name_len + 1); | ||
| 86 | ft->type = type; | ||
| 87 | |||
| 88 | char *str_dest; | ||
| 89 | if (use_re) { | ||
| 90 | CachedRegexp *r = xmalloc(sizeof(*r) + str_len + 1); | ||
| 91 | r->re = re; | ||
| 92 | ft->u.regexp = r; | ||
| 93 | str_dest = r->str; | ||
| 94 | } else { | ||
| 95 | FlexArrayStr *s = xmalloc(sizeof(*s) + str_len + 1); | ||
| 96 | s->str_len = str_len; | ||
| 97 | ft->u.str = s; | ||
| 98 | str_dest = s->str; | ||
| 99 | } | ||
| 100 | |||
| 101 | memcpy(ft->name, name, name_len + 1); | ||
| 102 | memcpy(str_dest, str, str_len + 1); | ||
| 103 | ptr_array_append(filetypes, ft); | ||
| 104 | return true; | ||
| 105 | } | ||
| 106 | |||
| 107 | static StringView path_extension(StringView filename) | ||
| 108 | { | ||
| 109 | StringView ext = STRING_VIEW_INIT; | ||
| 110 | ext.data = strview_memrchr(&filename, '.'); | ||
| 111 | if (!ext.data || ext.data == filename.data) { | ||
| 112 | return ext; | ||
| 113 | } | ||
| 114 | ext.data++; | ||
| 115 | ext.length = filename.length - (ext.data - filename.data); | ||
| 116 | return ext; | ||
| 117 | } | ||
| 118 | |||
| 119 | static StringView get_filename_extension(StringView filename) | ||
| 120 | { | ||
| 121 | StringView ext = path_extension(filename); | ||
| 122 | if (is_ignored_extension(ext)) { | ||
| 123 | filename.length -= ext.length + 1; | ||
| 124 | ext = path_extension(filename); | ||
| 125 | } | ||
| 126 | if (strview_has_suffix(&ext, "~")) { | ||
| 127 | ext.length--; | ||
| 128 | } | ||
| 129 | return ext; | ||
| 130 | } | ||
| 131 | |||
| 132 | // Parse hashbang and return interpreter name, without version number. | ||
| 133 | // For example, if line is "#!/usr/bin/env python2", "python" is returned. | ||
| 134 | static StringView get_interpreter(StringView line) | ||
| 135 | { | ||
| 136 | StringView sv = STRING_VIEW_INIT; | ||
| 137 | if (!strview_has_prefix(&line, "#!")) { | ||
| 138 | return sv; | ||
| 139 | } | ||
| 140 | |||
| 141 | strview_remove_prefix(&line, 2); | ||
| 142 | strview_trim_left(&line); | ||
| 143 | if (line.length < 2 || line.data[0] != '/') { | ||
| 144 | return sv; | ||
| 145 | } | ||
| 146 | |||
| 147 | size_t pos = 0; | ||
| 148 | sv = get_delim(line.data, &pos, line.length, ' '); | ||
| 149 | if (pos < line.length && strview_equal_cstring(&sv, "/usr/bin/env")) { | ||
| 150 | while (pos + 1 < line.length && line.data[pos] == ' ') { | ||
| 151 | pos++; | ||
| 152 | } | ||
| 153 | sv = get_delim(line.data, &pos, line.length, ' '); | ||
| 154 | } | ||
| 155 | |||
| 156 | ssize_t last_slash_idx = strview_memrchr_idx(&sv, '/'); | ||
| 157 | if (last_slash_idx >= 0) { | ||
| 158 | strview_remove_prefix(&sv, last_slash_idx + 1); | ||
| 159 | } | ||
| 160 | |||
| 161 | while (sv.length && ascii_is_digit_or_dot(sv.data[sv.length - 1])) { | ||
| 162 | sv.length--; | ||
| 163 | } | ||
| 164 | |||
| 165 | return sv; | ||
| 166 | } | ||
| 167 | |||
| 168 | static bool ft_str_match(const UserFileTypeEntry *ft, const StringView sv) | ||
| 169 | { | ||
| 170 | const char *str = ft->u.str->str; | ||
| 171 | const size_t len = ft->u.str->str_len; | ||
| 172 | return sv.length > 0 && strview_equal_strn(&sv, str, len); | ||
| 173 | } | ||
| 174 | |||
| 175 | static bool ft_regex_match(const UserFileTypeEntry *ft, const StringView sv) | ||
| 176 | { | ||
| 177 | const regex_t *re = &ft->u.regexp->re; | ||
| 178 | regmatch_t m; | ||
| 179 | return sv.length > 0 && regexp_exec(re, sv.data, sv.length, 0, &m, 0); | ||
| 180 | } | ||
| 181 | |||
| 182 | static bool ft_match(const UserFileTypeEntry *ft, const StringView sv) | ||
| 183 | { | ||
| 184 | if (ft_uses_regex(ft->type)) { | ||
| 185 | return ft_regex_match(ft, sv); | ||
| 186 | } | ||
| 187 | return ft_str_match(ft, sv); | ||
| 188 | } | ||
| 189 | |||
| 190 | const char *find_ft(const PointerArray *filetypes, const char *filename, StringView line) | ||
| 191 | { | ||
| 192 | const char *b = filename ? path_basename(filename) : NULL; | ||
| 193 | const StringView base = strview_from_cstring(b); | ||
| 194 | const StringView ext = get_filename_extension(base); | ||
| 195 | const StringView path = strview_from_cstring(filename); | ||
| 196 | const StringView interpreter = get_interpreter(line); | ||
| 197 | BUG_ON(path.length == 0 && (base.length != 0 || ext.length != 0)); | ||
| 198 | BUG_ON(line.length == 0 && interpreter.length != 0); | ||
| 199 | |||
| 200 | // The order of elements in this array determines the order of | ||
| 201 | // precedence for the lookup() functions (but note that changing | ||
| 202 | // the initializer below makes no difference to the array order) | ||
| 203 | const struct { | ||
| 204 | StringView sv; | ||
| 205 | FileTypeEnum (*lookup)(const StringView sv); | ||
| 206 | } table[] = { | ||
| 207 | [FT_INTERPRETER] = {interpreter, filetype_from_interpreter}, | ||
| 208 | [FT_BASENAME] = {base, filetype_from_basename}, | ||
| 209 | [FT_CONTENT] = {line, filetype_from_signature}, | ||
| 210 | [FT_EXTENSION] = {ext, filetype_from_extension}, | ||
| 211 | [FT_FILENAME] = {path, filetype_from_dir_prefix}, | ||
| 212 | }; | ||
| 213 | |||
| 214 | // Search user `ft` entries | ||
| 215 | for (size_t i = 0, n = filetypes->count; i < n; i++) { | ||
| 216 | const UserFileTypeEntry *ft = filetypes->ptrs[i]; | ||
| 217 | if (ft_match(ft, table[ft->type].sv)) { | ||
| 218 | return ft->name; | ||
| 219 | } | ||
| 220 | } | ||
| 221 | |||
| 222 | // Search built-in lookup tables | ||
| 223 | for (FileDetectionType i = 0; i < ARRAYLEN(table); i++) { | ||
| 224 | BUG_ON(!table[i].lookup); | ||
| 225 | FileTypeEnum ft = table[i].lookup(table[i].sv); | ||
| 226 | if (ft != NONE) { | ||
| 227 | return builtin_filetype_names[ft]; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | // Use "ini" filetype if first line looks like an ini [section] | ||
| 232 | strview_trim_right(&line); | ||
| 233 | if (line.length >= 4) { | ||
| 234 | const char *s = line.data; | ||
| 235 | const size_t n = line.length; | ||
| 236 | if (s[0] == '[' && s[n - 1] == ']' && is_word_byte(s[1])) { | ||
| 237 | if (!strview_contains_char_type(&line, ASCII_CNTRL)) { | ||
| 238 | return builtin_filetype_names[INI]; | ||
| 239 | } | ||
| 240 | } | ||
| 241 | } | ||
| 242 | |||
| 243 | if (strview_equal_cstring(&ext, "conf")) { | ||
| 244 | if (strview_has_prefix(&path, "/etc/systemd/")) { | ||
| 245 | return builtin_filetype_names[INI]; | ||
| 246 | } | ||
| 247 | BUG_ON(!filename); | ||
| 248 | const StringView dir = path_slice_dirname(filename); | ||
| 249 | if ( | ||
| 250 | strview_has_prefix(&path, "/etc/") | ||
| 251 | || strview_has_prefix(&path, "/usr/share/") | ||
| 252 | || strview_has_prefix(&path, "/usr/local/share/") | ||
| 253 | || strview_has_suffix(&dir, "/tmpfiles.d") | ||
| 254 | ) { | ||
| 255 | return builtin_filetype_names[CONFIG]; | ||
| 256 | } | ||
| 257 | } | ||
| 258 | |||
| 259 | return NULL; | ||
| 260 | } | ||
| 261 | |||
| 262 | bool is_ft(const PointerArray *filetypes, const char *name) | ||
| 263 | { | ||
| 264 | if (BSEARCH(name, builtin_filetype_names, vstrcmp)) { | ||
| 265 | return true; | ||
| 266 | } | ||
| 267 | |||
| 268 | for (size_t i = 0, n = filetypes->count; i < n; i++) { | ||
| 269 | const UserFileTypeEntry *ft = filetypes->ptrs[i]; | ||
| 270 | if (streq(ft->name, name)) { | ||
| 271 | return true; | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 275 | return false; | ||
| 276 | } | ||
| 277 | |||
| 278 | void collect_ft(const PointerArray *filetypes, PointerArray *a, const char *prefix) | ||
| 279 | { | ||
| 280 | COLLECT_STRINGS(builtin_filetype_names, a, prefix); | ||
| 281 | for (size_t i = 0, n = filetypes->count; i < n; i++) { | ||
| 282 | const UserFileTypeEntry *ft = filetypes->ptrs[i]; | ||
| 283 | const char *name = ft->name; | ||
| 284 | if (str_has_prefix(name, prefix)) { | ||
| 285 | ptr_array_append(a, xstrdup(name)); | ||
| 286 | } | ||
| 287 | } | ||
| 288 | } | ||
| 289 | |||
| 290 | static const char *ft_get_str(const UserFileTypeEntry *ft) | ||
| 291 | { | ||
| 292 | return ft_uses_regex(ft->type) ? ft->u.regexp->str : ft->u.str->str; | ||
| 293 | } | ||
| 294 | |||
| 295 | String dump_filetypes(const PointerArray *filetypes) | ||
| 296 | { | ||
| 297 | static const char flags[][4] = { | ||
| 298 | [FT_EXTENSION] = "", | ||
| 299 | [FT_FILENAME] = "-f ", | ||
| 300 | [FT_CONTENT] = "-c ", | ||
| 301 | [FT_INTERPRETER] = "-i ", | ||
| 302 | [FT_BASENAME] = "-b ", | ||
| 303 | }; | ||
| 304 | |||
| 305 | String s = string_new(4096); | ||
| 306 | for (size_t i = 0, n = filetypes->count; i < n; i++) { | ||
| 307 | const UserFileTypeEntry *ft = filetypes->ptrs[i]; | ||
| 308 | BUG_ON(ft->type >= ARRAYLEN(flags)); | ||
| 309 | BUG_ON(ft->name[0] == '-'); | ||
| 310 | string_append_literal(&s, "ft "); | ||
| 311 | string_append_cstring(&s, flags[ft->type]); | ||
| 312 | string_append_escaped_arg(&s, ft->name, true); | ||
| 313 | string_append_byte(&s, ' '); | ||
| 314 | string_append_escaped_arg(&s, ft_get_str(ft), true); | ||
| 315 | string_append_byte(&s, '\n'); | ||
| 316 | } | ||
| 317 | return s; | ||
| 318 | } | ||
| 319 | |||
| 320 | static void free_filetype_entry(UserFileTypeEntry *ft) | ||
| 321 | { | ||
| 322 | if (ft_uses_regex(ft->type)) { | ||
| 323 | free_cached_regexp(ft->u.regexp); | ||
| 324 | } else { | ||
| 325 | free(ft->u.str); | ||
| 326 | } | ||
| 327 | free(ft); | ||
| 328 | } | ||
| 329 | |||
| 330 | void free_filetypes(PointerArray *filetypes) | ||
| 331 | { | ||
| 332 | ptr_array_free_cb(filetypes, FREE_FUNC(free_filetype_entry)); | ||
| 333 | } | ||
