diff options
Diffstat (limited to 'examples/dte/filetype.c')
| -rw-r--r-- | examples/dte/filetype.c | 333 |
1 files changed, 333 insertions, 0 deletions
diff --git a/examples/dte/filetype.c b/examples/dte/filetype.c new file mode 100644 index 0000000..5a40f7c --- /dev/null +++ b/examples/dte/filetype.c @@ -0,0 +1,333 @@ +#include <stdint.h> +#include <stdlib.h> +#include "filetype.h" +#include "command/serialize.h" +#include "regexp.h" +#include "util/array.h" +#include "util/ascii.h" +#include "util/bsearch.h" +#include "util/debug.h" +#include "util/path.h" +#include "util/str-util.h" +#include "util/xmalloc.h" + +static int ft_compare(const void *key, const void *elem) +{ + const StringView *sv = key; + const char *ext = elem; // Cast to first member of struct + int res = memcmp(sv->data, ext, sv->length); + if (unlikely(res == 0 && ext[sv->length] != '\0')) { + res = -1; + } + return res; +} + +// Built-in filetypes +#include "filetype/names.c" +#include "filetype/basenames.c" +#include "filetype/directories.c" +#include "filetype/extensions.c" +#include "filetype/interpreters.c" +#include "filetype/ignored-exts.c" +#include "filetype/signatures.c" + +UNITTEST { + static_assert(NR_BUILTIN_FILETYPES < 256); + CHECK_BSEARCH_ARRAY(basenames, name, strcmp); + CHECK_BSEARCH_ARRAY(extensions, ext, strcmp); + CHECK_BSEARCH_ARRAY(interpreters, key, strcmp); + CHECK_BSEARCH_STR_ARRAY(ignored_extensions, strcmp); + CHECK_BSEARCH_STR_ARRAY(builtin_filetype_names, strcmp); + + for (size_t i = 0; i < ARRAYLEN(builtin_filetype_names); i++) { + const char *name = builtin_filetype_names[i]; + if (unlikely(!is_valid_filetype_name(name))) { + BUG("invalid name at builtin_filetype_names[%zu]: \"%s\"", i, name); + } + } +} + +typedef struct { + unsigned int str_len; + char str[]; +} FlexArrayStr; + +// Filetypes dynamically added via the `ft` command. +// Not grouped by name to make it possible to order them freely. +typedef struct { + union { + FlexArrayStr *str; + CachedRegexp *regexp; + } u; + uint8_t type; // FileDetectionType + char name[]; +} UserFileTypeEntry; + +static bool ft_uses_regex(FileDetectionType type) +{ + return type == FT_CONTENT || type == FT_FILENAME; +} + +bool add_filetype(PointerArray *filetypes, const char *name, const char *str, FileDetectionType type) +{ + BUG_ON(!is_valid_filetype_name(name)); + regex_t re; + bool use_re = ft_uses_regex(type); + if (use_re) { + int err = regcomp(&re, str, DEFAULT_REGEX_FLAGS | REG_NEWLINE | REG_NOSUB); + if (unlikely(err)) { + return regexp_error_msg(&re, str, err); + } + } + + size_t name_len = strlen(name); + size_t str_len = strlen(str); + UserFileTypeEntry *ft = xmalloc(sizeof(*ft) + name_len + 1); + ft->type = type; + + char *str_dest; + if (use_re) { + CachedRegexp *r = xmalloc(sizeof(*r) + str_len + 1); + r->re = re; + ft->u.regexp = r; + str_dest = r->str; + } else { + FlexArrayStr *s = xmalloc(sizeof(*s) + str_len + 1); + s->str_len = str_len; + ft->u.str = s; + str_dest = s->str; + } + + memcpy(ft->name, name, name_len + 1); + memcpy(str_dest, str, str_len + 1); + ptr_array_append(filetypes, ft); + return true; +} + +static StringView path_extension(StringView filename) +{ + StringView ext = STRING_VIEW_INIT; + ext.data = strview_memrchr(&filename, '.'); + if (!ext.data || ext.data == filename.data) { + return ext; + } + ext.data++; + ext.length = filename.length - (ext.data - filename.data); + return ext; +} + +static StringView get_filename_extension(StringView filename) +{ + StringView ext = path_extension(filename); + if (is_ignored_extension(ext)) { + filename.length -= ext.length + 1; + ext = path_extension(filename); + } + if (strview_has_suffix(&ext, "~")) { + ext.length--; + } + return ext; +} + +// Parse hashbang and return interpreter name, without version number. +// For example, if line is "#!/usr/bin/env python2", "python" is returned. +static StringView get_interpreter(StringView line) +{ + StringView sv = STRING_VIEW_INIT; + if (!strview_has_prefix(&line, "#!")) { + return sv; + } + + strview_remove_prefix(&line, 2); + strview_trim_left(&line); + if (line.length < 2 || line.data[0] != '/') { + return sv; + } + + size_t pos = 0; + sv = get_delim(line.data, &pos, line.length, ' '); + if (pos < line.length && strview_equal_cstring(&sv, "/usr/bin/env")) { + while (pos + 1 < line.length && line.data[pos] == ' ') { + pos++; + } + sv = get_delim(line.data, &pos, line.length, ' '); + } + + ssize_t last_slash_idx = strview_memrchr_idx(&sv, '/'); + if (last_slash_idx >= 0) { + strview_remove_prefix(&sv, last_slash_idx + 1); + } + + while (sv.length && ascii_is_digit_or_dot(sv.data[sv.length - 1])) { + sv.length--; + } + + return sv; +} + +static bool ft_str_match(const UserFileTypeEntry *ft, const StringView sv) +{ + const char *str = ft->u.str->str; + const size_t len = ft->u.str->str_len; + return sv.length > 0 && strview_equal_strn(&sv, str, len); +} + +static bool ft_regex_match(const UserFileTypeEntry *ft, const StringView sv) +{ + const regex_t *re = &ft->u.regexp->re; + regmatch_t m; + return sv.length > 0 && regexp_exec(re, sv.data, sv.length, 0, &m, 0); +} + +static bool ft_match(const UserFileTypeEntry *ft, const StringView sv) +{ + if (ft_uses_regex(ft->type)) { + return ft_regex_match(ft, sv); + } + return ft_str_match(ft, sv); +} + +const char *find_ft(const PointerArray *filetypes, const char *filename, StringView line) +{ + const char *b = filename ? path_basename(filename) : NULL; + const StringView base = strview_from_cstring(b); + const StringView ext = get_filename_extension(base); + const StringView path = strview_from_cstring(filename); + const StringView interpreter = get_interpreter(line); + BUG_ON(path.length == 0 && (base.length != 0 || ext.length != 0)); + BUG_ON(line.length == 0 && interpreter.length != 0); + + // The order of elements in this array determines the order of + // precedence for the lookup() functions (but note that changing + // the initializer below makes no difference to the array order) + const struct { + StringView sv; + FileTypeEnum (*lookup)(const StringView sv); + } table[] = { + [FT_INTERPRETER] = {interpreter, filetype_from_interpreter}, + [FT_BASENAME] = {base, filetype_from_basename}, + [FT_CONTENT] = {line, filetype_from_signature}, + [FT_EXTENSION] = {ext, filetype_from_extension}, + [FT_FILENAME] = {path, filetype_from_dir_prefix}, + }; + + // Search user `ft` entries + for (size_t i = 0, n = filetypes->count; i < n; i++) { + const UserFileTypeEntry *ft = filetypes->ptrs[i]; + if (ft_match(ft, table[ft->type].sv)) { + return ft->name; + } + } + + // Search built-in lookup tables + for (FileDetectionType i = 0; i < ARRAYLEN(table); i++) { + BUG_ON(!table[i].lookup); + FileTypeEnum ft = table[i].lookup(table[i].sv); + if (ft != NONE) { + return builtin_filetype_names[ft]; + } + } + + // Use "ini" filetype if first line looks like an ini [section] + strview_trim_right(&line); + if (line.length >= 4) { + const char *s = line.data; + const size_t n = line.length; + if (s[0] == '[' && s[n - 1] == ']' && is_word_byte(s[1])) { + if (!strview_contains_char_type(&line, ASCII_CNTRL)) { + return builtin_filetype_names[INI]; + } + } + } + + if (strview_equal_cstring(&ext, "conf")) { + if (strview_has_prefix(&path, "/etc/systemd/")) { + return builtin_filetype_names[INI]; + } + BUG_ON(!filename); + const StringView dir = path_slice_dirname(filename); + if ( + strview_has_prefix(&path, "/etc/") + || strview_has_prefix(&path, "/usr/share/") + || strview_has_prefix(&path, "/usr/local/share/") + || strview_has_suffix(&dir, "/tmpfiles.d") + ) { + return builtin_filetype_names[CONFIG]; + } + } + + return NULL; +} + +bool is_ft(const PointerArray *filetypes, const char *name) +{ + if (BSEARCH(name, builtin_filetype_names, vstrcmp)) { + return true; + } + + for (size_t i = 0, n = filetypes->count; i < n; i++) { + const UserFileTypeEntry *ft = filetypes->ptrs[i]; + if (streq(ft->name, name)) { + return true; + } + } + + return false; +} + +void collect_ft(const PointerArray *filetypes, PointerArray *a, const char *prefix) +{ + COLLECT_STRINGS(builtin_filetype_names, a, prefix); + for (size_t i = 0, n = filetypes->count; i < n; i++) { + const UserFileTypeEntry *ft = filetypes->ptrs[i]; + const char *name = ft->name; + if (str_has_prefix(name, prefix)) { + ptr_array_append(a, xstrdup(name)); + } + } +} + +static const char *ft_get_str(const UserFileTypeEntry *ft) +{ + return ft_uses_regex(ft->type) ? ft->u.regexp->str : ft->u.str->str; +} + +String dump_filetypes(const PointerArray *filetypes) +{ + static const char flags[][4] = { + [FT_EXTENSION] = "", + [FT_FILENAME] = "-f ", + [FT_CONTENT] = "-c ", + [FT_INTERPRETER] = "-i ", + [FT_BASENAME] = "-b ", + }; + + String s = string_new(4096); + for (size_t i = 0, n = filetypes->count; i < n; i++) { + const UserFileTypeEntry *ft = filetypes->ptrs[i]; + BUG_ON(ft->type >= ARRAYLEN(flags)); + BUG_ON(ft->name[0] == '-'); + string_append_literal(&s, "ft "); + string_append_cstring(&s, flags[ft->type]); + string_append_escaped_arg(&s, ft->name, true); + string_append_byte(&s, ' '); + string_append_escaped_arg(&s, ft_get_str(ft), true); + string_append_byte(&s, '\n'); + } + return s; +} + +static void free_filetype_entry(UserFileTypeEntry *ft) +{ + if (ft_uses_regex(ft->type)) { + free_cached_regexp(ft->u.regexp); + } else { + free(ft->u.str); + } + free(ft); +} + +void free_filetypes(PointerArray *filetypes) +{ + ptr_array_free_cb(filetypes, FREE_FUNC(free_filetype_entry)); +} |
