summary | refs | log | tree | commit | diff
path: root/examples/dte/load-save.c
diff options
context:
space:
mode:
Diffstat (limited to 'examples/dte/load-save.c')
-rw-r--r-- examples/dte/load-save.c | 505
1 files changed, 505 insertions, 0 deletions
diff --git a/examples/dte/load-save.c b/examples/dte/load-save.c
new file mode 100644
index 0000000..b3ea3fa
--- /dev/null
+++ b/examples/dte/load-save.c
@@ -0,0 +1,505 @@
+#include "compat.h"
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "load-save.h"
+#include "block.h"
+#include "convert.h"
+#include "encoding.h"
+#include "error.h"
+#include "util/debug.h"
+#include "util/fd.h"
+#include "util/list.h"
+#include "util/log.h"
+#include "util/path.h"
+#include "util/str-util.h"
+#include "util/time-util.h"
+#include "util/xmalloc.h"
+#include "util/xreadwrite.h"
+
+// Link `blk` into `buffer->blocks` (inserted before the list head node)
+// and add the block's newline count to the buffer's running total.
+static void add_block(Buffer *buffer, Block *blk)
+{
+    list_add_before(&blk->node, &buffer->blocks);
+    buffer->nl += blk->nl;
+}
+
+// Append one line to the block currently being filled.
+//
+// Copies `len` bytes from `line` into `blk`, writes a trailing '\n' and
+// increments the block's newline counter. If `blk` is NULL, or has fewer
+// than `len + 1` free bytes, the old block (if any) is handed over to
+// add_block() and a new block of MAX(len + 1, 8192) bytes is obtained
+// from block_new().
+//
+// Returns the block that received the line; the caller keeps passing it
+// back in and is responsible for the final add_block() call.
+static Block *add_utf8_line (
+    Buffer *buffer,
+    Block *blk,
+    const unsigned char *line,
+    size_t len
+) {
+    // Room needed for the line contents plus the newline terminator
+    const size_t needed = len + 1;
+    const bool fits = blk && needed <= blk->alloc - blk->size;
+
+    if (!fits) {
+        if (blk) {
+            // Current block is full; commit it before starting another
+            add_block(buffer, blk);
+        }
+        blk = block_new(MAX(needed, 8192));
+    }
+
+    memcpy(blk->data + blk->size, line, len);
+    blk->size += len;
+    blk->data[blk->size] = '\n';
+    blk->size++;
+    blk->nl++;
+    return blk;
+}
+
+// Decode `size` bytes at `buf` and append the resulting lines to `buffer`.
+//
+// Steps, in order:
+//  1. If the buffer encoding is still ENCODING_AUTODETECT and a BOM is
+//     recognized, adopt the BOM's encoding when iconv can convert it to
+//     UTF-8, otherwise fall back to UTF8.
+//  2. Skip the BOM bytes when the BOM type equals the buffer encoding
+//     type, and record that in `buffer->bom`.
+//  3. Read lines from a FileDecoder and store them via add_utf8_line().
+//     A '\r' at the end of the FIRST line switches `crlf_newlines` on;
+//     later lines only lose a trailing '\r' while that flag is set.
+//  4. If the encoding is still ENCODING_AUTODETECT afterwards, take the
+//     encoding name reported by the decoder (or "UTF-8" if none).
+//
+// `utf8_bom` is forwarded to every buffer_set_encoding() call.
+// Returns false only when new_file_decoder() fails; true otherwise.
+static bool decode_and_add_blocks(Buffer *buffer, const unsigned char *buf, size_t size, bool utf8_bom)
+{
+    const EncodingType bom_type = detect_encoding_from_bom(buf, size);
+    const bool have_bom = (bom_type != UNKNOWN_ENCODING);
+
+    if (have_bom && buffer->encoding.type == ENCODING_AUTODETECT) {
+        BUG_ON(buffer->encoding.name);
+        Encoding detected = encoding_from_type(bom_type);
+        if (!conversion_supported_by_iconv(detected.name, "UTF-8")) {
+            detected = encoding_from_type(UTF8);
+        }
+        buffer_set_encoding(buffer, detected, utf8_bom);
+    }
+
+    // Skip BOM only if it matches the specified file encoding
+    if (have_bom && bom_type == buffer->encoding.type) {
+        const ByteOrderMark *bom = get_bom_for_encoding(bom_type);
+        if (bom) {
+            buf += bom->len;
+            size -= bom->len;
+            buffer->bom = true;
+        }
+    }
+
+    FileDecoder *dec = new_file_decoder(buffer->encoding.name, buf, size);
+    if (!dec) {
+        return false;
+    }
+
+    Block *blk = NULL;
+    const char *line;
+    size_t len;
+    for (bool first = true; file_decoder_read_line(dec, &line, &len); first = false) {
+        if (len && line[len - 1] == '\r') {
+            if (first) {
+                // Only the first line decides whether the file uses CRLF
+                buffer->crlf_newlines = true;
+            }
+            if (buffer->crlf_newlines) {
+                len--;
+            }
+        }
+        blk = add_utf8_line(buffer, blk, line, len);
+    }
+    if (blk) {
+        // Commit the block that was still being filled
+        add_block(buffer, blk);
+    }
+
+    if (buffer->encoding.type == ENCODING_AUTODETECT) {
+        const char *name = file_decoder_get_encoding(dec);
+        if (!name) {
+            name = "UTF-8";
+        }
+        buffer_set_encoding(buffer, encoding_from_name(name), utf8_bom);
+    }
+
+    free_file_decoder(dec);
+    return true;
+}
+
+// Normalize the block list after loading:
+//  - an empty list receives a single block from block_new(1);
+//  - otherwise, if the last block is non-empty and does not end with
+//    '\n', a newline is appended (growing the block's allocation to the
+//    next multiple of 64 when it is exactly full) and both the block's
+//    and the buffer's newline counters are incremented.
+static void fixup_blocks(Buffer *buffer)
+{
+    if (list_empty(&buffer->blocks)) {
+        Block *empty = block_new(1);
+        list_add_before(&empty->node, &buffer->blocks);
+        return;
+    }
+
+    // Incomplete lines are not allowed because they are special cases
+    // and cause lots of trouble
+    Block *last = BLOCK(buffer->blocks.prev);
+    if (last->size == 0 || last->data[last->size - 1] == '\n') {
+        return;
+    }
+
+    if (last->size == last->alloc) {
+        // No spare byte for the terminator; enlarge the allocation first
+        last->alloc = round_size_to_next_multiple(last->size + 1, 64);
+        xrenew(last->data, last->alloc);
+    }
+    last->data[last->size++] = '\n';
+    last->nl++;
+    buffer->nl++;
+}
+
+// Hint that the range [addr, addr + len) will be accessed sequentially.
+// Returns the posix_madvise() result when HAVE_POSIX_MADVISE is set,
+// and 0 otherwise.
+static int xmadvise_sequential(void *addr, size_t len)
+{
+#if !HAVE_POSIX_MADVISE
+    // "The posix_madvise() function shall have no effect on the semantics
+    // of access to memory in the specified range, although it may affect
+    // the performance of access". Ergo, doing nothing is a valid fallback.
+    (void)addr;
+    (void)len;
+    return 0;
+#else
+    return posix_madvise(addr, len, POSIX_MADV_SEQUENTIAL);
+#endif
+}
+
+// Overwrite `*info` with the size, mode, owner, device/inode and mtime
+// taken from `st`. Fields not listed in the initializer are zeroed.
+// Always returns true, which lets callers chain it after a stat call.
+static bool update_file_info(FileInfo *info, const struct stat *st)
+{
+    const FileInfo fresh = {
+        .size = st->st_size,
+        .mode = st->st_mode,
+        .gid = st->st_gid,
+        .uid = st->st_uid,
+        .dev = st->st_dev,
+        .ino = st->st_ino,
+        .mtime = *get_stat_mtime(st),
+    };
+    *info = fresh;
+    return true;
+}
+
+// stat() `filename` and, on success, store the result in `*info`.
+// Returns false (leaving `*info` untouched) when stat() fails.
+static bool buffer_stat(FileInfo *info, const char *filename)
+{
+    struct stat st;
+    if (stat(filename, &st) != 0) {
+        return false;
+    }
+    return update_file_info(info, &st);
+}
+
+// fstat() the open descriptor `fd` and, on success, store the result in
+// `*info`. Returns false (leaving `*info` untouched) when fstat() fails.
+static bool buffer_fstat(FileInfo *info, int fd)
+{
+    struct stat st;
+    if (fstat(fd, &st) != 0) {
+        return false;
+    }
+    return update_file_info(info, &st);
+}
+
+// Load the contents of `fd` into memory, decode them into `buffer` via
+// decode_and_add_blocks() and, on success, run fixup_blocks().
+//
+// The expected size comes from `buffer->file.size`:
+//  - >= 64KiB: the file is mmap()ed read-only; if mmap() fails the code
+//    falls through to the malloc()+read path below;
+//  - > 0: a buffer of exactly that size is allocated and filled with
+//    xread_all(); the byte count actually read becomes the new size;
+//  - == 0: the file is read in a doubling buffer until xread_all()
+//    returns 0, because st_size is zero for some files in /proc.
+//
+// `utf8_bom` is forwarded to decode_and_add_blocks().
+// Returns true on success. On failure returns false with errno left as
+// set by the failing operation (the cleanup below does not disturb it),
+// so the caller can report it with strerror(errno).
+bool read_blocks(Buffer *buffer, int fd, bool utf8_bom)
+{
+    const size_t map_size = 64 * 1024;
+    size_t size = buffer->file.size;
+    unsigned char *buf = NULL;
+    bool mapped = false;
+    bool ret = false;
+    int saved_errno = 0;
+
+    if (size >= map_size) {
+        // NOTE: size must be greater than 0
+        buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+        if (buf != MAP_FAILED) {
+            // Purely advisory; a failure here is harmless
+            xmadvise_sequential(buf, size);
+            mapped = true;
+            goto decode;
+        }
+        buf = NULL;
+    }
+
+    if (likely(size > 0)) {
+        buf = malloc(size);
+        if (unlikely(!buf)) {
+            goto error;
+        }
+        ssize_t rc = xread_all(fd, buf, size);
+        if (unlikely(rc < 0)) {
+            goto error;
+        }
+        size = rc;
+    } else {
+        // st_size is zero for some files in /proc
+        size_t alloc = map_size;
+        BUG_ON(!IS_POWER_OF_2(alloc));
+        buf = malloc(alloc);
+        if (unlikely(!buf)) {
+            goto error;
+        }
+        size_t pos = 0;
+        while (1) {
+            ssize_t rc = xread_all(fd, buf + pos, alloc - pos);
+            if (rc < 0) {
+                goto error;
+            }
+            if (rc == 0) {
+                break;
+            }
+            pos += rc;
+            if (pos == alloc) {
+                size_t new_alloc = alloc << 1;
+                if (unlikely(alloc >= new_alloc)) {
+                    // Doubling wrapped around size_t
+                    errno = EOVERFLOW;
+                    goto error;
+                }
+                alloc = new_alloc;
+                // Keep the old pointer until realloc() succeeds so that
+                // the cleanup path can still free it; the temporary has
+                // the same type as `buf` to avoid a signedness mismatch
+                unsigned char *new_buf = realloc(buf, alloc);
+                if (unlikely(!new_buf)) {
+                    goto error;
+                }
+                buf = new_buf;
+            }
+        }
+        size = pos;
+    }
+
+decode:
+    ret = decode_and_add_blocks(buffer, buf, size, utf8_bom);
+
+error:
+    // munmap()/free() are allowed to modify errno on some systems; keep
+    // the value describing the real failure for the caller's message
+    saved_errno = errno;
+    if (mapped) {
+        munmap(buf, size);
+    } else {
+        free(buf);
+    }
+
+    if (ret) {
+        fixup_blocks(buffer);
+    } else {
+        errno = saved_errno;
+    }
+
+    return ret;
+}
+
+// Open `filename` read-only and load its contents into `buffer`.
+//
+// A missing file (ENOENT) is an error only when `must_exist` is set;
+// otherwise the buffer just gets fixup_blocks() applied. Any other open
+// failure is reported via error_msg().
+//
+// For an existing file the function refuses anything that is not a
+// regular file, a negative size, or a size in MiB above
+// `gopts->filesize_limit`, then reads it with read_blocks().
+//
+// If the encoding is still ENCODING_AUTODETECT afterwards it is set to
+// UTF8, with `gopts->utf8_bom` forwarded to buffer_set_encoding().
+//
+// Returns true on success; on failure returns false after an error
+// message has been emitted and the descriptor (if any) closed.
+bool load_buffer(Buffer *buffer, const char *filename, const GlobalOptions *gopts, bool must_exist)
+{
+    const int fd = xopen(filename, O_RDONLY | O_CLOEXEC, 0);
+
+    if (fd >= 0) {
+        FileInfo *info = &buffer->file;
+        if (!buffer_fstat(info, fd)) {
+            error_msg("fstat failed on %s: %s", filename, strerror(errno));
+            goto fail;
+        }
+        if (!S_ISREG(info->mode)) {
+            error_msg("Not a regular file %s", filename);
+            goto fail;
+        }
+        if (unlikely(info->size < 0)) {
+            error_msg("Invalid file size: %jd", (intmax_t)info->size);
+            goto fail;
+        }
+        if (info->size / 1024 / 1024 > gopts->filesize_limit) {
+            error_msg (
+                "File size exceeds 'filesize-limit' option (%uMiB): %s",
+                gopts->filesize_limit, filename
+            );
+            goto fail;
+        }
+        if (!read_blocks(buffer, fd, gopts->utf8_bom)) {
+            error_msg("Error reading %s: %s", filename, strerror(errno));
+            goto fail;
+        }
+        xclose(fd);
+    } else if (errno != ENOENT) {
+        return error_msg("Error opening %s: %s", filename, strerror(errno));
+    } else if (must_exist) {
+        return error_msg("File %s does not exist", filename);
+    } else {
+        // New, not yet existing file: start from an empty block list
+        fixup_blocks(buffer);
+    }
+
+    if (buffer->encoding.type == ENCODING_AUTODETECT) {
+        buffer_set_encoding(buffer, encoding_from_type(UTF8), gopts->utf8_bom);
+    }
+
+    return true;
+
+fail:
+    xclose(fd);
+    return false;
+}
+
+// Return the process's current file mode creation mask.
+static mode_t get_umask(void)
+{
+    // umask() can only report the mask by replacing it, so install a
+    // throwaway value and immediately put the original back
+    const mode_t mask = umask(0);
+    umask(mask);
+    return mask;
+}
+
+// Write the whole buffer to `fd`.
+//
+// If get_bom_for_encoding(bom_type) yields a BOM, its bytes are written
+// directly to `fd` first; every block is then passed through
+// file_encoder_write() on `enc`. The running byte total (BOM length plus
+// the values returned by the encoder) is finally used to truncate the
+// file, which matters when an existing file is being overwritten.
+//
+// Non-reversible conversions counted by the encoder produce a warning
+// but do not fail the save. Returns false, after error_msg_errno(), when
+// a write or the truncation fails; true otherwise.
+static bool write_buffer(Buffer *buffer, FileEncoder *enc, int fd, EncodingType bom_type)
+{
+    size_t size = 0;
+
+    const ByteOrderMark *bom = get_bom_for_encoding(bom_type);
+    if (bom) {
+        size = bom->len;
+        BUG_ON(size == 0);
+        if (xwrite_all(fd, bom->bytes, size) < 0) {
+            return error_msg_errno("write");
+        }
+    }
+
+    Block *blk;
+    block_for_each(blk, &buffer->blocks) {
+        const ssize_t n = file_encoder_write(enc, blk->data, blk->size);
+        if (n < 0) {
+            return error_msg_errno("write");
+        }
+        size += n;
+    }
+
+    const size_t nr_errors = file_encoder_get_nr_errors(enc);
+    if (nr_errors != 0) {
+        // Any real error hides this message
+        const char *plural = (nr_errors > 1) ? "s" : "";
+        error_msg (
+            "Warning: %zu non-reversible character conversion%s; file saved",
+            nr_errors,
+            plural
+        );
+    }
+
+    // Need to truncate if writing to existing file
+    if (xftruncate(fd, size)) {
+        return error_msg_errno("ftruncate");
+    }
+
+    return true;
+}
+
+// Create a temporary file next to `filename` for a save-then-rename.
+//
+// The path ("<dir>/.tmp.<basename>.XXXXXX", completed by mkstemp()) is
+// written into `buf`, which holds `buflen` bytes. When `info->mode` is
+// zero the file is treated as new and gets mode 0666 masked by the
+// umask; otherwise the original's owner, group and mode are applied on a
+// best-effort basis (failures are only logged).
+//
+// Returns the open descriptor, or -1 when no temporary file is used:
+// the target is under "/tmp/" (`buf` left untouched), or the path did
+// not fit / mkstemp() failed (`buf` set to the empty string).
+static int tmp_file(const char *filename, const FileInfo *info, char *buf, size_t buflen)
+{
+    // Don't use temporary file when saving file in /tmp because crontab
+    // command doesn't like the file to be replaced
+    if (str_has_prefix(filename, "/tmp/")) {
+        return -1;
+    }
+
+    const StringView dir = path_slice_dirname(filename);
+    const char *base = path_basename(filename);
+    const int n = snprintf (
+        buf, buflen,
+        "%.*s/.tmp.%s.XXXXXX",
+        (int)dir.length, dir.data, base
+    );
+
+    // Only call mkstemp() if the template was formatted without truncation
+    int fd = -1;
+    if (n > 0 && (size_t)n < buflen) {
+        fd = mkstemp(buf);
+    }
+    if (fd < 0) {
+        // Template too long, or no write permission to the directory?
+        buf[0] = '\0';
+        return -1;
+    }
+
+    if (info->mode == 0) {
+        // New file
+        if (xfchmod(fd, 0666 & ~get_umask()) != 0) {
+            LOG_WARNING("failed to set file mode: %s", strerror(errno));
+        }
+    } else {
+        // Preserve ownership and mode of the original file if possible
+        if (xfchown(fd, info->uid, info->gid) != 0) {
+            LOG_WARNING("failed to preserve file ownership: %s", strerror(errno));
+        }
+        if (xfchmod(fd, info->mode) != 0) {
+            LOG_WARNING("failed to preserve file mode: %s", strerror(errno));
+        }
+    }
+
+    return fd;
+}
+
+// fsync() wrapper that retries on EINTR and treats "not supported on
+// this descriptor" style errors as success. Returns 0 on success (or
+// when HAVE_FSYNC is unset) and -1 on any other failure, with errno left
+// as set by fsync().
+static int xfsync(int fd)
+{
+#if HAVE_FSYNC
+    while (fsync(fd) != 0) {
+        const int err = errno;
+        if (err == EINTR) {
+            // Interrupted by a signal; simply try again
+            continue;
+        }
+        // EINVAL is ignored because it just means "operation not possible
+        // on this descriptor" rather than indicating an actual error
+        if (err == EINVAL || err == ENOTSUP || err == ENOSYS) {
+            return 0;
+        }
+        return -1;
+    }
+    return 0;
+#else
+    (void)fd;
+    return 0;
+#endif
+}
+
+// Write `buffer` to `filename` using `encoding`.
+//
+// Unless `hardlinks` is set, the data is first written to a temporary
+// file created by tmp_file() and then rename()d over the target. If no
+// temporary file is available the target is opened with O_CREAT|O_TRUNC
+// and overwritten in place (new files get mode 0666 masked by the
+// umask).
+//
+// `crlf` is passed to new_file_encoder(); `write_bom` selects whether
+// the encoding's BOM is written. fsync is performed when
+// `buffer->options.fsync` is set.
+//
+// On success the cached file info in `buffer->file` is refreshed and
+// true is returned. On failure an error message is emitted, the
+// temporary file (if any) is unlinked and false is returned.
+bool save_buffer (
+    Buffer *buffer,
+    const char *filename,
+    const Encoding *encoding,
+    bool crlf,
+    bool write_bom,
+    bool hardlinks
+) {
+    char tmp_path[8192];
+    tmp_path[0] = '\0';
+
+    int fd = -1;
+    if (!hardlinks) {
+        // Try to use temporary file (safer)
+        fd = tmp_file(filename, &buffer->file, tmp_path, sizeof(tmp_path));
+    } else {
+        LOG_INFO("target file has hard links; writing in-place");
+    }
+
+    if (fd < 0) {
+        // Overwrite the original file directly (if it exists).
+        // Ownership is preserved automatically if the file exists.
+        // A zero mode means this is a new file.
+        const mode_t orig_mode = buffer->file.mode;
+        const mode_t mode = orig_mode ? orig_mode : (0666 & ~get_umask());
+        fd = xopen(filename, O_CREAT | O_TRUNC | O_WRONLY | O_CLOEXEC, mode);
+        if (fd < 0) {
+            return error_msg_errno("open");
+        }
+    }
+
+    FileEncoder *enc = new_file_encoder(encoding, crlf, fd);
+    if (unlikely(!enc)) {
+        // This should never happen because encoding is validated early
+        error_msg_errno("new_file_encoder");
+        goto fail;
+    }
+
+    const EncodingType bom_type = write_bom ? encoding->type : UNKNOWN_ENCODING;
+    if (!write_buffer(buffer, enc, fd, bom_type)) {
+        goto fail;
+    }
+
+    if (buffer->options.fsync && xfsync(fd) != 0) {
+        error_msg_errno("fsync");
+        goto fail;
+    }
+
+    // Mark the descriptor as closed before checking the result so the
+    // failure path never closes it a second time
+    const int close_rc = xclose(fd);
+    fd = -1;
+    if (close_rc != 0) {
+        error_msg_errno("close");
+        goto fail;
+    }
+
+    if (tmp_path[0] != '\0' && rename(tmp_path, filename) != 0) {
+        error_msg_errno("rename");
+        goto fail;
+    }
+
+    free_file_encoder(enc);
+    buffer_stat(&buffer->file, filename);
+    return true;
+
+fail:
+    if (fd >= 0) {
+        xclose(fd);
+    }
+    if (enc) {
+        free_file_encoder(enc);
+    }
+    if (tmp_path[0] != '\0') {
+        unlink(tmp_path);
+    } else {
+        // Not using temporary file, therefore mtime may have changed.
+        // Update stat to avoid "File has been modified by someone else"
+        // error later when saving the file again.
+        buffer_stat(&buffer->file, filename);
+    }
+    return false;
+}