From 057be23acf19acae0683c59b0a346b411a04880a Mon Sep 17 00:00:00 2001
From: Mitja Felicijan
Date: Sat, 5 Aug 2023 13:41:36 +0200
Subject: Cleanup of posts
---
...23-08-01-make-b-w-svg-charts-with-matplotlib.md | 1 +
...01-03-encoding-binary-data-into-dna-sequence.md | 14 +-
.../2023-05-22-crafting-stories-in-zed-editor.md | 1 +
static/posts/dna-sequence/benchmarks.csv | 7 +
static/posts/dna-sequence/benchmarks.ods | Bin 21911 -> 0 bytes
static/posts/dna-sequence/chart-1.png | Bin 64760 -> 0 bytes
static/posts/dna-sequence/chart-2.png | Bin 74241 -> 0 bytes
static/posts/dna-sequence/chart-encoding-speed.png | Bin 14201 -> 0 bytes
static/posts/dna-sequence/chart-file-sizes.png | Bin 12391 -> 0 bytes
static/posts/dna-sequence/chart-size.py | 28 +
static/posts/dna-sequence/chart-size.svg | 1553 ++++++++++++++++++++
static/posts/dna-sequence/chart-speed.py | 23 +
static/posts/dna-sequence/chart-speed.svg | 1416 ++++++++++++++++++
templates/base.html | 19 +-
14 files changed, 3053 insertions(+), 9 deletions(-)
create mode 100644 static/posts/dna-sequence/benchmarks.csv
delete mode 100755 static/posts/dna-sequence/benchmarks.ods
delete mode 100644 static/posts/dna-sequence/chart-1.png
delete mode 100644 static/posts/dna-sequence/chart-2.png
delete mode 100755 static/posts/dna-sequence/chart-encoding-speed.png
delete mode 100755 static/posts/dna-sequence/chart-file-sizes.png
create mode 100644 static/posts/dna-sequence/chart-size.py
create mode 100644 static/posts/dna-sequence/chart-size.svg
create mode 100644 static/posts/dna-sequence/chart-speed.py
create mode 100644 static/posts/dna-sequence/chart-speed.svg
diff --git a/content/notes/2023-08-01-make-b-w-svg-charts-with-matplotlib.md b/content/notes/2023-08-01-make-b-w-svg-charts-with-matplotlib.md
index 4a8f4f4..51e85ec 100644
--- a/content/notes/2023-08-01-make-b-w-svg-charts-with-matplotlib.md
+++ b/content/notes/2023-08-01-make-b-w-svg-charts-with-matplotlib.md
@@ -45,6 +45,7 @@ df = pd.read_csv("data.csv")
# Settings
plt.title("Connect median NLB vs ALB")
+plt.tight_layout(pad=2)
fig = plt.gcf()
fig.set_size_inches(10, 4)
diff --git a/content/posts/2019-01-03-encoding-binary-data-into-dna-sequence.md b/content/posts/2019-01-03-encoding-binary-data-into-dna-sequence.md
index f003fc3..0d44a40 100644
--- a/content/posts/2019-01-03-encoding-binary-data-into-dna-sequence.md
+++ b/content/posts/2019-01-03-encoding-binary-data-into-dna-sequence.md
@@ -110,7 +110,6 @@ Cytosine and thymine are pyrimidine bases, while adenine and guanine are purine
bases. The sugar and the base together are called a nucleoside.

-
*DNA (a) forms a double stranded helix, and (b) adenine pairs with thymine and
cytosine pairs with guanine. (credit a: modification of work by Jerome Walker,
Dennis Myts)*
@@ -135,7 +134,9 @@ As already mentioned, the Basic Encoding is based on a simple mapping. Since DNA
is composed of 4 nucleotides (Adenine, Cytosine, Guanine, Thymine; usually
referred using the first letter). Using this technique we can encode
-$$ log_2(4) = log_2(2^2) = 2 bits $$
+
+
+
using a single nucleotide. In this way, we are able to use the 4 bases that
compose the DNA strand to encode each byte of data.
@@ -301,7 +302,6 @@ Then we encode FASTA file from previous operation to encode this data into PNG.
After encoding into PNG format this file looks like this.

-
The larger the input stream is the larger the PNG file would be.
Compiled basic Hello World C program with
@@ -396,11 +396,13 @@ Then we GZIP all the FASTA files to see how much the can be compressed.
gzip -9 < 10MB.fa > 10MB.fa.gz
```
-[Download ODS file with benchmarks](/dna-sequence/benchmarks.ods).
+
+How encoding time grows with input size when encoding to FASTA format.
-
+
+Size of the output file after encoding.
-
+[Download CSV file with benchmarks](/posts/dna-sequence/benchmarks.csv).
## References
diff --git a/content/posts/2023-05-22-crafting-stories-in-zed-editor.md b/content/posts/2023-05-22-crafting-stories-in-zed-editor.md
index a3104b8..86be7c8 100644
--- a/content/posts/2023-05-22-crafting-stories-in-zed-editor.md
+++ b/content/posts/2023-05-22-crafting-stories-in-zed-editor.md
@@ -30,6 +30,7 @@ supported languages is not extensive, but it's still impressive. It's a great
example of how to create a product that stays out of your way.

+C code on a light theme.
For C development it downloaded [clangd](https://clangd.llvm.org/) and setting
up missing dependencies in code was rather easy. For this project I use
diff --git a/static/posts/dna-sequence/benchmarks.csv b/static/posts/dna-sequence/benchmarks.csv
new file mode 100644
index 0000000..8645d5e
--- /dev/null
+++ b/static/posts/dna-sequence/benchmarks.csv
@@ -0,0 +1,7 @@
+Packages,Encode to FASTA (ms),FASTA file size (KB),FASTA gzipped (KB)
+1KB,5.625224,4.1,1.4
+10KB,32.679975,40.7,13
+100KB,112.864416,406.7,121
+1MB,872.887675,4100,1200
+10MB,8472.693202,40700,12000
+100MB,85525.178217,406700,118000
diff --git a/static/posts/dna-sequence/benchmarks.ods b/static/posts/dna-sequence/benchmarks.ods
deleted file mode 100755
index 62a8e30..0000000
Binary files a/static/posts/dna-sequence/benchmarks.ods and /dev/null differ
diff --git a/static/posts/dna-sequence/chart-1.png b/static/posts/dna-sequence/chart-1.png
deleted file mode 100644
index c017e43..0000000
Binary files a/static/posts/dna-sequence/chart-1.png and /dev/null differ
diff --git a/static/posts/dna-sequence/chart-2.png b/static/posts/dna-sequence/chart-2.png
deleted file mode 100644
index 80b922b..0000000
Binary files a/static/posts/dna-sequence/chart-2.png and /dev/null differ
diff --git a/static/posts/dna-sequence/chart-encoding-speed.png b/static/posts/dna-sequence/chart-encoding-speed.png
deleted file mode 100755
index 7fb106d..0000000
Binary files a/static/posts/dna-sequence/chart-encoding-speed.png and /dev/null differ
diff --git a/static/posts/dna-sequence/chart-file-sizes.png b/static/posts/dna-sequence/chart-file-sizes.png
deleted file mode 100755
index 31bfa66..0000000
Binary files a/static/posts/dna-sequence/chart-file-sizes.png and /dev/null differ
diff --git a/static/posts/dna-sequence/chart-size.py b/static/posts/dna-sequence/chart-size.py
new file mode 100644
index 0000000..4fc408d
--- /dev/null
+++ b/static/posts/dna-sequence/chart-size.py
@@ -0,0 +1,28 @@
+import csv
+
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Load the benchmark table (relative path, resolved against the working directory).
+benchmarks = pd.read_csv("benchmarks.csv")
+
+# Figure setup: title, padding, then a 10x4 inch canvas.
+plt.title("Encode to FASTA out filesize")
+plt.tight_layout(pad=2)
+plt.gcf().set_size_inches(10, 4)
+input_sizes = benchmarks["Packages"]
+
+# Both series are black; only the line style tells them apart.
+plt.plot(input_sizes, benchmarks["FASTA file size (KB)"], label="Raw", color="black", linestyle="-")
+plt.plot(input_sizes, benchmarks["FASTA gzipped (KB)"], label="Gzipped", color="black", linestyle="--")
+
+# Italic captions for the x and y axes
+plt.xlabel("Size of an input file", fontstyle="italic")
+plt.ylabel("File size (KB)", fontstyle="italic")
+
+# Legend with its frame border width set to zero
+legend_frame = plt.legend().get_frame()
+legend_frame.set_linewidth(0)
+
+# Write the chart to disk as SVG
+plt.savefig("chart-size.svg", format="svg")
diff --git a/static/posts/dna-sequence/chart-size.svg b/static/posts/dna-sequence/chart-size.svg
new file mode 100644
index 0000000..1a2d127
--- /dev/null
+++ b/static/posts/dna-sequence/chart-size.svg
@@ -0,0 +1,1553 @@
+
+
+
diff --git a/static/posts/dna-sequence/chart-speed.py b/static/posts/dna-sequence/chart-speed.py
new file mode 100644
index 0000000..c07b057
--- /dev/null
+++ b/static/posts/dna-sequence/chart-speed.py
@@ -0,0 +1,23 @@
+import csv
+
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Load the benchmark table (relative path, resolved against the working directory).
+df = pd.read_csv("benchmarks.csv")
+
+# Figure setup: title, padding, then a 10x4 inch canvas.
+plt.title("Encode to FASTA speed over time")
+plt.tight_layout(pad=2)
+fig = plt.gcf()
+fig.set_size_inches(10, 4)
+
+# Single dashed black series; the label names the plotted column's operation.
+plt.plot(df["Packages"], df["Encode to FASTA (ms)"], label="Encode to FASTA", color="black", linestyle="--")
+
+# Italic captions for the x and y axes
+plt.xlabel("Size of an input file", fontstyle="italic")
+plt.ylabel("Encoding time (ms)", fontstyle="italic")
+
+# Write the chart to disk as SVG
+plt.savefig("chart-speed.svg", format="svg")
diff --git a/static/posts/dna-sequence/chart-speed.svg b/static/posts/dna-sequence/chart-speed.svg
new file mode 100644
index 0000000..7bb0c29
--- /dev/null
+++ b/static/posts/dna-sequence/chart-speed.svg
@@ -0,0 +1,1416 @@
+
+
+
diff --git a/templates/base.html b/templates/base.html
index f5cb386..6e0e50e 100644
--- a/templates/base.html
+++ b/templates/base.html
@@ -95,6 +95,19 @@
font-family: monospace;
}
+ figure {
+ margin-inline-start: 0;
+ margin-inline-end: 0;
+ }
+
+ figcaption {
+ text-align: center;
+ }
+
+ figcaption p {
+ margin: 0.3em 0 0 0;
+ }
+
img, video, audio {
max-width: 100%;
}
@@ -167,14 +180,14 @@
You can write me an email
at m@mitjafelicijan.com or
catch up with me on Telegram.
+ target="_blank">on Telegram.
This website does not track you. Content is made available under
the CC BY 4.0 license unless specified
+ target="_blank" rel="noreferrer">CC BY 4.0 license unless specified
otherwise. Blog is also available as RSS feed.
+ target="_blank">RSS feed.