From 057be23acf19acae0683c59b0a346b411a04880a Mon Sep 17 00:00:00 2001 From: Mitja Felicijan Date: Sat, 5 Aug 2023 13:41:36 +0200 Subject: Cleanup of posts --- ...23-08-01-make-b-w-svg-charts-with-matplotlib.md | 1 + ...01-03-encoding-binary-data-into-dna-sequence.md | 14 +- .../2023-05-22-crafting-stories-in-zed-editor.md | 1 + static/posts/dna-sequence/benchmarks.csv | 7 + static/posts/dna-sequence/benchmarks.ods | Bin 21911 -> 0 bytes static/posts/dna-sequence/chart-1.png | Bin 64760 -> 0 bytes static/posts/dna-sequence/chart-2.png | Bin 74241 -> 0 bytes static/posts/dna-sequence/chart-encoding-speed.png | Bin 14201 -> 0 bytes static/posts/dna-sequence/chart-file-sizes.png | Bin 12391 -> 0 bytes static/posts/dna-sequence/chart-size.py | 28 + static/posts/dna-sequence/chart-size.svg | 1553 ++++++++++++++++++++ static/posts/dna-sequence/chart-speed.py | 23 + static/posts/dna-sequence/chart-speed.svg | 1416 ++++++++++++++++++ templates/base.html | 19 +- 14 files changed, 3053 insertions(+), 9 deletions(-) create mode 100644 static/posts/dna-sequence/benchmarks.csv delete mode 100755 static/posts/dna-sequence/benchmarks.ods delete mode 100644 static/posts/dna-sequence/chart-1.png delete mode 100644 static/posts/dna-sequence/chart-2.png delete mode 100755 static/posts/dna-sequence/chart-encoding-speed.png delete mode 100755 static/posts/dna-sequence/chart-file-sizes.png create mode 100644 static/posts/dna-sequence/chart-size.py create mode 100644 static/posts/dna-sequence/chart-size.svg create mode 100644 static/posts/dna-sequence/chart-speed.py create mode 100644 static/posts/dna-sequence/chart-speed.svg diff --git a/content/notes/2023-08-01-make-b-w-svg-charts-with-matplotlib.md b/content/notes/2023-08-01-make-b-w-svg-charts-with-matplotlib.md index 4a8f4f4..51e85ec 100644 --- a/content/notes/2023-08-01-make-b-w-svg-charts-with-matplotlib.md +++ b/content/notes/2023-08-01-make-b-w-svg-charts-with-matplotlib.md @@ -45,6 +45,7 @@ df = pd.read_csv("data.csv") # Settings plt.title("Connect median NLB vs ALB") +plt.tight_layout(pad=2) fig = plt.gcf() fig.set_size_inches(10, 4) diff --git a/content/posts/2019-01-03-encoding-binary-data-into-dna-sequence.md b/content/posts/2019-01-03-encoding-binary-data-into-dna-sequence.md index f003fc3..0d44a40 100644 --- a/content/posts/2019-01-03-encoding-binary-data-into-dna-sequence.md +++ b/content/posts/2019-01-03-encoding-binary-data-into-dna-sequence.md @@ -110,7 +110,6 @@ Cytosine and thymine are pyrimidine bases, while adenine and guanine are purine bases. The sugar and the base together are called a nucleoside. ![DNA](/posts/dna-sequence/dna-basics.jpg) - *DNA (a) forms a double stranded helix, and (b) adenine pairs with thymine and cytosine pairs with guanine. (credit a: modification of work by Jerome Walker, Dennis Myts)* @@ -135,7 +134,9 @@ As already mentioned, the Basic Encoding is based on a simple mapping. Since DNA is composed of 4 nucleotides (Adenine, Cytosine, Guanine, Thymine; usually referred using the first letter). Using this technique we can encode -$$ log_2(4) = log_2(2^2) = 2 bits $$ +
+ +
using a single nucleotide. In this way, we are able to use the 4 bases that compose the DNA strand to encode each byte of data. @@ -301,7 +302,6 @@ Then we encode FASTA file from previous operation to encode this data into PNG. After encoding into PNG format this file looks like this. ![Encoded Quote in PNG format](/posts/dna-sequence/quote.png) - The larger the input stream is the larger the PNG file would be. Compiled basic Hello World C program with @@ -396,11 +396,13 @@ Then we GZIP all the FASTA files to see how much the can be compressed. gzip -9 < 10MB.fa > 10MB.fa.gz ``` -[Download ODS file with benchmarks](/dna-sequence/benchmarks.ods). +![Encode to FASTA](/posts/dna-sequence/chart-speed.svg) +The speed increase that occurs when encoding to FASTA format. -![Sample binary file 1KB](/posts/dna-sequence/chart-1.png) +![File sizes](/posts/dna-sequence/chart-size.svg) +Size of the out file after encoding. -![Sample binary file 1KB](/posts/dna-sequence/chart-2.png) +[Download CSV file with benchmarks](/posts/dna-sequence/benchmarks.csv). ## References diff --git a/content/posts/2023-05-22-crafting-stories-in-zed-editor.md b/content/posts/2023-05-22-crafting-stories-in-zed-editor.md index a3104b8..86be7c8 100644 --- a/content/posts/2023-05-22-crafting-stories-in-zed-editor.md +++ b/content/posts/2023-05-22-crafting-stories-in-zed-editor.md @@ -30,6 +30,7 @@ supported languages is not extensive, but it's still impressive. It's a great example of how to create a product that stays out of your way. ![Zed editor](/posts/zed/zed-1.png?style=bigimg) +C code on a light theme. For C development it downloaded [clangd](https://clangd.llvm.org/) and setting up missing dependencies in code was rather easy. For this project I use diff --git a/static/posts/dna-sequence/benchmarks.csv b/static/posts/dna-sequence/benchmarks.csv new file mode 100644 index 0000000..8645d5e --- /dev/null +++ b/static/posts/dna-sequence/benchmarks.csv @@ -0,0 +1,7 @@ +Packages,Encode to FASTA (ms),FASTA file size (KB),FASTA gzipped (KB) +1KB,5.625224,4.1,1.4 +10KB,32.679975,40.7,13 +100KB,112.864416,406.7,121 +1MB,872.887675,4100,1200 +10MB,8472.693202,40700,12000 +100MB,85525.178217,406700,118000 diff --git a/static/posts/dna-sequence/benchmarks.ods b/static/posts/dna-sequence/benchmarks.ods deleted file mode 100755 index 62a8e30..0000000 Binary files a/static/posts/dna-sequence/benchmarks.ods and /dev/null differ diff --git a/static/posts/dna-sequence/chart-1.png b/static/posts/dna-sequence/chart-1.png deleted file mode 100644 index c017e43..0000000 Binary files a/static/posts/dna-sequence/chart-1.png and /dev/null differ diff --git a/static/posts/dna-sequence/chart-2.png b/static/posts/dna-sequence/chart-2.png deleted file mode 100644 index 80b922b..0000000 Binary files a/static/posts/dna-sequence/chart-2.png and /dev/null differ diff --git a/static/posts/dna-sequence/chart-encoding-speed.png b/static/posts/dna-sequence/chart-encoding-speed.png deleted file mode 100755 index 7fb106d..0000000 Binary files a/static/posts/dna-sequence/chart-encoding-speed.png and /dev/null differ diff --git a/static/posts/dna-sequence/chart-file-sizes.png b/static/posts/dna-sequence/chart-file-sizes.png deleted file mode 100755 index 31bfa66..0000000 Binary files a/static/posts/dna-sequence/chart-file-sizes.png and /dev/null differ diff --git a/static/posts/dna-sequence/chart-size.py b/static/posts/dna-sequence/chart-size.py new file mode 100644 index 0000000..4fc408d --- /dev/null +++ b/static/posts/dna-sequence/chart-size.py @@ -0,0 +1,28 @@ +import csv + +import matplotlib.pyplot as plt +import pandas as pd + +# Read the data +df = pd.read_csv("benchmarks.csv") + +# Settings +plt.title("Encode to FASTA out filesize") +plt.tight_layout(pad=2) +fig = plt.gcf() +fig.set_size_inches(10, 4) + +# Plotting +plt.plot(df["Packages"], df["FASTA file size (KB)"], label = "Raw", color="black", linestyle="-") +plt.plot(df["Packages"], df["FASTA gzipped (KB)"], label = "Gzipped", color="black", linestyle="--") + +# Adding x and y axis labels +plt.xlabel("Size of an input file", fontstyle="italic") +plt.ylabel("File size (KB)", fontstyle="italic") + +# Legend +legend = plt.legend() +legend.get_frame().set_linewidth(0) + +# Export as SVG +plt.savefig("chart-size.svg", format="svg") diff --git a/static/posts/dna-sequence/chart-size.svg b/static/posts/dna-sequence/chart-size.svg new file mode 100644 index 0000000..1a2d127 --- /dev/null +++ b/static/posts/dna-sequence/chart-size.svg @@ -0,0 +1,1553 @@ + + + + + + + + 2023-08-05T13:29:17.701350 + image/svg+xml + + + Matplotlib v3.5.2, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/static/posts/dna-sequence/chart-speed.py b/static/posts/dna-sequence/chart-speed.py new file mode 100644 index 0000000..c07b057 --- /dev/null +++ b/static/posts/dna-sequence/chart-speed.py @@ -0,0 +1,23 @@ +import csv + +import matplotlib.pyplot as plt +import pandas as pd + +# Read the data +df = pd.read_csv("benchmarks.csv") + +# Settings +plt.title("Encode to FASTA speed over time") +plt.tight_layout(pad=2) +fig = plt.gcf() +fig.set_size_inches(10, 4) + +# Plotting +plt.plot(df["Packages"], df["Encode to FASTA (ms)"], label = "ALB", color="black", linestyle="--") + +# Adding x and y axis labels +plt.xlabel("Size of an input file", fontstyle="italic") +plt.ylabel("Encoding time (ms)", fontstyle="italic") + +# Export as SVG +plt.savefig("chart-speed.svg", format="svg") diff --git a/static/posts/dna-sequence/chart-speed.svg b/static/posts/dna-sequence/chart-speed.svg new file mode 100644 index 0000000..7bb0c29 --- /dev/null +++ b/static/posts/dna-sequence/chart-speed.svg @@ -0,0 +1,1416 @@ + + + + + + + + 2023-08-05T13:29:20.420382 + image/svg+xml + + + Matplotlib v3.5.2, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/templates/base.html b/templates/base.html index f5cb386..6e0e50e 100644 --- a/templates/base.html +++ b/templates/base.html @@ -95,6 +95,19 @@ font-family: monospace; } + figure { + margin-inline-start: 0; + margin-inline-end: 0; + } + + figcaption { + text-align: center; + } + + figcaption p { + margin: 0.3em 0 0 0; + } + img, video, audio { max-width: 100%; } @@ -167,14 +180,14 @@ You can write me an email at m@mitjafelicijan.com or catch up with me on Telegram. + target="_blank">on Telegram.


This website does not track you. Content is made available under the CC BY 4.0 license unless specified + target="_blank" rel="noreferrer">CC BY 4.0 license unless specified otherwise. Blog is also available as RSS feed. + target="_blank">RSS feed.

-- cgit v1.2.3