From 9f5454bda6299db43a4e9de5b3716471388b81d9 Mon Sep 17 00:00:00 2001 From: Mitja Felicijan Date: Sat, 27 Aug 2022 14:05:48 +0200 Subject: Move blog to Hugo --- .../.ipynb_checkpoints/TF Test-checkpoint.ipynb | 588 +++++++++++++++++++++ .../sentiment-analysis-checkpoint.ipynb | 170 ++++++ .../guardian-sa-title-desc-relationship.png | Bin 0 -> 15404 bytes static/sentiment-analysis/sentiment-analysis.ipynb | 170 ++++++ 4 files changed, 928 insertions(+) create mode 100644 static/sentiment-analysis/.ipynb_checkpoints/TF Test-checkpoint.ipynb create mode 100644 static/sentiment-analysis/.ipynb_checkpoints/sentiment-analysis-checkpoint.ipynb create mode 100644 static/sentiment-analysis/guardian-sa-title-desc-relationship.png create mode 100644 static/sentiment-analysis/sentiment-analysis.ipynb (limited to 'static/sentiment-analysis') diff --git a/static/sentiment-analysis/.ipynb_checkpoints/TF Test-checkpoint.ipynb b/static/sentiment-analysis/.ipynb_checkpoints/TF Test-checkpoint.ipynb new file mode 100644 index 0000000..e2a85c4 --- /dev/null +++ b/static/sentiment-analysis/.ipynb_checkpoints/TF Test-checkpoint.ipynb @@ -0,0 +1,588 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/m/.local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", + "/home/m/.local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", + "/home/m/.local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", + "/home/m/.local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", + "/home/m/.local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", + "/home/m/.local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.0.0-beta1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/m/.local/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", + "/home/m/.local/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", + "/home/m/.local/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", + "/home/m/.local/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", + "/home/m/.local/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", + "/home/m/.local/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n" + ] + } + ], + "source": [ + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "\n", + "# Helper libraries\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "print(tf.__version__)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from numpy import genfromtxt\n", + "data = genfromtxt('data.csv', delimiter=',')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "data_input = data[:,0:3]\n", + "data_labels = data[:,3]\n", + "\n", + "#data_input = np.transpose(data_input)\n", + "#data_labels = np.transpose(data_labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(600, 3)\n", + "[1.e-01 1.e+00 3.e+02]\n" + ] + } + ], + "source": [ + "print(np.shape(data_input))\n", + "print(data_input[2])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "print(len(data_input))\n", + "print(len(data_labels))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(500, 3)\n", + "(100, 3)\n", + "(500,)\n", + "(100,)\n" + ] + } + ], + "source": [ + "data_input_train = data_input[0:500,:]\n", + "data_input_test = data_input[500:,:]\n", + "\n", + "data_labels_train = data_labels[0:500]\n", + "data_labels_test = data_labels[500:]\n", + "\n", + "print(np.shape(data_input_train))\n", + "print(np.shape(data_input_test))\n", + "\n", + "print(np.shape(data_labels_train))\n", + "print(np.shape(data_labels_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "model = keras.Sequential([\n", + " keras.layers.Dense(128, activation='relu', input_shape=[3]),\n", + " keras.layers.Dense(512, activation='relu'),\n", + " keras.layers.Dense(512, activation='relu'),\n", + " keras.layers.Dense(512, activation='relu'),\n", + " keras.layers.Dense(128, activation='relu'),\n", + " keras.layers.Dense(1)\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "optimizer = tf.keras.optimizers.RMSprop(0.001)\n", + "model.compile(loss='mse',\n", + " optimizer=optimizer,\n", + " metrics=['accuracy'])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train on 500 samples\n", + "Epoch 1/100\n", + "500/500 [==============================] - 0s 399us/sample - loss: 247.2794 - accuracy: 0.0040\n", + "Epoch 2/100\n", + "500/500 [==============================] - 0s 121us/sample - loss: 4.2495 - accuracy: 0.0060\n", + "Epoch 3/100\n", + "500/500 [==============================] - 0s 131us/sample - loss: 1.8787 - accuracy: 0.0040\n", + "Epoch 4/100\n", + "500/500 [==============================] - 0s 121us/sample - loss: 0.4284 - accuracy: 0.0060\n", + "Epoch 5/100\n", + "500/500 [==============================] - 0s 107us/sample - loss: 4.7904 - accuracy: 0.0080\n", + "Epoch 6/100\n", + "500/500 [==============================] - 0s 113us/sample - loss: 0.0819 - accuracy: 0.0040\n", + "Epoch 7/100\n", + "500/500 [==============================] - 0s 108us/sample - loss: 1.6904 - accuracy: 0.0040\n", + "Epoch 8/100\n", + "500/500 [==============================] - 0s 116us/sample - loss: 0.1761 - accuracy: 0.0040\n", + "Epoch 9/100\n", + "500/500 [==============================] - 0s 142us/sample - loss: 0.1135 - accuracy: 0.0040\n", + "Epoch 10/100\n", + "500/500 [==============================] - 0s 124us/sample - loss: 0.4387 - accuracy: 0.0040\n", + "Epoch 11/100\n", + "500/500 [==============================] - 0s 112us/sample - loss: 0.0815 - accuracy: 0.0040\n", + "Epoch 12/100\n", + "500/500 [==============================] - 0s 117us/sample - loss: 0.1725 - accuracy: 0.0040\n", + "Epoch 13/100\n", + "500/500 [==============================] - 0s 119us/sample - loss: 0.1487 - accuracy: 0.0040\n", + "Epoch 14/100\n", + "500/500 [==============================] - 0s 111us/sample - loss: 0.0720 - accuracy: 0.0040\n", + "Epoch 15/100\n", + "500/500 [==============================] - 0s 111us/sample - loss: 0.3110 - accuracy: 0.0040\n", + "Epoch 16/100\n", + "500/500 [==============================] - 0s 128us/sample - loss: 0.0947 - accuracy: 0.0040\n", + "Epoch 17/100\n", + "500/500 [==============================] - 0s 133us/sample - loss: 0.0739 - accuracy: 0.0040\n", + "Epoch 18/100\n", + "500/500 [==============================] - 0s 131us/sample - loss: 0.1353 - accuracy: 0.0060\n", + "Epoch 19/100\n", + "500/500 [==============================] - 0s 135us/sample - loss: 0.0837 - accuracy: 0.0040\n", + "Epoch 20/100\n", + "500/500 [==============================] - 0s 130us/sample - loss: 0.0754 - accuracy: 0.0040\n", + "Epoch 21/100\n", + "500/500 [==============================] - 0s 118us/sample - loss: 0.0840 - accuracy: 0.0040\n", + "Epoch 22/100\n", + "500/500 [==============================] - 0s 115us/sample - loss: 0.1105 - accuracy: 0.0040\n", + "Epoch 23/100\n", + "500/500 [==============================] - 0s 116us/sample - loss: 0.0651 - accuracy: 0.0040\n", + "Epoch 24/100\n", + "500/500 [==============================] - 0s 109us/sample - loss: 0.0615 - accuracy: 0.0040\n", + "Epoch 25/100\n", + "500/500 [==============================] - 0s 118us/sample - loss: 0.0656 - accuracy: 0.0040\n", + "Epoch 26/100\n", + "500/500 [==============================] - 0s 113us/sample - loss: 0.0695 - accuracy: 0.0040\n", + "Epoch 27/100\n", + "500/500 [==============================] - 0s 116us/sample - loss: 0.0585 - accuracy: 0.0040\n", + "Epoch 28/100\n", + "500/500 [==============================] - 0s 118us/sample - loss: 0.1300 - accuracy: 0.0040\n", + "Epoch 29/100\n", + "500/500 [==============================] - 0s 112us/sample - loss: 0.0567 - accuracy: 0.0040\n", + "Epoch 30/100\n", + "500/500 [==============================] - 0s 137us/sample - loss: 0.0647 - accuracy: 0.0040\n", + "Epoch 31/100\n", + "500/500 [==============================] - 0s 130us/sample - loss: 0.0559 - accuracy: 0.0040\n", + "Epoch 32/100\n", + "500/500 [==============================] - 0s 130us/sample - loss: 0.0576 - accuracy: 0.0040\n", + "Epoch 33/100\n", + "500/500 [==============================] - 0s 128us/sample - loss: 0.0578 - accuracy: 0.0040\n", + "Epoch 34/100\n", + "500/500 [==============================] - 0s 130us/sample - loss: 0.0512 - accuracy: 0.0040\n", + "Epoch 35/100\n", + "500/500 [==============================] - 0s 114us/sample - loss: 0.0601 - accuracy: 0.0040\n", + "Epoch 36/100\n", + "500/500 [==============================] - 0s 111us/sample - loss: 0.0531 - accuracy: 0.0040\n", + "Epoch 37/100\n", + "500/500 [==============================] - 0s 130us/sample - loss: 0.0532 - accuracy: 0.0040\n", + "Epoch 38/100\n", + "500/500 [==============================] - 0s 131us/sample - loss: 0.0480 - accuracy: 0.0040\n", + "Epoch 39/100\n", + "500/500 [==============================] - 0s 136us/sample - loss: 0.0503 - accuracy: 0.0040\n", + "Epoch 40/100\n", + "500/500 [==============================] - 0s 134us/sample - loss: 0.0468 - accuracy: 0.0040\n", + "Epoch 41/100\n", + "500/500 [==============================] - 0s 115us/sample - loss: 0.0509 - accuracy: 0.0040\n", + "Epoch 42/100\n", + "500/500 [==============================] - 0s 109us/sample - loss: 0.0453 - accuracy: 0.0040\n", + "Epoch 43/100\n", + "500/500 [==============================] - 0s 111us/sample - loss: 0.0484 - accuracy: 0.0040\n", + "Epoch 44/100\n", + "500/500 [==============================] - 0s 104us/sample - loss: 0.0458 - accuracy: 0.0040\n", + "Epoch 45/100\n", + "500/500 [==============================] - 0s 110us/sample - loss: 0.0481 - accuracy: 0.0040\n", + "Epoch 46/100\n", + "500/500 [==============================] - 0s 114us/sample - loss: 0.0468 - accuracy: 0.0060\n", + "Epoch 47/100\n", + "500/500 [==============================] - 0s 124us/sample - loss: 0.0473 - accuracy: 0.0060\n", + "Epoch 48/100\n", + "500/500 [==============================] - 0s 137us/sample - loss: 0.0455 - accuracy: 0.0040\n", + "Epoch 49/100\n", + "500/500 [==============================] - 0s 125us/sample - loss: 0.0431 - accuracy: 0.0060\n", + "Epoch 50/100\n", + "500/500 [==============================] - 0s 132us/sample - loss: 0.0432 - accuracy: 0.0060\n", + "Epoch 51/100\n", + "500/500 [==============================] - 0s 116us/sample - loss: 0.0484 - accuracy: 0.0060\n", + "Epoch 52/100\n", + "500/500 [==============================] - 0s 112us/sample - loss: 0.0482 - accuracy: 0.0040\n", + "Epoch 53/100\n", + "500/500 [==============================] - 0s 117us/sample - loss: 0.0444 - accuracy: 0.0060\n", + "Epoch 54/100\n", + "500/500 [==============================] - 0s 109us/sample - loss: 0.0469 - accuracy: 0.0060\n", + "Epoch 55/100\n", + "500/500 [==============================] - 0s 106us/sample - loss: 0.0427 - accuracy: 0.0040\n", + "Epoch 56/100\n", + "500/500 [==============================] - 0s 110us/sample - loss: 0.0433 - accuracy: 0.0040\n", + "Epoch 57/100\n", + "500/500 [==============================] - 0s 102us/sample - loss: 0.0437 - accuracy: 0.0060\n", + "Epoch 58/100\n", + "500/500 [==============================] - 0s 117us/sample - loss: 0.0425 - accuracy: 0.0040\n", + "Epoch 59/100\n", + "500/500 [==============================] - 0s 105us/sample - loss: 0.0418 - accuracy: 0.0040\n", + "Epoch 60/100\n", + "500/500 [==============================] - 0s 109us/sample - loss: 0.0397 - accuracy: 0.0040\n", + "Epoch 61/100\n", + "500/500 [==============================] - 0s 119us/sample - loss: 0.0507 - accuracy: 0.0040\n", + "Epoch 62/100\n", + "500/500 [==============================] - 0s 112us/sample - loss: 0.0402 - accuracy: 0.0060\n", + "Epoch 63/100\n", + "500/500 [==============================] - 0s 133us/sample - loss: 0.0397 - accuracy: 0.0040\n", + "Epoch 64/100\n", + "500/500 [==============================] - 0s 132us/sample - loss: 0.0427 - accuracy: 0.0060\n", + "Epoch 65/100\n", + "500/500 [==============================] - 0s 138us/sample - loss: 0.0398 - accuracy: 0.0040\n", + "Epoch 66/100\n", + "500/500 [==============================] - 0s 145us/sample - loss: 0.0375 - accuracy: 0.0060\n", + "Epoch 67/100\n", + "500/500 [==============================] - 0s 138us/sample - loss: 0.0402 - accuracy: 0.0060\n", + "Epoch 68/100\n", + "500/500 [==============================] - 0s 132us/sample - loss: 0.0388 - accuracy: 0.0080\n", + "Epoch 69/100\n", + "500/500 [==============================] - 0s 115us/sample - loss: 0.0375 - accuracy: 0.0080\n", + "Epoch 70/100\n", + "500/500 [==============================] - 0s 113us/sample - loss: 0.0384 - accuracy: 0.0040\n", + "Epoch 71/100\n", + "500/500 [==============================] - 0s 109us/sample - loss: 0.0360 - accuracy: 0.0080\n", + "Epoch 72/100\n", + "500/500 [==============================] - 0s 111us/sample - loss: 0.0350 - accuracy: 0.0080\n", + "Epoch 73/100\n", + "500/500 [==============================] - 0s 118us/sample - loss: 0.0370 - accuracy: 0.0060\n", + "Epoch 74/100\n", + "500/500 [==============================] - 0s 95us/sample - loss: 0.0354 - accuracy: 0.0080\n", + "Epoch 75/100\n", + "500/500 [==============================] - 0s 102us/sample - loss: 0.0376 - accuracy: 0.0060\n", + "Epoch 76/100\n", + "500/500 [==============================] - 0s 106us/sample - loss: 0.0371 - accuracy: 0.0080\n", + "Epoch 77/100\n", + "500/500 [==============================] - 0s 100us/sample - loss: 0.0369 - accuracy: 0.0060\n", + "Epoch 78/100\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "500/500 [==============================] - 0s 98us/sample - loss: 0.0315 - accuracy: 0.0060\n", + "Epoch 79/100\n", + "500/500 [==============================] - 0s 97us/sample - loss: 0.0355 - accuracy: 0.0060\n", + "Epoch 80/100\n", + "500/500 [==============================] - 0s 100us/sample - loss: 0.0278 - accuracy: 0.0080\n", + "Epoch 81/100\n", + "500/500 [==============================] - 0s 99us/sample - loss: 0.0320 - accuracy: 0.0080\n", + "Epoch 82/100\n", + "500/500 [==============================] - 0s 99us/sample - loss: 0.0321 - accuracy: 0.0080\n", + "Epoch 83/100\n", + "500/500 [==============================] - 0s 94us/sample - loss: 0.0332 - accuracy: 0.0060\n", + "Epoch 84/100\n", + "500/500 [==============================] - 0s 106us/sample - loss: 0.0317 - accuracy: 0.0060\n", + "Epoch 85/100\n", + "500/500 [==============================] - 0s 103us/sample - loss: 0.0293 - accuracy: 0.0080\n", + "Epoch 86/100\n", + "500/500 [==============================] - 0s 107us/sample - loss: 0.0304 - accuracy: 0.0060\n", + "Epoch 87/100\n", + "500/500 [==============================] - 0s 101us/sample - loss: 0.0327 - accuracy: 0.0040\n", + "Epoch 88/100\n", + "500/500 [==============================] - 0s 100us/sample - loss: 0.0290 - accuracy: 0.0080\n", + "Epoch 89/100\n", + "500/500 [==============================] - 0s 123us/sample - loss: 0.0293 - accuracy: 0.0060\n", + "Epoch 90/100\n", + "500/500 [==============================] - 0s 104us/sample - loss: 0.0246 - accuracy: 0.0060\n", + "Epoch 91/100\n", + "500/500 [==============================] - 0s 124us/sample - loss: 0.0303 - accuracy: 0.0060\n", + "Epoch 92/100\n", + "500/500 [==============================] - 0s 129us/sample - loss: 0.0376 - accuracy: 0.0080\n", + "Epoch 93/100\n", + "500/500 [==============================] - 0s 122us/sample - loss: 0.0264 - accuracy: 0.0080\n", + "Epoch 94/100\n", + "500/500 [==============================] - 0s 102us/sample - loss: 0.0265 - accuracy: 0.0080\n", + "Epoch 95/100\n", + "500/500 [==============================] - 0s 108us/sample - loss: 0.0291 - accuracy: 0.0080\n", + "Epoch 96/100\n", + "500/500 [==============================] - 0s 101us/sample - loss: 0.0314 - accuracy: 0.0080\n", + "Epoch 97/100\n", + "500/500 [==============================] - 0s 95us/sample - loss: 0.0257 - accuracy: 0.0060\n", + "Epoch 98/100\n", + "500/500 [==============================] - 0s 100us/sample - loss: 0.0248 - accuracy: 0.0080\n", + "Epoch 99/100\n", + "500/500 [==============================] - 0s 94us/sample - loss: 0.0250 - accuracy: 0.0040\n", + "Epoch 100/100\n", + "500/500 [==============================] - 0s 106us/sample - loss: 0.0312 - accuracy: 0.0060\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#model.fit(data_input_train, data_labels_train, validation_data=(data_input_test, data_labels_test), epochs=100)\n", + "model.fit(data_input_train, data_labels_train, epochs=100)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "100/100 - 0s - loss: 0.0470 - accuracy: 0.0100\n", + "\n", + "Test accuracy: 0.01\n" + ] + } + ], + "source": [ + "test_loss, test_acc = model.evaluate(data_input_test, data_labels_test, verbose=2)\n", + "\n", + "print('\\nTest accuracy:', test_acc)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0.3141548]]\n" + ] + } + ], + "source": [ + "input = np.array([0.46,2,136])\n", + "input.shape = (1,3)\n", + "\n", + "prediction = model.predict(input)\n", + "print(prediction)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "predictions = model.predict(data_input_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib qt \n", + "plt.plot(predictions)\n", + "plt.plot(data_labels_test, 'r')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 204, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib qt\n", + "a = data_labels_test - predictions\n", + "plt.plot(a[0])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 207, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib qt\n", + "a = data_labels_test - predictions\n", + "plt.plot(predictions)\n", + "plt.plot(data_labels_test, 'r')\n", + "plt.plot(a[0], 'g')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-0.08489150602276586" + ] + }, + "execution_count": 180, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.average(a[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_13\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_38 (Dense) (None, 128) 512 \n", + "_________________________________________________________________\n", + "dense_39 (Dense) (None, 512) 66048 \n", + "_________________________________________________________________\n", + "dense_40 (Dense) (None, 512) 262656 \n", + "_________________________________________________________________\n", + "dense_41 (Dense) (None, 512) 262656 \n", + "_________________________________________________________________\n", + "dense_42 (Dense) (None, 128) 65664 \n", + "_________________________________________________________________\n", + "dense_43 (Dense) (None, 1) 129 \n", + "=================================================================\n", + "Total params: 657,665\n", + "Trainable params: 657,665\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "metadata": {}, + "outputs": [], + "source": [ + "model.save('my_model.h5')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/static/sentiment-analysis/.ipynb_checkpoints/sentiment-analysis-checkpoint.ipynb b/static/sentiment-analysis/.ipynb_checkpoints/sentiment-analysis-checkpoint.ipynb new file mode 100644 index 0000000..2c0934c --- /dev/null +++ b/static/sentiment-analysis/.ipynb_checkpoints/sentiment-analysis-checkpoint.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sentiment analysis of Guardian World News articles" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get articles from a website" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install rss parser dependency" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip3 install feedparser" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Parsing RSS feed for world news" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import feedparser\n", + "feed_url = \"https://www.theguardian.com/world/rss\"\n", + "feed = feedparser.parse(feed_url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "for item in feed.entries:\n", + " # sanitize html\n", + " item.description = re.sub('<[^<]+?>', '', item.description)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install Vader Sentiment library and perform sentiment analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip3 install vaderSentiment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n", + "analyser = SentimentIntensityAnalyzer()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sentiment_results = []\n", + "for item in feed.entries:\n", + " sentiment_title = analyser.polarity_scores(item.title)\n", + " sentiment_description = analyser.polarity_scores(item.description)\n", + " sentiment_results.append([sentiment_title['compound'], sentiment_description['compound']])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install Matplotlib and visualize compound score" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip3 install matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "plt.rcParams['figure.figsize'] = (15, 3)\n", + "plt.plot(sentiment_results, drawstyle='steps')\n", + "plt.title('Sentiment analysis relationship between title and description (Guardian World News)')\n", + "plt.legend(['title', 'description'])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/static/sentiment-analysis/guardian-sa-title-desc-relationship.png b/static/sentiment-analysis/guardian-sa-title-desc-relationship.png new file mode 100644 index 0000000..7195bbf Binary files /dev/null and b/static/sentiment-analysis/guardian-sa-title-desc-relationship.png differ diff --git a/static/sentiment-analysis/sentiment-analysis.ipynb b/static/sentiment-analysis/sentiment-analysis.ipynb new file mode 100644 index 0000000..2c0934c --- /dev/null +++ b/static/sentiment-analysis/sentiment-analysis.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sentiment analysis of Guardian World News articles" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get articles from a website" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install rss parser dependency" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip3 install feedparser" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Parsing RSS feed for world news" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import feedparser\n", + "feed_url = \"https://www.theguardian.com/world/rss\"\n", + "feed = feedparser.parse(feed_url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "for item in feed.entries:\n", + " # sanitize html\n", + " item.description = re.sub('<[^<]+?>', '', item.description)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install Vader Sentiment library and perform sentiment analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip3 install vaderSentiment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n", + "analyser = SentimentIntensityAnalyzer()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sentiment_results = []\n", + "for item in feed.entries:\n", + " sentiment_title = analyser.polarity_scores(item.title)\n", + " sentiment_description = analyser.polarity_scores(item.description)\n", + " sentiment_results.append([sentiment_title['compound'], sentiment_description['compound']])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install Matplotlib and visualize compound score" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip3 install matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "plt.rcParams['figure.figsize'] = (15, 3)\n", + "plt.plot(sentiment_results, drawstyle='steps')\n", + "plt.title('Sentiment analysis relationship between title and description (Guardian World News)')\n", + "plt.legend(['title', 'description'])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} -- cgit v1.2.3