1{
  2 "cells": [
  3  {
  4   "cell_type": "markdown",
  5   "metadata": {},
  6   "source": [
  7    "# Sentiment analysis of Guardian World News articles"
  8   ]
  9  },
 10  {
 11   "cell_type": "markdown",
 12   "metadata": {},
 13   "source": [
 14    "## Get articles from a website"
 15   ]
 16  },
 17  {
 18   "cell_type": "markdown",
 19   "metadata": {},
 20   "source": [
 21    "### Install the RSS parser dependency"
 22   ]
 23  },
 24  {
 25   "cell_type": "code",
 26   "execution_count": null,
 27   "metadata": {},
 28   "outputs": [],
 29   "source": [
 30    "!pip3 install feedparser"
 31   ]
 32  },
 33  {
 34   "cell_type": "markdown",
 35   "metadata": {},
 36   "source": [
 37    "### Parsing RSS feed for world news"
 38   ]
 39  },
 40  {
 41   "cell_type": "code",
 42   "execution_count": null,
 43   "metadata": {},
 44   "outputs": [],
 45   "source": [
 46    "import feedparser\n",
 47    "feed_url = \"https://www.theguardian.com/world/rss\"\n",
 48    "feed = feedparser.parse(feed_url)"
 49   ]
 50  },
 51  {
 52   "cell_type": "code",
 53   "execution_count": null,
 54   "metadata": {},
 55   "outputs": [],
 56   "source": [
 57    "import re\n",
 58    "for item in feed.entries:\n",
 59    "    # sanitize html\n",
 60    "    item.description = re.sub('<[^<]+?>', '', item.description)"
 61   ]
 62  },
 63  {
 64   "cell_type": "markdown",
 65   "metadata": {},
 66   "source": [
 67    "### Install the VADER sentiment library and perform sentiment analysis"
 68   ]
 69  },
 70  {
 71   "cell_type": "code",
 72   "execution_count": null,
 73   "metadata": {},
 74   "outputs": [],
 75   "source": [
 76    "!pip3 install vaderSentiment"
 77   ]
 78  },
 79  {
 80   "cell_type": "code",
 81   "execution_count": null,
 82   "metadata": {},
 83   "outputs": [],
 84   "source": [
 85    "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n",
 86    "analyser = SentimentIntensityAnalyzer()"
 87   ]
 88  },
 89  {
 90   "cell_type": "code",
 91   "execution_count": null,
 92   "metadata": {},
 93   "outputs": [],
 94   "source": [
 95    "sentiment_results = []\n",
 96    "for item in feed.entries:\n",
 97    "    sentiment_title = analyser.polarity_scores(item.title)\n",
 98    "    sentiment_description = analyser.polarity_scores(item.description)\n",
 99    "    sentiment_results.append([sentiment_title['compound'], sentiment_description['compound']])"
100   ]
101  },
102  {
103   "cell_type": "markdown",
104   "metadata": {},
105   "source": [
106    "### Install Matplotlib and visualize compound score"
107   ]
108  },
109  {
110   "cell_type": "code",
111   "execution_count": null,
112   "metadata": {},
113   "outputs": [],
114   "source": [
115    "!pip3 install matplotlib"
116   ]
117  },
118  {
119   "cell_type": "code",
120   "execution_count": null,
121   "metadata": {},
122   "outputs": [],
123   "source": [
124    "import matplotlib.pyplot as plt"
125   ]
126  },
127  {
128   "cell_type": "code",
129   "execution_count": null,
130   "metadata": {},
131   "outputs": [],
132   "source": [
133    "%matplotlib inline\n",
134    "plt.rcParams['figure.figsize'] = (15, 3)\n",
135    "plt.plot(sentiment_results, drawstyle='steps')\n",
136    "plt.title('Sentiment analysis relationship between title and description (Guardian World News)')\n",
137    "plt.legend(['title', 'description'])\n",
138    "plt.show()"
139   ]
140  },
141  {
142   "cell_type": "code",
143   "execution_count": null,
144   "metadata": {},
145   "outputs": [],
146   "source": []
147  }
148 ],
149 "metadata": {
150  "kernelspec": {
151   "display_name": "Python 3",
152   "language": "python",
153   "name": "python3"
154  },
155  "language_info": {
156   "codemirror_mode": {
157    "name": "ipython",
158    "version": 3
159   },
160   "file_extension": ".py",
161   "mimetype": "text/x-python",
162   "name": "python",
163   "nbconvert_exporter": "python",
164   "pygments_lexer": "ipython3",
165   "version": "3.7.3"
166  }
167 },
168 "nbformat": 4,
169 "nbformat_minor": 4
170}