import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd
# Module-level side effect: request the "vader_lexicon" NLTK resource when this
# module is imported, so the VADER analyzer imported above has its lexicon.
nltk.download("vader_lexicon")
def sentiment_labler(df, col):
    """
    Label each row of a text column as positive, negative or neutral.

    Every value in ``df[col]`` is scored with NLTK's VADER
    ``SentimentIntensityAnalyzer`` and the resulting compound score is
    mapped to one of three labels.

    Parameters
    ----------
    df : pd.DataFrame
        A dataframe that has been pre-processed.
    col : str
        Name of the column in ``df`` containing the tweets/text.

    Returns
    -------
    pd.DataFrame
        A new dataframe with the columns of ``df`` plus a ``sentiment``
        column holding ``"positive"``, ``"negative"`` or ``"neutral"``
        for each row. ``df`` itself is left unchanged.

    Raises
    ------
    KeyError
        If ``col`` is not a column of ``df``.

    Examples
    --------
    >>> labelled = sentiment_labler(df, "text")  # doctest: +SKIP
    """
    # Cut-off applied to VADER's compound score. The thresholds follow the
    # VADER documentation, where the boundary values themselves count as
    # polar: compound >= 0.05 is positive and compound <= -0.05 is negative.
    # See also:
    # https://towardsdatascience.com/social-media-sentiment-analysis-in-python-with-vader-no-training-required-4bc6a21e87b8
    threshold = 0.05

    # Build the analyzer once and reuse it for every row.
    sid = SentimentIntensityAnalyzer()

    def extract_sentiment(text):
        # polarity_scores returns neg/neu/pos/compound; only the compound
        # score is used to pick the label.
        compound = sid.polarity_scores(text)["compound"]
        if compound >= threshold:
            return "positive"
        if compound <= -threshold:
            return "negative"
        return "neutral"

    # assign() returns a copy, so the caller's dataframe is not mutated.
    return df.assign(sentiment=df[col].apply(extract_sentiment))