Source code for twitterpersona.sentiment_analysis

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd

nltk.download("vader_lexicon")

[docs]def sentiment_labler(df, col):
    """
    Labelling each row in a given column of tweets/text with positive, negative or neutral sentiment.

    Parameters
    ----------
    df : pd.DataFrame
        A dataframe that has been pre-processed.
    
    col : str
        Column name of the column containing tweets in the dataset.
    
    Returns
    -------

    df : pd.DataFrame
        Dataframe contains all tweets the corresponding labels.

    Examples
    --------
    sentiment_labler(df, "text")
    """
    sid = SentimentIntensityAnalyzer()

    def extract_sentiment(text):
        #Only three labels used by NLTK sentiment analyzer: neutral, positive and negative
        #each with a score
        #The sentiment is calculated based on the compound score with thresholds explained in:
        #https://towardsdatascience.com/social-media-sentiment-analysis-in-python-with-vader-no-training-required-4bc6a21e87b8
        scores = sid.polarity_scores(text)
        if scores["compound"] > 0.05:
            return "positive"
        elif scores["compound"] < -0.05:
            return "negative"
        else:
            return "neutral"

    labelled_df = df.assign(sentiment=df[col].apply(extract_sentiment))
    return labelled_df


[docs]def count_tweets(df, proportion= True):
    """
    Count the proportion of different sentiment tweets in a labelled sentiment dataframe

    Parameters
    ----------
    df : pd.DataFrame
        dataframe for each sentiment
    
    proportion : bool
        if True: returns the proportion; otherwise, return the counts.
    
    Returns
    -------
    dictionary
        A dictionary which calculates the proportion of three sentiments of tweets.

    Examples
    --------
    labelled_df = sentiment_labler(df, "text")
    count_tweets(labelled_df)
    """

    if proportion:
        sentiment_counts = df['sentiment'].value_counts(normalize = True)
    else:
        sentiment_counts = df['sentiment'].value_counts()

    return dict(sentiment_counts)