Who won the 2020 US presidential election, according to tweets?

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from textblob import TextBlob
import plotly.graph_objects as go
import re

Import datasets

In [2]:
# Load the two per-hashtag tweet dumps, Trump's file first and Biden's second.
# NOTE(review): lineterminator='\n' presumably stops stray '\r' characters
# inside tweet text from being treated as row breaks; confirm against the data.
trump, biden = (
    pd.read_csv(csv_path, lineterminator='\n')
    for csv_path in (
        'Archive/hashtag_donaldtrump.csv',
        'Archive/hashtag_joebiden.csv',
    )
)

Drop useless columns

In [3]:
# Metadata columns to discard from both frames. Each label is listed once
# (the previous list named 'collected_at' twice).
unused_columns = ['tweet_id', 'collected_at', 'user_description']

# errors='ignore' keeps this cell re-runnable: `trump` and `biden` are
# overwritten here, so on a second execution the columns are already gone and
# a plain drop() would raise KeyError.
trump = trump.drop(columns=unused_columns, errors='ignore')
biden = biden.drop(columns=unused_columns, errors='ignore')
In [4]:
# Tag every row with the candidate whose hashtag file it was loaded from, so
# the two sources stay distinguishable once they are combined.
trump = trump.assign(candidate='Trump')
biden = biden.assign(candidate='Biden')
In [5]:
# Stack both candidates' tweets into one frame ordered by creation time.
# sort_values() returns a new DataFrame rather than sorting in place, so its
# result has to be assigned; calling it as a bare statement left Data_Mixed
# unsorted.
Data_Mixed = pd.concat([trump, biden]).sort_values(by='created_at')

# Preview the combined frame.
Data_Mixed.head()
Out[5]:
created_at tweet likes retweet_count source user_id user_name user_screen_name user_join_date user_followers_count user_location lat long city country continent state state_code candidate
0 2020-10-15 00:00:01 #Elecciones2020 | En #Florida: #JoeBiden dice ... 0.0 0.0 TweetDeck 3.606665e+08 El Sol Latino News elsollatinonews 2011-08-23 15:33:45 1860.0 Philadelphia, PA / Miami, FL 25.774270 -80.193660 NaN United States of America North America Florida FL Trump
1 2020-10-15 00:00:01 Usa 2020, Trump contro Facebook e Twitter: cop... 26.0 9.0 Social Mediaset 3.316176e+08 Tgcom24 MediasetTgcom24 2011-07-08 13:12:20 1067661.0 NaN NaN NaN NaN NaN NaN NaN NaN Trump
2 2020-10-15 00:00:02 #Trump: As a student I used to hear for years,... 2.0 1.0 Twitter Web App 8.436472e+06 snarke snarke 2007-08-26 05:56:11 1185.0 Portland 45.520247 -122.674195 Portland United States of America North America Oregon OR Trump
3 2020-10-15 00:00:02 2 hours since last tweet from #Trump! Maybe he... 0.0 0.0 Trumpytweeter 8.283556e+17 Trumpytweeter trumpytweeter 2017-02-05 21:32:17 32.0 NaN NaN NaN NaN NaN NaN NaN NaN Trump
4 2020-10-15 00:00:08 You get a tie! And you get a tie! #Trump ‘s ra... 4.0 3.0 Twitter for iPhone 4.741380e+07 Rana Abtar - رنا أبتر Ranaabtar 2009-06-15 19:05:35 5393.0 Washington DC 38.894992 -77.036558 Washington United States of America North America District of Columbia DC Trump
In [6]:
def clean(text):
    '''Normalise a tweet into lowercase, punctuation-free text.

    Steps, in order:
      1. Convert the input to ``str`` (so a missing value becomes its string
         form) and strip the retweet marker "RT" while the text still has its
         original casing.
      2. Lowercase everything.
      3. Remove text in square brackets, links, and anything between angle
         brackets.
      4. Turn newlines into spaces so words on adjacent lines stay separate.
      5. Remove @mentions (the whole handle, underscores included).
      6. Remove words containing digits.
      7. Drop '#' and replace every remaining non-word character with a space.

    Parameters
    ----------
    text : any
        The raw tweet; it is passed through ``str()`` first.

    Returns
    -------
    str
        The cleaned text. Runs of spaces are not collapsed.
    '''
    text = str(text)
    # Must run before lower(): once the text is lowercased an upper-case
    # "RT" pattern can never match. The word boundary keeps words that merely
    # end in "RT" intact.
    text = re.sub(r'\bRT\s+', '', text)
    text = text.lower()
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    # Replace with a space, not '', so "good\nday" does not become "goodday".
    text = re.sub(r'\n', ' ', text)
    # \w+ covers underscores, so "@john_doe" is removed whole instead of
    # leaving "_doe" behind. Done before the digit rule so handles containing
    # digits are removed together with their '@'.
    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    text = re.sub(r'#', '', text)
    text = re.sub(r'[^\w]', ' ', text)
    return text
In [7]:
# create functions to get the subjectivity and polarity
def getSubjectivity(text):
    """Return the ``subjectivity`` field of TextBlob's sentiment for ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.subjectivity
def getPolarity(text):
    """Return the ``polarity`` field of TextBlob's sentiment for ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity
def getAnalysis(score):
    """Map a numeric score to a sentiment label.

    Returns 'negative' when ``score`` is below zero, 'neutral' when it equals
    zero, and 'positive' for every other value.
    """
    # The comparison order is kept as-is so that any value failing both tests
    # (NaN included) still falls through to 'positive'.
    return 'negative' if score < 0 else ('neutral' if score == 0 else 'positive')

Donald Trump tweets sentiment analysis

In [8]:
# Select the rows labelled "Trump", order them by the author's follower count
# (largest first) and keep one row per distinct tweet text. Because of the
# sort, the surviving copy of a duplicated tweet is the first one in that
# ordering.
Trump_Tweets = (
    Data_Mixed
    .query('(candidate == "Trump")')
    .sort_values('user_followers_count', ascending=False)
    .drop_duplicates(['tweet'])
    .loc[:, ['tweet', 'country', 'likes', 'retweet_count', 'source',
             'created_at', 'continent', 'state', 'lat', 'long', 'city',
             'candidate']]
    .reset_index(drop=True)
)

# Cleaned text feeds the sentiment scorers; the raw tweet is kept alongside it.
Trump_Tweets['ClearTweet'] = Trump_Tweets['tweet'].apply(clean)

Trump_Tweets['subjectivity'] = Trump_Tweets['ClearTweet'].apply(getSubjectivity)
Trump_Tweets['polarity'] = Trump_Tweets['ClearTweet'].apply(getPolarity)
Trump_Tweets['analysis'] = Trump_Tweets['polarity'].apply(getAnalysis)

# Persist the scored tweets (the index is written as the first CSV column).
Trump_Tweets.to_csv('Archive/sent_donaldtrump.csv')

# Preview last: a notebook only renders a cell's final expression, so a
# head() call placed before to_csv() was silently discarded.
Trump_Tweets.head()

Joe Biden tweets sentiment analysis

In [9]:
# Select the rows labelled "Biden", order them by the author's follower count
# (largest first) and keep one row per distinct tweet text. The
# drop_duplicates step mirrors the Trump cell: without it, repeated tweets
# were counted for Biden but not for Trump, skewing any comparison of the two.
Biden_Tweets = (
    Data_Mixed
    .query('(candidate == "Biden")')
    .sort_values('user_followers_count', ascending=False)
    .drop_duplicates(['tweet'])
    .loc[:, ['tweet', 'country', 'likes', 'retweet_count', 'source',
             'created_at', 'continent', 'state', 'lat', 'long', 'city',
             'candidate']]
    .reset_index(drop=True)
)

# Cleaned text feeds the sentiment scorers; the raw tweet is kept alongside it.
Biden_Tweets['ClearTweet'] = Biden_Tweets['tweet'].apply(clean)

Biden_Tweets['subjectivity'] = Biden_Tweets['ClearTweet'].apply(getSubjectivity)
Biden_Tweets['polarity'] = Biden_Tweets['ClearTweet'].apply(getPolarity)
Biden_Tweets['analysis'] = Biden_Tweets['polarity'].apply(getAnalysis)

# Persist the scored tweets (the index is written as the first CSV column).
Biden_Tweets.to_csv('Archive/sent_biden.csv')

# Preview last: a notebook only renders a cell's final expression, so a
# head() call placed before to_csv() was silently discarded.
Biden_Tweets.head()