import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from textblob import TextBlob
import plotly.graph_objects as go
import re
# Load the raw per-hashtag tweet exports, one DataFrame per candidate.
# lineterminator='\n' makes the parser break rows only on a bare newline --
# presumably because the tweet text contains stray carriage returns
# (TODO confirm against the data).
trump, biden = (
    pd.read_csv(csv_path, lineterminator='\n')
    for csv_path in (
        'Archive/hashtag_donaldtrump.csv',
        'Archive/hashtag_joebiden.csv',
    )
)
Drop useless columns
# Columns dropped from both datasets before they are merged.
# (The original list named 'collected_at' twice; once is enough.)
unused_columns = ['tweet_id', 'collected_at', 'user_description']
trump = trump.drop(columns=unused_columns)
biden = biden.drop(columns=unused_columns)

# Tag every row with the candidate whose hashtag file it came from, so the
# two sources can still be told apart after they are concatenated.
trump['candidate'] = 'Trump'
biden['candidate'] = 'Biden'

# sort_values() returns a new DataFrame instead of sorting in place, so its
# result has to be assigned -- previously the sorted frame was thrown away
# and Data_Mixed stayed in plain concatenation order.
Data_Mixed = pd.concat([trump, biden]).sort_values(by='created_at')

# Notebook-style preview of the first rows (no effect when run as a script).
Data_Mixed.head()
def clean(text):
    """Normalise a tweet's text before sentiment scoring.

    The input is converted with ``str()`` and lowercased, then the following
    are stripped, in order: text in square brackets, links (``http(s)://...``
    and ``www....``), ``<...>`` tags, line breaks, @mentions, words that
    contain a digit, ``#`` signs and the retweet marker ``rt``.  Finally every
    remaining non-word character is replaced by a space.

    Args:
        text: The raw tweet.  Any object is accepted; it is passed through
            ``str()`` first (so ``None`` becomes the string ``"none"``).

    Returns:
        The cleaned, lowercase string.  Runs of spaces are not collapsed.
    """
    text = str(text).lower()
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    # Replace line breaks with a space (not ''), otherwise the last word of
    # one line and the first word of the next are fused into a single token.
    text = re.sub(r'\n', ' ', text)
    # \w+ (rather than [A-Za-z0-9]+) so handles containing underscores are
    # removed whole.  Done before the digit-word pass so a handle is always
    # dropped as one unit.
    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    text = re.sub(r'#', '', text)
    # The text is already lowercase here, so the marker must be matched as
    # 'rt'; \b keeps words that merely end in "rt" (e.g. "smart") intact.
    text = re.sub(r'\brt\s+', '', text)
    text = re.sub(r'[^\w]', ' ', text)
    return text
# Create functions to get the subjectivity and polarity
def getSubjectivity(text):
    """Return the ``subjectivity`` component of TextBlob's sentiment for ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.subjectivity
def getPolarity(text):
    """Return the ``polarity`` component of TextBlob's sentiment for ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity
def getAnalysis(score):
    """Map a numeric polarity score to a sentiment label.

    Returns ``'neutral'`` when ``score`` equals zero, ``'negative'`` when it
    is below zero and ``'positive'`` in every other case (which includes NaN,
    since NaN is neither equal to nor less than zero).
    """
    if score == 0:
        return 'neutral'
    return 'negative' if score < 0 else 'positive'
# Build the Trump sentiment table.
# Rows are ordered by follower count (highest first) before de-duplicating on
# the tweet text, so the copy that drop_duplicates keeps (the first one) is
# the one that sorted to the top.
Trump_Tweets = (
    Data_Mixed
    .query('(candidate == "Trump")')
    .sort_values('user_followers_count', ascending=False)
    .drop_duplicates(['tweet'])
    .loc[:, [
        'tweet', 'country', 'likes', 'retweet_count', 'source', 'created_at',
        'continent', 'state', 'lat', 'long', 'city', 'candidate',
    ]]
    .reset_index(drop=True)
)

# Derived columns: cleaned text -> TextBlob scores -> sentiment label.
Trump_Tweets['ClearTweet'] = Trump_Tweets['tweet'].apply(clean)
Trump_Tweets['subjectivity'] = Trump_Tweets['ClearTweet'].apply(getSubjectivity)
Trump_Tweets['polarity'] = Trump_Tweets['ClearTweet'].apply(getPolarity)
Trump_Tweets['analysis'] = Trump_Tweets['polarity'].apply(getAnalysis)

# Notebook-style preview of the first rows (no effect when run as a script).
Trump_Tweets.head()

# Persist the scored tweets; the row index is written too (to_csv default).
Trump_Tweets.to_csv('Archive/sent_donaldtrump.csv')
# Build the Biden sentiment table.
# Rows are ordered by follower count (highest first) and then de-duplicated
# on the tweet text, keeping the first copy.  The Trump table is built with
# this same de-duplication step; it was missing here, so repeated tweets were
# counted several times for Biden only and the two exports were not
# comparable.
Biden_Tweets = (
    Data_Mixed.query('(candidate == "Biden")')
    .sort_values('user_followers_count', ascending=False)
    .drop_duplicates(['tweet'])
)[['tweet', 'country', 'likes', 'retweet_count', 'source', 'created_at',
   'continent', 'state', 'lat', 'long', 'city', 'candidate']]
Biden_Tweets.reset_index(inplace=True, drop=True)

# Derived columns: cleaned text -> TextBlob scores -> sentiment label.
Biden_Tweets['ClearTweet'] = Biden_Tweets['tweet'].apply(clean)
Biden_Tweets['subjectivity'] = Biden_Tweets['ClearTweet'].apply(getSubjectivity)
Biden_Tweets['polarity'] = Biden_Tweets['ClearTweet'].apply(getPolarity)
Biden_Tweets['analysis'] = Biden_Tweets['polarity'].apply(getAnalysis)

# Notebook-style preview of the first rows (no effect when run as a script).
Biden_Tweets.head()

# Persist the scored tweets; the row index is written too (to_csv default).
Biden_Tweets.to_csv('Archive/sent_biden.csv')