diff --git a/main.py b/main.py
new file mode 100644
index 0000000..d84d787
--- /dev/null
+++ b/main.py
@@ -0,0 +1,593 @@
+# import dependencies
+import re
+import time
+import requests
+import tweepy
+import json
+
+
+# math / data
+import numpy as np
+import math
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+from seaborn import heatmap
+from wordcloud import WordCloud
+import collections
+
+import wikipedia as wp
+from pygeocoder import Geocoder
+from googletrans import Translator
+
+# Import and initialize the VADER sentiment analyzer
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+analyzer = SentimentIntensityAnalyzer()
+
+from bs4 import BeautifulSoup
+from datetime import datetime
+from datetime import date, timedelta
+
+
+#from config import consumer_key, consumer_secret, access_token, access_token_secret
+from apikeys import twitterAccessToken as access_token
+from apikeys import twitterAccessTokenSecret as access_token_secret
+from apikeys import twitterConsumerKey as consumer_key
+from apikeys import twitterConsumerSecretKey as consumer_secret
+newsKey = '3327bb97745b4f24b875a9bbc9154270'
+
+
+# Setup Tweepy API authentication
+auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
+auth.set_access_token(access_token, access_token_secret)
+api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())
+
+
+def parse_url(url):
+    response = requests.get(url)
+    soup = BeautifulSoup(response.text, 'lxml')
+    listylist = []
+    for table in soup.find_all('table'):
+        listylist.append(parse_html_table(table))
+    return listylist
+
+def parse_html_table(table):
+    n_columns = 0; n_rows = 0; column_names = []
+
+    # Find the number of rows and columns,
+    # and the column titles if we can
+    for row in table.find_all('tr'):
+
+        # Determine the number of rows in the table
+        td_tags = row.find_all('td')
+        if len(td_tags) > 0:
+            n_rows += 1
+            if n_columns == 0:
+                # Set the number of columns for our table
+                n_columns = len(td_tags)
+
+        # Handle column names if we find them
+        th_tags = row.find_all('th')
+        if len(th_tags) > 0 and len(column_names) == 0:
+            for th in th_tags:
+                column_names.append(th.get_text())
+
+    # Safeguard on column titles
+    if len(column_names) > 0 and len(column_names) != n_columns:
+        raise Exception("Column titles do not match the number of columns")
+
+    columns = column_names if len(column_names) > 0 else range(0, n_columns)
+    df = pd.DataFrame(columns=columns,
+                      index=range(0, n_rows))
+    row_marker = 0
+    for row in table.find_all('tr'):
+        column_marker = 0
+        columns = row.find_all('td')
+        for column in columns:
+            df.iat[row_marker, column_marker] = column.get_text()
+            column_marker += 1
+        if len(columns) > 0:
+            row_marker += 1
+
+    # Convert to float if possible
+    for col in df:
+        try:
+            df[col] = df[col].astype(float)
+        except ValueError:
+            pass
+
+    return df
+
+def getCountryLanguages():
+    #TODO: Use .apply to reduce the table to one dialect per country. Improve language scope later.
+    df = parse_url('https://www.infoplease.com/world/countries-world/languages-spoken-each-country-world')
+    countryLanguages = df[0].rename(columns={0:'country',1:'language'}).set_index('country')
+    countryLanguages['language'] = [re.sub('\d+|%|\(.*\)|\s','',i).split(',')[0].split(';')[0] for i in countryLanguages['language']]
+    return countryLanguages
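+
+# Hedged, illustrative sketch (never called): the cleanup getCountryLanguages applies to each raw
+# language cell. A hypothetical cell such as "Dari Persian 50% (official), Pashtu 35%" loses its
+# digits, '%' signs, parentheticals and whitespace, and only the first ','/';'-separated entry is
+# kept. Note that stripping \s also removes the spaces inside multi-word language names.
+def _demo_language_cleanup(cell="Dari Persian 50% (official), Pashtu 35%"):
+    return re.sub('\d+|%|\(.*\)|\s', '', cell).split(',')[0].split(';')[0]   # -> 'DariPersian'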
+
+
+#returns hashtag, followers, following, text, geo, date
+#cityCountry example: 'paris,france'
+def SearchForData(search_term, nTweets, cityCountry='', radius=100, numDays=1):
+
+    # Setup Tweepy API Authentication
+    #auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
+    #auth.set_access_token(access_token, access_token_secret)
+    #api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())
+
+    #--- Calculate geocoordinates from cityCountry --- --- ---- ---- --- --- --- --
+    coords = ''  # default so the empty-result DataFrame below works when no cityCountry is given
+    if cityCountry:
+        geoConvertTries = 0
+        while True:
+            try:
+                result = Geocoder.geocode(cityCountry)
+            except Exception as error:
+                #print('error: ', error.message)
+                if 'OVER_QUERY_LIMIT' in str(error):
+                    print('Encountered an error:{0}\nWaiting 20 seconds and trying again.'.format(error))
+                    time.sleep(20)
+                    if geoConvertTries > 10:
+                        print("Could not convert geo. Returning empty list.")
+                        return []
+                else:
+                    print("Could not convert geo. Returning empty list.")
+                    return []
+            else:
+                break
+            geoConvertTries += 1
+        # e.g. 34.0934,56.134,50mi
+        coords = str(result[0].coordinates).replace('(','').replace(')','') + f',{radius}mi'
+        coords = coords.replace(' ','')
+        print(cityCountry, ": ", coords)
+    #--- ---- ----- ---- ---- ---- ---- ---- --- ---- ---- --- ---- --- ---- --- --
+
+    #--- grab tweets --- ---- ---- ---- ---- ---- ---- ---- --- --- ---- ---- ----
+    maxTweets = 10000; oldest_tweet = None; unique_ids = []; desiredTweets = []; nTweetsPerDay = nTweets/numDays
+    for day, num in zip([str(date.today() - timedelta(i)).split()[0] for i in range(numDays)], range(1, numDays+1)):
+        tweetsPerDay = []; tries = 1
+        while len(desiredTweets) < min(nTweetsPerDay*num, maxTweets/numDays):
+
+            #--- determine whether to grab tweets by geo or not --- ---- --- ----- --
+            while True:
+                try:
+                    if cityCountry:
+                        tweetsPerDay = api.search(search_term, count=nTweetsPerDay, result_type="recent", max_id=oldest_tweet, geocode=coords, until=day)
+                    else:
+                        tweetsPerDay = api.search(search_term, count=nTweetsPerDay, result_type="recent", max_id=oldest_tweet, until=day)
+                    time.sleep(2)
+                except Exception as error:
+                    print(error, 'Trying again after 1 minute.')
+                    time.sleep(60)
+                else:
+                    break
+
+            #---- ----- ----- ---- ----- ---- ----- ---- ----- ---- ---- ---- ---- --
+
+            #--- Don't go through an infinite loop trying to fill tweets that don't exist -----
+            if len(tweetsPerDay['statuses']) == 0:
+                print(f'No tweets returned while searching for \'{search_term}\'', len(desiredTweets), ' ', day)
+                if desiredTweets == []:
+                    return pd.DataFrame([{'text':'', 'vader':0,
+                                          'location':cityCountry, 'geo':coords,
+                                          'hashtags':0, 'followers':0,
+                                          'friends_count':0,'statuses_count':0,
+                                          'created_at':datetime.strptime(str(date.today()), '%Y-%m-%d')}])
+                return pd.DataFrame(desiredTweets)
+
+            #--- Append relevant tweets to the output list ---- --- ---- ---- ---- --- ---
+            for tweet in tweetsPerDay['statuses']:
+                # Append the tweet id to the ids list if it isn't there already; this lets us skip duplicate tweets
+                if tweet["id"] not in unique_ids:
+                    unique_ids.append(tweet['id'])
+                    if cityCountry:
+                        desiredTweets.append({'text':tweet['text'], 'vader':analyzer.polarity_scores(tweet['text'])['compound'],
+                                              'location':cityCountry, 'geo':coords,
+                                              'hashtags':tweet['entities']['hashtags'], 'followers':tweet['user']['followers_count'],
+                                              'friends_count':tweet['user']['friends_count'],'statuses_count':tweet['user']['statuses_count'],
+                                              'created_at':datetime.strptime(tweet['created_at'],'%a %b %d %H:%M:%S %z %Y')})
+                    else:
+                        desiredTweets.append({'text':tweet['text'], 'vader':analyzer.polarity_scores(tweet['text'])['compound'],
+                                              'hashtags':tweet['entities']['hashtags'], 'followers':tweet['user']['followers_count'],
+                                              'friends_count':tweet['user']['friends_count'],'statuses_count':tweet['user']['statuses_count'],
+                                              'created_at':datetime.strptime(tweet['created_at'],'%a %b %d %H:%M:%S %z %Y')})
+
+                # Reassign the oldest tweet (i.e. the max_id); subtract 1 so the previous oldest isn't included
+                oldest_tweet = tweet["id"] - 1
+            print(f'{day}, {cityCountry}: attempt {tries}, {len(desiredTweets)} tweets gathered.'); tries += 1
+
+        #--- Print sample tweet --- --- ---- ---- --- ---- ---- --- ---- ---- ---
+        translator = Translator()
+        try:
+            print('Sample Tweet:', translator.translate(desiredTweets[0]['text'], dest='en').text)
+        except:
+            print('There was an error translating the sample tweet: ', desiredTweets[0]['text'])
+    return pd.DataFrame(desiredTweets) if cityCountry else desiredTweets
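+
+# Hedged, illustrative sketch (never called): the geocode string SearchForData builds for
+# api.search. The coordinates below are made up; the format Twitter expects is 'lat,lng,<radius>mi'.
+def _demo_geocode_string(coordinates=(48.8566, 2.3522), radius=100):
+    coords = str(coordinates).replace('(', '').replace(')', '') + f',{radius}mi'
+    return coords.replace(' ', '')   # -> '48.8566,2.3522,100mi'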
+
+
+def worldTweets(search_term, numTweets, numDays, translateToLocalLanguage=True):
+    #-- Get the most populated cities from wikipedia (Thank you wikipedia library!) --
+    html = wp.page("List_of_cities_by_population_density").html().encode("UTF-8")
+    worldCities = pd.read_html(html)[1]
+    worldCities = worldCities.drop([2,3,4], axis=1)
+    worldCities = worldCities.rename(columns={0:'city',1:'population',5:'density',6:'country'})
+    worldCities = worldCities.iloc[1:]
+    worldCities['population'] = [int(city.split('\xa0')[-1].split('[')[0].replace(',','')) for city in worldCities['population']]
+    worldCities['density'] = [int(city.split('\xa0')[-1].split('[')[0].replace(',','')) for city in worldCities['density']]
+    #--- ---- ----- ----- ---- ---- ----- ---- ---- ----- --- ---- ---- ----- ---- ---
+
+    #--- Get tweets for the world's most densely populated cities ---- --- ---- ---- --- ---
+    translator = Translator()
+    comparisons = pd.DataFrame(columns=['time density','sentiment'])
+    cityCount = 10
+    for index, row in worldCities.iterrows():
+        #-- location --- ----- --- ----
+        city, pop, density, country = row
+        cityCountry = city+' , '+country
+
+        #-- language conversion --- ---- --
+        languagesDf = getCountryLanguages()
+        if translateToLocalLanguage:
+            try:
+                translatedSearch = translator.translate(search_term, src='en', dest=languagesDf.loc[country,'language']).text
+            except ValueError:
+                print("could not translate ", languagesDf.loc[country,'language'])
+                translatedSearch = search_term
+            print('translated word: ', translatedSearch)
+        else:
+            translatedSearch = search_term
+        #--- --- --- ---- ---- --- --- ---
+
+        try:
+            tweetsWorld = pd.concat([tweetsWorld, SearchForData(translatedSearch, numTweets, cityCountry, 100)], axis=0)
+        except:
+            tweetsWorld = SearchForData(translatedSearch, numTweets, cityCountry, 100)
+        print('\n')
+        time.sleep(4)
+        #if cityCount==0:
+        #    break
+        #else:
+        #    cityCount-=1
+    return tweetsWorld
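+
+# Hedged, illustrative sketch (never called): the numeric cleanup used on the Wikipedia tables above
+# and below. A hypothetical cell such as '\xa018,000[4]' (non-breaking space, thousands separator,
+# footnote marker) is reduced to a plain integer.
+def _demo_wiki_number(cell='\xa018,000[4]'):
+    return int(cell.split('\xa0')[-1].split('[')[0].replace(',', ''))   # -> 18000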
+
+def usTweets(search_term, numTweets, numDays):
+    #--- population per city in the United States --- ---- ---- --- ---- --- --- --- ---
+    html = wp.page("List_of_United_States_cities_by_population_density").html().encode("UTF-8")
+    UScities = pd.read_html(html)[1]
+    UScities = UScities.drop([0,2,4,6,8], axis=1)
+    UScities = UScities.rename(columns={1:'city',3:'state',5:'land area (mi^2)',7:'density'})
+    UScities = UScities.iloc[1:]
+    #df['population']=[int(city.split('\xa0')[-1].split('[')[0].replace(',','')) for city in df['population']]
+    UScities['density'] = [float(city.split('\xa0')[-1].split('[')[0].replace(',','')) for city in UScities['density']]
+    UScities['land area (mi^2)'] = [float(area.split('\xa0')[-1]) for area in UScities['land area (mi^2)']]
+    #--- ---- ----- ----- ---- ---- ----- ---- ---- ----- --- ---- ---- ----- ---- ---
+
+
+    #--- Add US Cities --- ---- ---- ---- ---- ---
+    cityCount = 100; nottheseones = 0
+    for index, row in UScities.iterrows():
+        if nottheseones < 0:
+            nottheseones += 1
+            continue
+        #-- location --- ----- --- ----
+        city, state, area, density = row
+        cityState = state+' , '+city
+
+        try:
+            tweetsUS = pd.concat([tweetsUS, SearchForData(search_term=search_term, nTweets=numTweets, cityCountry=cityState, radius=max(area,5), numDays=numDays)], axis=0)
+        except:
+            tweetsUS = SearchForData(search_term=search_term, nTweets=numTweets, cityCountry=cityState, radius=max(area,5), numDays=numDays)
+
+        print('\n')
+        time.sleep(4)
+        if cityCount == 0:
+            break
+        else:
+            cityCount -= 1
+    return tweetsUS, UScities
+
+
+def GetTweetsByPopularCities(search_term, numTweets, translateToLocalLanguage = True, numDays = 1):
+    #tweetsWorld = worldTweets()
+    tweetsUS, UScities = usTweets(search_term, numTweets, numDays)
+    #return tweetsUS
+    tweetsUS['day'] = [ts.date() if ts != 0 else 0 for ts in tweetsUS['created_at']]
+
+
+    # Calculate the time density of tweets per city
+    cityData = tweetsUS.groupby(['location','geo','day']).mean()
+    aveTimes = {}
+    for group in tweetsUS.groupby(['location','geo','day'])['created_at']:
+        cityTime, timeseries = group
+        if timeseries is None:
+            aveTimes[cityTime] = None
+        (cityState, geo, timeobj) = cityTime
+        #print(cityTime)
+        [state, city] = cityState.split(',')
+        city = city.strip(); state = state.strip()
+        cityDensity = UScities.loc[UScities['city']==city].loc[UScities['state']==state].loc[:,'density'].values[0]
+        #return timeseries
+        try:
+            aveTimes[cityTime] = np.mean([(b-a).seconds*100/cityDensity for a,b in zip(timeseries.tolist()[1:], timeseries.tolist()[0:-1])])
+            if not aveTimes[cityTime]:
+                aveTimes[cityTime] = 0
+        except:
+            if len(timeseries.tolist()) < 2:
+                aveTimes[cityTime] = None
+        #print(f'{city}, {cityDensity}, {aveTimes[cityState]}')
+        #KeyError: ('New Jersey , Guttenberg', datetime.date(2018, 6, 30))
+    cityData['time between tweets'] = [aveTimes[citytime] for citytime in cityData.index]
+
+    #c_matrix = pd.pivot_table(news_data_new, "score", index=["name"], columns=["year_month"], aggfunc=np.mean)
+    matrix = pd.pivot_table(cityData.loc[:,['location','day','vader']].drop(columns=['location', 'day']), 'vader', index=['location'], columns=['day'], aggfunc=np.mean)
+    plt.figure(figsize=(12,12))
+    hm = heatmap(matrix, cmap='RdYlGn', annot=True)
+    hm.set_title('City Tweet Sentiment Over Time')
+    plt.savefig('/plots/cityTweetSentimentOverTime.png', dpi=100)
+    plt.show()
+    return cityData, tweetsUS
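+
+# Hedged, illustrative sketch (never called): a simplified version of the inter-tweet gap statistic
+# computed above. Consecutive timestamps are differenced and scaled by the city's population density;
+# the timestamps and density here are made up.
+def _demo_tweet_gap(density=10000.0):
+    stamps = [datetime(2018, 6, 30, 12, 0, 0), datetime(2018, 6, 30, 12, 0, 40), datetime(2018, 6, 30, 12, 2, 0)]
+    return np.mean([(b - a).seconds * 100 / density for a, b in zip(stamps[:-1], stamps[1:])])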
+
+
+def getSentimentsByPopularUsers(target_term):
+    # Get 100 recent tweets and create lists of tweets, sentiments, followers and retweets
+    public_tweets = api.search(target_term, count=100, lang="en", result_type="recent")
+    tweets = []
+    sentiments = []
+    followers = []
+    retweets = []
+    for tweet in public_tweets['statuses']:
+        tweets.append(tweet['text'])
+        followers.append(tweet['user']['followers_count'])
+        retweets.append(tweet['retweet_count'])
+        result = analyzer.polarity_scores(tweet["text"])
+        sentiments.append(result['compound'])
+    # Create a dataframe by combining all the lists
+    combined_df = pd.DataFrame({'Followers':followers, 'Sentiments':sentiments, 'Retweets':retweets, 'Tweets':tweets})
+
+    # Calculate the average sentiment of the top 10 users with the most followers
+    top_10_followers = combined_df.nlargest(10, 'Followers')
+    top_10_followers_average = round(top_10_followers['Sentiments'].mean(), 3)
+
+    top_10_retweets = combined_df.nlargest(10, 'Retweets')
+    top_10_retweets_average = round(top_10_retweets['Sentiments'].mean(), 3)
+
+    overall_average = round(combined_df['Sentiments'].mean(), 3)
+
+    # Print analysis
+    print('Average overall sentiment is ' + str(overall_average))
+    print('Sentiment of top 10 influential people is ' + str(top_10_followers_average))
+    print('Sentiment of top 10 retweeted tweets is ' + str(top_10_retweets_average))
+
+    if top_10_followers_average > overall_average:
+        print('Top 10 influential people have more positive sentiments than the general public about ' + target_term)
+    else:
+        print('The general public has more positive sentiments than the top 10 influential people about ' + target_term)
+
+    # Draw bar chart for sentiments
+    plt.figure(figsize=(15,10))
+    titles = ['Influential Users','Retweeted Most','Overall average']
+    sent = [top_10_followers_average, top_10_retweets_average, overall_average]
+    plt.bar(titles, sent)
+
+    plt.title(f"Sentiment Analysis {time.strftime('%x %X')}")
+    plt.ylabel("Tweet Polarity")
+    plt.axhline(0, color='black')
+    plt.savefig("/plots/output_9_1.png")
+    #plt.ylim(-1.0,1.0)
+    plt.show()
+
+    # Draw scatter chart for sentiments, number of followers and number of retweets
+    # Remove the 20 users with the highest follower counts to allow a more meaningful visualization
+    combined_df1 = combined_df.nsmallest(80, 'Followers')
+    plt.figure(figsize=(15,10))
+    plt.scatter(combined_df1['Followers'], combined_df1['Sentiments'], color="lightskyblue", edgecolor="black",
+                label="Number of retweets", alpha=0.5, s=combined_df1['Retweets'], linewidths=1)
+    plt.title(f"Sentiment Analysis based on Number of Followers {time.strftime('%x %X')}")
+    plt.xlabel("Number of Followers")
+    plt.ylabel("Tweet Polarity")
+    plt.ylim(-1.0, 1.0)
+
+    lgnd = plt.legend(loc="upper right", title="Sentiment Analysis based on Number of Followers", scatterpoints=1)
+    lgnd.legendHandles[0]._sizes = [50]
+    plt.savefig("/plots/output_8_1.png")
+    plt.show()
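+
+# Hedged, illustrative sketch (never called): the ranking and averaging used above, on a toy frame.
+# Only DataFrame.nlargest and .mean are involved; the numbers are made up.
+def _demo_top_follower_sentiment():
+    sample = pd.DataFrame({'Followers': [10, 5000, 120], 'Sentiments': [0.4, -0.2, 0.1]})
+    top_two = sample.nlargest(2, 'Followers')
+    return round(top_two['Sentiments'].mean(), 3), round(sample['Sentiments'].mean(), 3)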
+
+def bar_news(news_data):
+    '''takes a DataFrame as an argument,
+    creates a bar chart "Overall Quantity of Articles per News Source"
+    '''
+    # group data by news source names
+    group_news = news_data.groupby("name")
+    group_count = group_news['published_at'].count().rename("count")
+
+    df_group_count = pd.DataFrame(group_count)
+    df_group_count.sort_values(by=['count'], inplace=True, ascending=False)
+    df_group_count.reset_index(inplace=True)
+
+    # create a bar chart to show the quantity of articles for each news source
+    group_name = df_group_count['name']
+    y_pos = np.arange(len(df_group_count['count']))
+    plt.figure(figsize=(10, 7))
+    plt.barh(y_pos, df_group_count['count'], tick_label=group_name, alpha=0.6, color="green")
+    plt.title(f"Overall Quantity of Articles per News Source, 09/01/17-{time.strftime('%x')}")
+    plt.xlabel("Number of Articles")
+    plt.savefig("/plots/news_bar_chart.png", dpi=100, bbox_inches='tight')
+    plt.show()
+
+    # add analysis for the bar chart
+    q = len(df_group_count['count'])-1
+    name_min = df_group_count.iloc[q,0]
+    min_q = df_group_count.iloc[q,1]
+
+    name_max = df_group_count.iloc[0,0]
+    max_q = df_group_count.iloc[0,1]
+    print(f'Based on the "Overall Quantity of Articles per News Source" chart, {name_max} published the most articles ({max_q}).')
+    print(f'{name_min} published the fewest articles ({min_q}) related to the given keywords.')
+
+def compound_scores_heat_map(news_data_new):
+    '''takes a DataFrame with compound scores as an argument,
+    creates a heatmap chart of the average compound scores.
+    Since some news sources didn't publish articles in particular months,
+    pd.pivot_table gives us a 2D table where missing values are filled with NaN.
+    '''
+    c_matrix = pd.pivot_table(news_data_new, "score", index=["name"], columns=["year_month"], aggfunc=np.mean)
+    c_matrix = c_matrix.round(2)
+
+    # create a heatmap chart of the average compound scores
+    fig = plt.figure(figsize=(12,12))
+    hm = sns.heatmap(c_matrix, cmap='RdYlGn', annot=True)
+    hm.set_title(f"Heatmap of Avg. Compound Scores, 09/01/17-{time.strftime('%x')}")
+    plt.savefig("/plots/compound_scores_heat_map.png", dpi=100)
+    plt.show()
+
+
+def compound_score_data(news_data):
+    '''takes a DataFrame as an argument and calculates compound scores for article descriptions,
+    returns a new DataFrame
+    '''
+    news_data_new = news_data.loc[:,['year_month', 'name', 'description']]
+    for index, row in news_data_new.iterrows():
+        text = news_data_new.loc[index,"description"]
+        if text:
+            results = analyzer.polarity_scores(text)
+            news_data_new.loc[index,"score"] = round(results["compound"], 2)
+        else:
+            news_data_new.drop(index, inplace=True)
+    print(news_data_new.head())
+    compound_scores_heat_map(news_data_new)
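+
+# Hedged, illustrative sketch (never called): the pivot both heatmap helpers rely on. A long table
+# of (name, year_month, score) rows becomes a source-by-month matrix, with NaN wherever a source
+# published nothing in a month. The sample values are made up.
+def _demo_score_pivot():
+    demo = pd.DataFrame({'name': ['BBC News', 'BBC News', 'Reuters'],
+                         'year_month': ['2018-05', '2018-06', '2018-05'],
+                         'score': [0.10, -0.30, 0.50]})
+    return pd.pivot_table(demo, 'score', index=['name'], columns=['year_month'], aggfunc=np.mean)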
+
+
+def count_articles_heat_map(news_data):
+    '''takes a DataFrame as an argument,
+    creates a heatmap chart "Number of Articles per Month".
+    Since some news sources didn't publish articles in particular months,
+    pd.pivot_table gives us a 2D table where missing values are filled with NaN.
+    '''
+    d_matrix = pd.pivot_table(news_data, "url", index=["name"], columns=["year_month"], aggfunc='count')
+    d_matrix.head(20)
+    fig = plt.figure(figsize=(12,12))
+    cm = sns.heatmap(d_matrix, cmap='YlGnBu', annot=True, fmt='g')
+    cm.set_title(f"Number of Articles per Month, 09/01/17-{time.strftime('%x')}")
+    plt.savefig("/plots/count_values_heat_map.png", dpi=100)
+    plt.show()
+
+
+def news_api_data(user_input):
+    '''takes an argument "user_input" as keywords to perform API calls to newsapi.org,
+    returns a DataFrame with news data
+    '''
+
+    # if the user's input has several words, mark words or phrases that must appear with a '+' symbol
+    user_input_list = user_input.split(" ")
+    user_input_plus = "".join(f'+{k}' for k in user_input_list)
+    print(user_input_plus)
+
+    url = "https://newsapi.org/v2/everything"
+
+    # specify request parameters, i.e. results come only from the listed news sources
+    sparams = {
+        'apiKey': newsKey,
+        'q': user_input_plus,
+        'language': 'en',
+        'pageSize': 100,
+        'from': '2017-06-01',
+        'sortBy': 'publishedAt',
+        'sources': 'abc-news, associated-press, bbc-news, breitbart-news, cbc-news, cbs-news, cnn, fox-news, daily-mail, independent, nbc-news, newsweek, new-york-magazine, reuters, time, the-globe-and-mail, the-telegraph, the-washington-times, usa-today'
+    }
+
+    # make the API request and retrieve the totalResults value
+    responses = requests.get(url, params=sparams)
+
+    st_code = responses.status_code
+    if st_code != 200:
+        print(f'API problem, code {responses.status_code}')
+
+    news_info = responses.json()
+    #print(responses.json())
+
+    sample_total = news_info['totalResults']
+
+    # if the response contains more than 10000 articles, retrieve information for only 10000;
+    # otherwise the API returns error 429 - too many requests
+    if sample_total > 10000:
+        sample_total = 10000
+    print(f'The total number of results is {sample_total}')
+
+    # calculate the number of pages to iterate through (the total number of results divided by 100 records per page)
+    x = min(80, math.ceil(sample_total/100))
+    print(f'The number of pages is {x}')
+
+    # create a DataFrame for the retrieved results
+    news_data = pd.DataFrame(columns=["published_at","name", "author", "description", "url","year_month"])
+    index = 0
+
+    # iterate through each page and store the results in the DataFrame
+    for page in range(1, x+1):
+        sparams['page'] = page
+        responses = requests.get(url, params=sparams)
+        news_info = responses.json()
+        #print(f'processing page {page} of {x}')
+        if 'articles' not in news_info:
+            #print(f'skipping page {page} of {x}')
+            continue
+        for article in news_info['articles']:
+            news_data.loc[index,"published_at"] = article["publishedAt"]
+            news_data.loc[index,"name"] = article["source"]["name"]
+            news_data.loc[index,"author"] = article["author"]
+            news_data.loc[index,"description"] = article["description"]
+            news_data.loc[index,"url"] = article["url"]
+            news_data.loc[index,"year_month"] = article["publishedAt"][:7]
+            index += 1
+
+    # remove duplicate records
+    news_data.drop_duplicates(subset=['published_at','name','description'], keep='first', inplace=True)
+
+    news_data.to_csv("report.csv")
+    #news_data.info()
+    bar_news(news_data)
+    compound_score_data(news_data)
+    count_articles_heat_map(news_data)
+    #except:
+    #    print('Query limit reached. Skipping news analysis')
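+
+# Hedged, illustrative sketch (never called): the '+' keyword marking used above. newsapi.org treats
+# '+'-prefixed terms as words that must all appear; the sample input is hypothetical.
+def _demo_plus_query(user_input='climate change'):
+    return "".join(f'+{k}' for k in user_input.split(" "))   # -> '+climate+change'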
+
+
+def getWordCloud(search_term):
+    # SearchForData inputs: search_term, nTweets, cityCountry='', radius=100, numDays=1
+    tweet = SearchForData(search_term, 100)
+    # Gather all hashtags into a list
+    hashtags = []
+    text = ''
+    for t in range(len(tweet)):
+        for u in (tweet[t]['hashtags']):
+            hashtags.append(u['text'])
+
+    #print(hashtags)
+    # Join the hashtags into a single string, since WordCloud needs a string and can't use a list
+    text = ' '.join(hashtags)
+    #print(text)
+    # Use the WordCloud library to create a word cloud chart
+    wordcloud = WordCloud(background_color='white', max_words=len(text), max_font_size=40, relative_scaling=.15).generate(text)
+
+    plt.figure(figsize=(7,10))
+    plt.imshow(wordcloud)
+    plt.axis('off')
+    plt.savefig('/plots/WordCloud.png', dpi=500)
+    plt.show()
+
+
+def main(search_term):
+    timeStart = time.time()
+
+    getSentimentsByPopularUsers(search_term)
+    news_api_data(search_term)
+    getWordCloud(search_term)
+    # GetTweetsByPopularCities inputs: search_term, numTweets, translateToLocalLanguage = True, numDays = 1
+    cityDataMain = GetTweetsByPopularCities(search_term, numTweets=100, translateToLocalLanguage=False, numDays=6)
+
+    print(f'Run took {(time.time()-timeStart)//60} minutes')
+    return cityDataMain
diff --git a/plots/WordCloud.png b/plots/WordCloud.png
new file mode 100644
index 0000000..ebf6a1b
Binary files /dev/null and b/plots/WordCloud.png differ
diff --git a/plots/compound_scores_heat_map.png b/plots/compound_scores_heat_map.png
new file mode 100644
index 0000000..8eadecd
Binary files /dev/null and b/plots/compound_scores_heat_map.png differ
diff --git a/plots/count_values_heat_map.png b/plots/count_values_heat_map.png
new file mode 100644
index 0000000..23b6132
Binary files /dev/null and b/plots/count_values_heat_map.png differ
diff --git a/plots/fig.png b/plots/fig.png
new file mode 100644
index 0000000..6b03a4c
Binary files /dev/null and b/plots/fig.png differ
diff --git a/plots/news_bar_chart.png b/plots/news_bar_chart.png
new file mode 100644
index 0000000..468c239
Binary files /dev/null and b/plots/news_bar_chart.png differ
diff --git a/plots/output_8_1.png b/plots/output_8_1.png
new file mode 100644
index 0000000..4d39cb3
Binary files /dev/null and b/plots/output_8_1.png differ
diff --git a/plots/output_9_1.png b/plots/output_9_1.png
new file mode 100644
index 0000000..a7ee1d7
Binary files /dev/null and b/plots/output_9_1.png differ