diff --git a/DataSearch.ipynb b/DataSearch.ipynb index 73d2838..c20ae8d 100644 --- a/DataSearch.ipynb +++ b/DataSearch.ipynb @@ -2,7 +2,1018 @@ "cells": [ { "cell_type": "code", - "execution_count": 12, + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "translated word: aso\n", + "\n", + "Manila , Philippines : 14.5995124,120.9842195,100mi\n", + "there was an error translating sample tweet: @ryapee Hi Rya order ako ulit polyblender mejo winasak ng aso ko. 😂\n", + "\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "{'Manila,Philippines': followers following geo \\\n", + " 0 154060 False None \n", + " 1 83 False None \n", + " 2 643 False None \n", + " 3 2729 False None \n", + " 4 25 False None \n", + " 5 116 False None \n", + " 6 113 False None \n", + " 7 130 False None \n", + " 8 54 False None \n", + " 9 24 False None \n", + " 10 841 False None \n", + " 11 260 False None \n", + " 12 260 False None \n", + " 13 176 False None \n", + " 14 1357 False None \n", + " 15 422 False None \n", + " 16 422 False None \n", + " 17 365 False None \n", + " 18 75 False None \n", + " 19 1021 False None \n", + " 20 47 False None \n", + " 21 147 False None \n", + " 22 269 False None \n", + " 23 548 False None \n", + " 24 88 False None \n", + " 25 394 False None \n", + " 26 105 False None \n", + " 27 174 False None \n", + " 28 90 False None \n", + " 29 850 False None \n", + " .. ... ... ... \n", + " 70 80 False None \n", + " 71 180 False None \n", + " 72 1280 False None \n", + " 73 74 False None \n", + " 74 72 False None \n", + " 75 365 False None \n", + " 76 301 False None \n", + " 77 561 False None \n", + " 78 180 False None \n", + " 79 329 False None \n", + " 80 5 False None \n", + " 81 797 False None \n", + " 82 293 False None \n", + " 83 293 False None \n", + " 84 84 False None \n", + " 85 293 False None \n", + " 86 440 False None \n", + " 87 476 False None \n", + " 88 35 False None \n", + " 89 183 False None \n", + " 90 62 False None \n", + " 91 44 False None \n", + " 92 167 False None \n", + " 93 536 False None \n", + " 94 3228 False {'type': 'Point', 'coordinates': [14.61941886,... \n", + " 95 242 False None \n", + " 96 473 False None \n", + " 97 220 False None \n", + " 98 141 False None \n", + " 99 200 False None \n", + " \n", + " hashtags statuses_count \\\n", + " 0 [] 22455 \n", + " 1 [] 3030 \n", + " 2 [{'text': 'AdoptDontShop', 'indices': [38, 52]}] 8529 \n", + " 3 [] 28771 \n", + " 4 [] 14 \n", + " 5 [] 1993 \n", + " 6 [] 4495 \n", + " 7 [] 1745 \n", + " 8 [] 727 \n", + " 9 [] 296 \n", + " 10 [] 29025 \n", + " 11 [] 8486 \n", + " 12 [] 8486 \n", + " 13 [] 4682 \n", + " 14 [] 15736 \n", + " 15 [] 21045 \n", + " 16 [] 21045 \n", + " 17 [] 2083 \n", + " 18 [] 243 \n", + " 19 [] 11119 \n", + " 20 [] 905 \n", + " 21 [] 3406 \n", + " 22 [] 21310 \n", + " 23 [] 3553 \n", + " 24 [] 1470 \n", + " 25 [] 67411 \n", + " 26 [] 417 \n", + " 27 [] 4033 \n", + " 28 [] 828 \n", + " 29 [] 11211 \n", + " .. ... ... \n", + " 70 [] 1208 \n", + " 71 [] 6216 \n", + " 72 [] 5483 \n", + " 73 [] 2055 \n", + " 74 [] 456 \n", + " 75 [] 2083 \n", + " 76 [] 4788 \n", + " 77 [] 13927 \n", + " 78 [] 6718 \n", + " 79 [] 1259 \n", + " 80 [] 460 \n", + " 81 [] 42407 \n", + " 82 [] 3881 \n", + " 83 [] 3881 \n", + " 84 [] 15631 \n", + " 85 [] 3881 \n", + " 86 [] 1375 \n", + " 87 [] 7158 \n", + " 88 [] 1902 \n", + " 89 [] 1154 \n", + " 90 [] 756 \n", + " 91 [] 64 \n", + " 92 [] 2591 \n", + " 93 [] 6135 \n", + " 94 [] 10223 \n", + " 95 [] 4463 \n", + " 96 [] 5234 \n", + " 97 [] 10928 \n", + " 98 [] 12781 \n", + " 99 [] 8882 \n", + " \n", + " text \n", + " 0 @ryapee Hi Rya order ako ulit polyblender mejo... \n", + " 1 Yung dating saling pusa naging aso bigla // 🎶 \n", + " 2 sana meron din dito sa Pilipinas yung #AdoptDo... \n", + " 3 Aso nga kasi ako, bantay ako dito hahaha 😂 \n", + " 4 lakas mangahol ng kaklase ko dinaig pa aso nam... \n", + " 5 @LampanoElla Dun sa aso oo HAHA \n", + " 6 Me: labas mo dila para lumabas dila ng aso \\nS... \n", + " 7 cute kong aso https://t.co/twDprm7o5P \n", + " 8 me: pabili pong dog food\\ntindero: alin? ung p... \n", + " 9 @dsgalarpez hahahaha aso ka na ba ngayon? \n", + " 10 @DenniceRoselle Uy kawawa mga aso. Di naman si... \n", + " 11 Nagtanggal tuloy ako nang mga tae nang aso kai... \n", + " 12 Nakakapikon ung aso 😭😭 \n", + " 13 May mga sakit aso namin hanep \n", + " 14 i hate when strangers esp. men look at you str... \n", + " 15 @dnnkthryn Ngek malas. Sa rosewood naman okay ... \n", + " 16 @dnnkthryn Yup. Iba talaga pag bahay super fre... \n", + " 17 Tang ina sobrang iba pala pakiramdam pag namat... \n", + " 18 aso't pusa ❤\\ngoodmorning hubby babe ! https:/... \n", + " 19 RT @akoposimarcelo: Yung buti pa yung mga aso,... \n", + " 20 napaka clingy ng aso ko, nebeyen hehe \n", + " 21 ang laki ng aso!!! panay naka tahol \n", + " 22 Alam ata ng aso ko na birthday ko ngayon. Iba ... \n", + " 23 Nagduduet nanaman yung dalawang aso hays \n", + " 24 Yung aso naming maligalig na palundag lundag pa \n", + " 25 Mukha talaga akong tanga kapag nakikipaglaro a... \n", + " 26 RT @akoposimarcelo: Yung buti pa yung mga aso,... \n", + " 27 nagdisitahulan mga aso ang creepy huhu \n", + " 28 @glbysrcmny aso \n", + " 29 Distemper virus. may ganyan plang sakit nang m... \n", + " .. ... \n", + " 70 Plus two agad aso namen AAHAHAHAHAH saya \n", + " 71 @jhnlstrpgnsn Hahaha hindi ko aso yun sa ate k... \n", + " 72 RT @akoposimarcelo: Yung buti pa yung mga aso,... \n", + " 73 Mama: Tanga Di Mo Pa Pinapakain Yung Aso.\\n\\nT... \n", + " 74 @cescamarii di yan kusa aso yan \n", + " 75 Nasagasaan aso ko 😢 \n", + " 76 Ngayon ko nalang na appreciate ulit yung ganda... \n", + " 77 pag gantong nalulungkot ako imbis na maghanap ... \n", + " 78 creepy ng aso pero mahal ko kayo HAHAHHHAHAHA \n", + " 79 Pagod bebi ko ako inaantok na tagal ni aso HAH... \n", + " 80 So ayun diba may hamster kami si Luxus saka si... \n", + " 81 @Anniefernando6 @aldenAllTheWay Baka kayo ang ... \n", + " 82 hindi ako to pramis. hahahahahahaha hindi ako ... \n", + " 83 HUY ANG WEIRD TALAGA KASI NAGSESAVE AKO NG PIC... \n", + " 84 Fun fact about you — Sobrang love ko yung mga ... \n", + " 85 diko maintindihan bakit ako nagsesave ng pictu... \n", + " 86 Hayaan mo lang na husgahan ka nila.Hindi yung ... \n", + " 87 @pauiicosta lumabas na naman pagka aso mo haha... \n", + " 88 nagttampo ako kay potchi, tangina aso lang yon... \n", + " 89 Ako: pare iiyak ka pag namatay aso mo? \\nRoque... \n", + " 90 Aso ko e pero di na maghahabol🐶 https://t.co/Y... \n", + " 91 hi aso \n", + " 92 Nangapitbahay nako para sa aso. Happy pill! 😊 ... \n", + " 93 NP: Banal na aso, santong kabayo\\n\\nNatatawa a... \n", + " 94 Late post: Buti na lang talaga alert ako..kunc... \n", + " 95 tao,ahas at aso. \n", + " 96 ALAM NIYO BANG MUNTIKAN NG GAWING PAGKAIN NG A... \n", + " 97 Dang kyot ng aso nila Sir huhu i want 😍 \n", + " 98 Hindi sa dinidepensahan pero may umuulol na na... \n", + " 99 RT @Itsmeearlbravo: Di naman siguro ako pinang... \n", + " \n", + " [100 rows x 6 columns]}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import wikipedia as wp\n", + "from pygeocoder import Geocoder\n", + "import time\n", + "from googletrans import Translator\n", + "# Import and Initialize Sentiment Analyzer\n", + "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n", + "analyzer = SentimentIntensityAnalyzer()\n", + "import requests\n", + "import pandas as pd\n", + "from bs4 import BeautifulSoup\n", + "import re\n", + "\n", + "import tweepy; import json\n", + "from apikeys import twitterAccessToken as access_token\n", + "from apikeys import twitterAccessTokenSecret as access_token_secret\n", + "from apikeys import twitterConsumerKey as consumer_key\n", + "from apikeys import twitterConsumerSecretKey as consumer_secret\n", + "\n", + "def parse_url( url):\n", + " response = requests.get(url)\n", + " soup = BeautifulSoup(response.text, 'lxml')\n", + " listylist=[]\n", + " for table in soup.find_all('table'):\n", + " listylist.append(parse_html_table(table))\n", + " return listylist\n", + "\n", + "def parse_html_table( table):\n", + " n_columns = 0; n_rows=0; column_names = []\n", + "\n", + " # Find number of rows and columns\n", + " # we also find the column titles if we can\n", + " for row in table.find_all('tr'):\n", + "\n", + " # Determine the number of rows in the table\n", + " td_tags = row.find_all('td')\n", + " if len(td_tags) > 0:\n", + " n_rows+=1\n", + " if n_columns == 0:\n", + " # Set the number of columns for our table\n", + " n_columns = len(td_tags)\n", + "\n", + " # Handle column names if we find them\n", + " th_tags = row.find_all('th') \n", + " if len(th_tags) > 0 and len(column_names) == 0:\n", + " for th in th_tags:\n", + " column_names.append(th.get_text())\n", + "\n", + " # Safeguard on Column Titles\n", + " if len(column_names) > 0 and len(column_names) != n_columns:\n", + " raise Exception(\"Column titles do not match the number of columns\")\n", + "\n", + " columns = column_names if len(column_names) > 0 else range(0,n_columns)\n", + " df = pd.DataFrame(columns = columns,\n", + " index= range(0,n_rows))\n", + " row_marker = 0\n", + " for row in table.find_all('tr'):\n", + " column_marker = 0\n", + " columns = row.find_all('td')\n", + " for column in columns:\n", + " df.iat[row_marker,column_marker] = column.get_text()\n", + " column_marker += 1\n", + " if len(columns) > 0:\n", + " row_marker += 1\n", + "\n", + " # Convert to float if possible\n", + " for col in df:\n", + " try:\n", + " df[col] = df[col].astype(float)\n", + " except ValueError:\n", + " pass\n", + "\n", + " return df\n", + "\n", + "def getCountryLanguages():\n", + " #TODO: Use the .apply to just change the table to one dialect. Imrpove language scope later.\n", + " df = parse_url('https://www.infoplease.com/world/countries-world/languages-spoken-each-country-world')\n", + " countryLanguages = df[0].rename(columns={0:'country',1:'language'}).set_index('country')\n", + " countryLanguages['language'] = [re.sub('\\d+|%|\\(.*\\)|\\s','',i).split(',')[0].split(';')[0] for i in countryLanguages['language']]\n", + " return countryLanguages\n", + "\n", + "\n", + "#returns hashtag, followers, following, text, geo, date\n", + "def SearchForData(search_term, nTweets, cityCountry='',radius=100):\n", + " \n", + " # Setup Tweepy API Authentication\n", + " auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n", + " auth.set_access_token(access_token, access_token_secret)\n", + " api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())\n", + " \n", + " #--- Calculate geocordinates from cityCountry --- --- ---- ---- --- --- --- --\n", + " try:\n", + " result = Geocoder.geocode(cityCountry)\n", + " except Exception as error:\n", + " raise ValueError(f'Cannot translate to geo coordinates.\\n',error)\n", + " # 34.0934,56.134,50mi\n", + " coords = str(result[0].coordinates).replace('(','').replace(')','') + f',{radius}mi'\n", + " coords=coords.replace(' ','')\n", + " print(cityCountry, \": \", coords)\n", + " #--- ---- ----- ---- ---- ---- ---- ---- --- ---- ---- --- ---- --- ---- --- --\n", + "\n", + " #--- grab tweets --- ---- ---- ---- ---- ---- ---- ---- --- --- ---- ---- ----\n", + " maxTweets = 10000; public_tweets=[]; oldest_tweet = None; unique_ids = []; desiredTweets = []\n", + " while len(desiredTweets) < min(nTweets,maxTweets):\n", + " #--- determine whether to grab tweets by geo or not --- ---- --- ----- --\n", + " if cityCountry:\n", + " public_tweets = api.search(search_term, count=100, result_type=\"recent\", max_id=oldest_tweet, geocode=coords)\n", + " else:\n", + " public_tweets = api.search(search_term, count=100, result_type=\"recent\", max_id=oldest_tweet)\n", + " #---- ----- ----- ---- ----- ---- ----- ---- ----- ---- ---- ---- ---- --\n", + " \n", + " #--- Dont go through an infinite loop trying to fill tweets that don't exist -----\n", + " if len(public_tweets['statuses'])==0:\n", + " print(f'No tweets returned while searching for \\'{search_term}\\'')\n", + " print(public_tweets['statuses'])\n", + " return desiredTweets\n", + " #--- --- ---- ---- ----- ---- ---- ---- ---- ---- ---- --- ---- ---- --- -- ------\n", + " \n", + " #--- Append relevent tweets to output listy list ---- --- ---- ---- ---- --- ---\n", + " for tweet in public_tweets['statuses']:\n", + " # Append tweet_id to ids list if it doesn't already exist. This allows checking for duplicate tweets\n", + " if tweet[\"id\"] not in unique_ids :\n", + " unique_ids.append(tweet['id'])\n", + " desiredTweets.append({'text':tweet['text'], 'geo':tweet['geo'], \n", + " 'hashtags':tweet['entities']['hashtags'], 'followers':tweet['user']['followers_count'],\n", + " 'friends_count':tweet['user']['friends_count'],'statuses_count':tweet['user']['statuses_count']})\n", + " \n", + " # Reassign the the oldest tweet (i.e. the max_id) subtract 1 so the previous oldest isn't included\n", + " oldest_tweet = tweet[\"id\"] - 1\n", + "\n", + " #--- Print sample tweet --- --- ---- ---- --- ---- ---- --- ---- ---- ---\n", + " translator = Translator()\n", + " try:\n", + " print ('Sample Tweet:',translator.translate(desiredTweets[0]['text'], dest='en').text)\n", + " except:\n", + " print('there was an error translating sample tweet: ',desiredTweets[0]['text'])\n", + " return pd.DataFrame(desiredTweets)\n", + "\n", + "\n", + "def GetTweetsByPopularCities(search_term, numTweets):\n", + " #-- Get the most populated cities from wikipedia (Thank you wikipedia library!) --\n", + " html = wp.page(\"List_of_cities_by_population_density\").html().encode(\"UTF-8\")\n", + " df = pd.read_html(html)[1]\n", + " df=df.drop([2,3,4,5],axis=1)\n", + " df=df.rename(columns={0:'city',1:'population',6:'country'})\n", + " df=df.iloc[1:]\n", + " #--- ---- ----- ----- ---- ---- ----- ---- ---- ----- --- ---- ---- ----- ---- ---\n", + "\n", + " translator = Translator()\n", + " tweets={}\n", + " for index,row in df.iterrows():\n", + " #-- location --- ----- --- ----\n", + " city,pop,country = row\n", + " cityCountry = city+' , '+country\n", + "\n", + " #-- language conversion --- ---- --\n", + " languagesDf = getCountryLanguages()\n", + " try:\n", + " translatedSearch = translator.translate(search_term, src='en', dest=languagesDf.loc[country,'language']).text\n", + " except ValueError:\n", + " print(\"could not translate \", languagesDf.loc[country,'language'])\n", + " translatedSearch=search_term\n", + " print('translated word: ',translatedSearch)\n", + " tweets[city+','+country]=SearchForData(translatedSearch, numTweets, cityCountry, 100)\n", + " print('\\n')\n", + " time.sleep(4)\n", + " break\n", + " return tweets\n", + "\n", + "tweets = GetTweetsByPopularCities('dog', 100)\n", + "tweets{'Manila,Philippines'}\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "import matplotlib.pyplot as plt\n", + "timeBetween=[]\n", + "prevCreatedAt = datetime.strptime(tweets[0]['created_at'],'%a %b %d %H:%M:%S %z %Y')\n", + "for tweet in tweets[1:]:\n", + " #print(tweet['created_at'])\n", + " timeBetween.append((prevCreatedAt - datetime.strptime(tweet['created_at'],'%a %b %d %H:%M:%S %z %Y')).total_seconds()/60)\n", + " prevCreatedAt = datetime.strptime(tweet['created_at'],'%a %b %d %H:%M:%S %z %Y')\n", + "plt.plot(range(999),timeBetween)\n", + "plt.show()\n", + "plt.plot([datetime.strptime(tweet['created_at'],'%a %b %d %H:%M:%S %z %Y') for tweet in tweets][::-1],range(1000))\n", + "#plt.x('date',rotation='vertical')\n", + "plt.show()\n", + "print((datetime.strptime(tweets[-1]['created_at'],'%a %b %d %H:%M:%S %z %Y')-datetime.strptime(tweets[0]['created_at'],'%a %b %d %H:%M:%S %z %Y')).total_seconds()//60//60)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#This code came from the following link:\n", + "#https://srome.github.io/Parsing-HTML-Tables-in-Python-with-BeautifulSoup-and-pandas/\n", + "import requests\n", + "import pandas as pd\n", + "from bs4 import BeautifulSoup\n", + "import re\n", + "\n", + "class HTMLTableParser():\n", + "\n", + " def parse_url(self, url):\n", + " response = requests.get(url)\n", + " soup = BeautifulSoup(response.text, 'lxml')\n", + " listylist=[]\n", + " for table in soup.find_all('table'):\n", + " listylist.append(self.parse_html_table(table))\n", + " return listylist\n", + "\n", + " def parse_html_table(self, table):\n", + " n_columns = 0; n_rows=0; column_names = []\n", + "\n", + " # Find number of rows and columns\n", + " # we also find the column titles if we can\n", + " for row in table.find_all('tr'):\n", + "\n", + " # Determine the number of rows in the table\n", + " td_tags = row.find_all('td')\n", + " if len(td_tags) > 0:\n", + " n_rows+=1\n", + " if n_columns == 0:\n", + " # Set the number of columns for our table\n", + " n_columns = len(td_tags)\n", + "\n", + " # Handle column names if we find them\n", + " th_tags = row.find_all('th') \n", + " if len(th_tags) > 0 and len(column_names) == 0:\n", + " for th in th_tags:\n", + " column_names.append(th.get_text())\n", + "\n", + " # Safeguard on Column Titles\n", + " if len(column_names) > 0 and len(column_names) != n_columns:\n", + " raise Exception(\"Column titles do not match the number of columns\")\n", + "\n", + " columns = column_names if len(column_names) > 0 else range(0,n_columns)\n", + " df = pd.DataFrame(columns = columns,\n", + " index= range(0,n_rows))\n", + " row_marker = 0\n", + " for row in table.find_all('tr'):\n", + " column_marker = 0\n", + " columns = row.find_all('td')\n", + " for column in columns:\n", + " df.iat[row_marker,column_marker] = column.get_text()\n", + " column_marker += 1\n", + " if len(columns) > 0:\n", + " row_marker += 1\n", + "\n", + " # Convert to float if possible\n", + " for col in df:\n", + " try:\n", + " df[col] = df[col].astype(float)\n", + " except ValueError:\n", + " pass\n", + "\n", + " return df\n", + "\n", + "#TODO: Use the .apply to just change the table to one dialect. Imrpove language scope later.\n", + "obj = HTMLTableParser()\n", + "df = obj.parse_url('https://www.infoplease.com/world/countries-world/languages-spoken-each-country-world')\n", + "countryLanguages = df[0].rename(columns={0:'country',1:'language'}).set_index('country')\n", + "newDict = []\n", + "for index, row in countryLanguages.iterrows():\n", + " language = re.sub('\\d+|%|\\(.*\\)|\\s','',countryLanguages.loc[index].values[0]).split(',')[0].split(';')[0]\n", + " newDict.append([index, language]) #print(index,\": \",language)\n", + "newDict\n", + "\n", + "newDf = pd.DataFrame(newDict)\n", + "newDf = newDf.rename(columns={0:'country',1:'language'}).set_index('country')\n", + "newDf" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
languagelanguages
country
AfghanistanDari Persian, Pashtu (both official), other Tu...DariPersian
AlbaniaAlbanian (Tosk is the official dialect), GreekAlbanian
AlgeriaArabic (official), French, Berber dialectsArabic
AndorraCatalán (official), French, Castilian, PortugueseCatalán
AngolaPortuguese (official), Bantu and other African...Portuguese
Antigua and BarbudaEnglish (official), local dialectsEnglish
ArgentinaSpanish (official), English, Italian, German, ...Spanish
ArmeniaArmenian 98%, Yezidi, RussianArmenian
AustraliaEnglish 79%, native and other languagesEnglish
AustriaGerman (official nationwide); Slovene, Croatia...German
AzerbaijanAzerbaijani Turkic 89%, Russian 3%, Armenian 2...AzerbaijaniTurkic
BahamasEnglish (official), Creole (among Haitian immi...English
BahrainArabic, English, Farsi, UrduArabic
BangladeshBangla (official), EnglishBangla
BarbadosEnglishEnglish
BelarusBelorussian (White Russian), Russian, otherBelorussian
BelgiumDutch (Flemish) 60%, French 40%, German less t...Dutch
BelizeEnglish (official), Spanish, Mayan, Garifuna (...English
BeninFrench (official), Fon, Yoruba, tribal languagesFrench
BhutanDzongkha (official), Tibetan dialects (among B...Dzongkha
BoliviaSpanish, Quechua, Aymara (all official)Spanish
Bosnia and HerzegovinaBosnian, Croatian, SerbianBosnian
BotswanaEnglish 2% (official), Setswana 78%, Kalanga 8...English
BrazilPortuguese (official), Spanish, English, FrenchPortuguese
BruneiMalay (official), English, ChineseMalay
BulgariaBulgarian 85%, Turkish 10%, Roma 4%Bulgarian
Burkina FasoFrench (official); native African (Sudanic) la...Frenchlanguages
BurundiKirundi and French (official), SwahiliKirundiandFrench
CambodiaKhmer 95% (official), French, EnglishKhmer
CameroonFrench, English (both official); 24 major Afri...French
.........
SwazilandEnglish, siSwati (both official)English
SwedenSwedish, small Sami- and Finnish-speaking mino...Swedish
SwitzerlandGerman 64%, French 20%, Italian 7% (all offici...German
SyriaArabic (official); Kurdish, Armenian, Aramaic,...Arabic
TaiwanChinese (Mandarin, official), Taiwanese (Min),...Chinese
TajikistanTajik (official), Russian widely used in gover...Tajik
TanzaniaSwahili, English (both official); Arabic; many...Swahili
ThailandThai (Siamese), English (secondary language of...Thai
TogoFrench (official, commerce); Ewé, Mina (south)...French
TongaTongan (an Austronesian language), EnglishTongan
Trinidad and TobagoEnglish (official), Hindi, French, Spanish, Ch...English
TunisiaArabic (official, commerce), French (commerce)Arabic
TurkeyTurkish (official), Kurdish, Dimli, Azeri, Kab...Turkish
TurkmenistanTurkmen 72%; Russian 12%; Uzbek 9%, other 7%Turkmen
TuvaluTuvaluan, English, Samoan, Kiribati (on the is...Tuvaluan
UgandaEnglish (official), Ganda or Luganda, other Ni...English
UkraineUkrainian 67%, Russian 24%, Romanian, Polish, ...Ukrainian
United Arab EmiratesArabic (official), Persian, English, Hindi, UrduArabic
United KingdomEnglish, Welsh, Scots GaelicEnglish
United StatesEnglish 82%, Spanish 11% (2000)English
UruguaySpanish, Portunol, or BrazileroSpanish
UzbekistanUzbek 74.3%, Russian 14.2%, Tajik 4.4%, other ...Uzbek.
VanuatuBislama 23% (a Melanesian pidgin English), Eng...Bislama
Vatican City (Holy See)Italian, Latin, French, various other languagesItalian
VenezuelaSpanish (official), numerous indigenous dialectsSpanish
VietnamVietnamese (official); English (increasingly f...Vietnamese
Western Sahara (proposed state)Hassaniya Arabic, Moroccan ArabicHassaniyaArabic
YemenArabicArabic
ZambiaEnglish (official); major vernaculars: Bemba, ...English
ZimbabweEnglish (official), Shona, Ndebele (Sindebele)...English
\n", + "

198 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " language \\\n", + "country \n", + "Afghanistan Dari Persian, Pashtu (both official), other Tu... \n", + "Albania Albanian (Tosk is the official dialect), Greek \n", + "Algeria Arabic (official), French, Berber dialects \n", + "Andorra Catalán (official), French, Castilian, Portuguese \n", + "Angola Portuguese (official), Bantu and other African... \n", + "Antigua and Barbuda English (official), local dialects \n", + "Argentina Spanish (official), English, Italian, German, ... \n", + "Armenia Armenian 98%, Yezidi, Russian \n", + "Australia English 79%, native and other languages \n", + "Austria German (official nationwide); Slovene, Croatia... \n", + "Azerbaijan Azerbaijani Turkic 89%, Russian 3%, Armenian 2... \n", + "Bahamas English (official), Creole (among Haitian immi... \n", + "Bahrain Arabic, English, Farsi, Urdu \n", + "Bangladesh Bangla (official), English \n", + "Barbados English \n", + "Belarus Belorussian (White Russian), Russian, other \n", + "Belgium Dutch (Flemish) 60%, French 40%, German less t... \n", + "Belize English (official), Spanish, Mayan, Garifuna (... \n", + "Benin French (official), Fon, Yoruba, tribal languages \n", + "Bhutan Dzongkha (official), Tibetan dialects (among B... \n", + "Bolivia Spanish, Quechua, Aymara (all official) \n", + "Bosnia and Herzegovina Bosnian, Croatian, Serbian \n", + "Botswana English 2% (official), Setswana 78%, Kalanga 8... \n", + "Brazil Portuguese (official), Spanish, English, French \n", + "Brunei Malay (official), English, Chinese \n", + "Bulgaria Bulgarian 85%, Turkish 10%, Roma 4% \n", + "Burkina Faso French (official); native African (Sudanic) la... \n", + "Burundi Kirundi and French (official), Swahili \n", + "Cambodia Khmer 95% (official), French, English \n", + "Cameroon French, English (both official); 24 major Afri... \n", + "... ... \n", + "Swaziland English, siSwati (both official) \n", + "Sweden Swedish, small Sami- and Finnish-speaking mino... \n", + "Switzerland German 64%, French 20%, Italian 7% (all offici... \n", + "Syria Arabic (official); Kurdish, Armenian, Aramaic,... \n", + "Taiwan Chinese (Mandarin, official), Taiwanese (Min),... \n", + "Tajikistan Tajik (official), Russian widely used in gover... \n", + "Tanzania Swahili, English (both official); Arabic; many... \n", + "Thailand Thai (Siamese), English (secondary language of... \n", + "Togo French (official, commerce); Ewé, Mina (south)... \n", + "Tonga Tongan (an Austronesian language), English \n", + "Trinidad and Tobago English (official), Hindi, French, Spanish, Ch... \n", + "Tunisia Arabic (official, commerce), French (commerce) \n", + "Turkey Turkish (official), Kurdish, Dimli, Azeri, Kab... \n", + "Turkmenistan Turkmen 72%; Russian 12%; Uzbek 9%, other 7% \n", + "Tuvalu Tuvaluan, English, Samoan, Kiribati (on the is... \n", + "Uganda English (official), Ganda or Luganda, other Ni... \n", + "Ukraine Ukrainian 67%, Russian 24%, Romanian, Polish, ... \n", + "United Arab Emirates Arabic (official), Persian, English, Hindi, Urdu \n", + "United Kingdom English, Welsh, Scots Gaelic \n", + "United States English 82%, Spanish 11% (2000) \n", + "Uruguay Spanish, Portunol, or Brazilero \n", + "Uzbekistan Uzbek 74.3%, Russian 14.2%, Tajik 4.4%, other ... \n", + "Vanuatu Bislama 23% (a Melanesian pidgin English), Eng... \n", + "Vatican City (Holy See) Italian, Latin, French, various other languages \n", + "Venezuela Spanish (official), numerous indigenous dialects \n", + "Vietnam Vietnamese (official); English (increasingly f... \n", + "Western Sahara (proposed state) Hassaniya Arabic, Moroccan Arabic \n", + "Yemen Arabic \n", + "Zambia English (official); major vernaculars: Bemba, ... \n", + "Zimbabwe English (official), Shona, Ndebele (Sindebele)... \n", + "\n", + " languages \n", + "country \n", + "Afghanistan DariPersian \n", + "Albania Albanian \n", + "Algeria Arabic \n", + "Andorra Catalán \n", + "Angola Portuguese \n", + "Antigua and Barbuda English \n", + "Argentina Spanish \n", + "Armenia Armenian \n", + "Australia English \n", + "Austria German \n", + "Azerbaijan AzerbaijaniTurkic \n", + "Bahamas English \n", + "Bahrain Arabic \n", + "Bangladesh Bangla \n", + "Barbados English \n", + "Belarus Belorussian \n", + "Belgium Dutch \n", + "Belize English \n", + "Benin French \n", + "Bhutan Dzongkha \n", + "Bolivia Spanish \n", + "Bosnia and Herzegovina Bosnian \n", + "Botswana English \n", + "Brazil Portuguese \n", + "Brunei Malay \n", + "Bulgaria Bulgarian \n", + "Burkina Faso Frenchlanguages \n", + "Burundi KirundiandFrench \n", + "Cambodia Khmer \n", + "Cameroon French \n", + "... ... \n", + "Swaziland English \n", + "Sweden Swedish \n", + "Switzerland German \n", + "Syria Arabic \n", + "Taiwan Chinese \n", + "Tajikistan Tajik \n", + "Tanzania Swahili \n", + "Thailand Thai \n", + "Togo French \n", + "Tonga Tongan \n", + "Trinidad and Tobago English \n", + "Tunisia Arabic \n", + "Turkey Turkish \n", + "Turkmenistan Turkmen \n", + "Tuvalu Tuvaluan \n", + "Uganda English \n", + "Ukraine Ukrainian \n", + "United Arab Emirates Arabic \n", + "United Kingdom English \n", + "United States English \n", + "Uruguay Spanish \n", + "Uzbekistan Uzbek. \n", + "Vanuatu Bislama \n", + "Vatican City (Holy See) Italian \n", + "Venezuela Spanish \n", + "Vietnam Vietnamese \n", + "Western Sahara (proposed state) HassaniyaArabic \n", + "Yemen Arabic \n", + "Zambia English \n", + "Zimbabwe English \n", + "\n", + "[198 rows x 2 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = parse_url('https://www.infoplease.com/world/countries-world/languages-spoken-each-country-world')\n", + "countryLanguages = df[0].rename(columns={0:'country',1:'language'}).set_index('country')\n", + "countryLanguages['languages'] = [re.sub('\\d+|%|\\(.*\\)|\\s','',i).split(',')[0].split(';')[0] for i in countryLanguages['language']]\n", + "countryLanguages " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "translator=Translator()\n", + "translator.translate('Hola me llamo Jennifer 😜😜', dest='en').text" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "# Setup Tweepy API Authentication\n", + "auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n", + "auth.set_access_token(access_token, access_token_secret)\n", + "api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())\n", + "tweets = api.search('dalailama')" + ] + }, + { + "cell_type": "code", + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -10,80 +1021,73 @@ "text/plain": [ "{'contributors': None,\n", " 'coordinates': None,\n", - " 'created_at': 'Mon Jun 25 06:43:18 +0000 2018',\n", - " 'entities': {'hashtags': [],\n", + " 'created_at': 'Fri Jun 29 01:52:56 +0000 2018',\n", + " 'entities': {'hashtags': [{'indices': [57, 63], 'text': 'peace'},\n", + " {'indices': [87, 98], 'text': 'hatespeech'},\n", + " {'indices': [100, 106], 'text': 'anger'}],\n", " 'symbols': [],\n", - " 'urls': [],\n", - " 'user_mentions': [{'id': 886971481200177153,\n", - " 'id_str': '886971481200177153',\n", - " 'indices': [0, 16],\n", - " 'name': 'Sun Shine',\n", - " 'screen_name': 'SunShin93735000'},\n", - " {'id': 1556419213,\n", - " 'id_str': '1556419213',\n", - " 'indices': [17, 29],\n", - " 'name': 'Kitty_Maggy',\n", - " 'screen_name': 'Kitty_Maggy'},\n", - " {'id': 38142665,\n", - " 'id_str': '38142665',\n", - " 'indices': [30, 42],\n", - " 'name': 'Le Parisien',\n", - " 'screen_name': 'le_Parisien'}]},\n", + " 'urls': [{'display_url': 'twitter.com/i/web/status/1…',\n", + " 'expanded_url': 'https://twitter.com/i/web/status/1012514219374231553',\n", + " 'indices': [109, 132],\n", + " 'url': 'https://t.co/oSqPQyK9GL'}],\n", + " 'user_mentions': []},\n", " 'favorite_count': 0,\n", " 'favorited': False,\n", " 'geo': None,\n", - " 'id': 1011137744192638977,\n", - " 'id_str': '1011137744192638977',\n", - " 'in_reply_to_screen_name': 'SunShin93735000',\n", - " 'in_reply_to_status_id': 1010592336487776256,\n", - " 'in_reply_to_status_id_str': '1010592336487776256',\n", - " 'in_reply_to_user_id': 886971481200177153,\n", - " 'in_reply_to_user_id_str': '886971481200177153',\n", + " 'id': 1012514219374231553,\n", + " 'id_str': '1012514219374231553',\n", + " 'in_reply_to_screen_name': None,\n", + " 'in_reply_to_status_id': None,\n", + " 'in_reply_to_status_id_str': None,\n", + " 'in_reply_to_user_id': None,\n", + " 'in_reply_to_user_id_str': None,\n", " 'is_quote_status': False,\n", - " 'lang': 'fr',\n", - " 'metadata': {'iso_language_code': 'fr', 'result_type': 'recent'},\n", + " 'lang': 'en',\n", + " 'metadata': {'iso_language_code': 'en', 'result_type': 'recent'},\n", " 'place': None,\n", + " 'possibly_sensitive': False,\n", " 'retweet_count': 0,\n", " 'retweeted': False,\n", - " 'source': 'Twitter for Android',\n", - " 'text': \"@SunShin93735000 @Kitty_Maggy @le_Parisien Oui ce serait l'idéal mais nos dirigeants n'ont pas assez de c......pour le faire.\",\n", - " 'truncated': False,\n", + " 'source': 'Twitter Web Client',\n", + " 'text': 'You see I do not join the panel because I do not support #peace. I am the advocate for #hatespeech, #anger,… https://t.co/oSqPQyK9GL',\n", + " 'truncated': True,\n", " 'user': {'contributors_enabled': False,\n", - " 'created_at': 'Sun May 20 15:51:30 +0000 2012',\n", - " 'default_profile': True,\n", + " 'created_at': 'Mon Jan 08 21:39:38 +0000 2018',\n", + " 'default_profile': False,\n", " 'default_profile_image': False,\n", - " 'description': '',\n", + " 'description': 'Bobbyism expert | True Messiah | Kingdom of Bob',\n", " 'entities': {'description': {'urls': []}},\n", - " 'favourites_count': 17349,\n", + " 'favourites_count': 54,\n", " 'follow_request_sent': False,\n", - " 'followers_count': 270,\n", + " 'followers_count': 23,\n", " 'following': False,\n", - " 'friends_count': 195,\n", + " 'friends_count': 95,\n", " 'geo_enabled': False,\n", " 'has_extended_profile': False,\n", - " 'id': 585766691,\n", - " 'id_str': '585766691',\n", + " 'id': 950482149827788801,\n", + " 'id_str': '950482149827788801',\n", " 'is_translation_enabled': False,\n", " 'is_translator': False,\n", - " 'lang': 'fr',\n", - " 'listed_count': 2,\n", - " 'location': '',\n", - " 'name': 'Sylvia',\n", + " 'lang': 'en-gb',\n", + " 'listed_count': 0,\n", + " 'location': 'University City, MO',\n", + " 'name': 'BobbyThorman',\n", " 'notifications': False,\n", - " 'profile_background_color': 'C0DEED',\n", + " 'profile_background_color': '000000',\n", " 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png',\n", " 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png',\n", " 'profile_background_tile': False,\n", - " 'profile_image_url': 'http://pbs.twimg.com/profile_images/3622230672/57d2d1e53af65e4b25f72d8346db5098_normal.jpeg',\n", - " 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/3622230672/57d2d1e53af65e4b25f72d8346db5098_normal.jpeg',\n", - " 'profile_link_color': '1DA1F2',\n", - " 'profile_sidebar_border_color': 'C0DEED',\n", - " 'profile_sidebar_fill_color': 'DDEEF6',\n", - " 'profile_text_color': '333333',\n", - " 'profile_use_background_image': True,\n", + " 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/950482149827788801/1527086667',\n", + " 'profile_image_url': 'http://pbs.twimg.com/profile_images/999299355466121216/uxKqFcGi_normal.jpg',\n", + " 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/999299355466121216/uxKqFcGi_normal.jpg',\n", + " 'profile_link_color': '000000',\n", + " 'profile_sidebar_border_color': '000000',\n", + " 'profile_sidebar_fill_color': '000000',\n", + " 'profile_text_color': '000000',\n", + " 'profile_use_background_image': False,\n", " 'protected': False,\n", - " 'screen_name': 'sylvia9183',\n", - " 'statuses_count': 6955,\n", + " 'screen_name': 'BobbyTHORman',\n", + " 'statuses_count': 1078,\n", " 'time_zone': None,\n", " 'translator_type': 'none',\n", " 'url': None,\n", @@ -91,58 +1095,812 @@ " 'verified': False}}" ] }, - "execution_count": 12, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "#Text, location, dates, specified number of results\n", - "def SearchForData(search_term, nTweets):\n", - " # Import and Initialize Sentiment Analyzer\n", - " from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n", - " analyzer = SentimentIntensityAnalyzer()\n", - "\n", - " import tweepy; import json\n", - " from apikeys import twitterAccessToken as access_token\n", - " from apikeys import twitterAccessTokenSecret as access_token_secret\n", - " from apikeys import twitterConsumerKey as consumer_key\n", - " from apikeys import twitterConsumerSecretKey as consumer_secret\n", - " # Setup Tweepy API Authentication\n", - " auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n", - " auth.set_access_token(access_token, access_token_secret)\n", - " api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())\n", - "\n", - " maxTweets = 10000000 # Some arbitrary large number\n", - " public_tweets=[]\n", - " # Counter to keep track of the number of tweets retrieved\n", - " counter = 0\n", - " oldest_tweet = None\n", - " unique_ids = []\n", - " # Loop through 5 times (total of 500 tweets)\n", - " desiredTweets = []\n", - " while len(desiredTweets) < min(nTweets,maxTweets):\n", - " # Retrieve 100 most recent tweets -- specifying a max_id\n", - " public_tweets = api.search(search_term, count=100, result_type=\"recent\", max_id=oldest_tweet)\n", - "\n", - " for tweet in public_tweets['statuses']:\n", - " tweet_id = tweet[\"id\"]\n", - "\n", - " # Append tweet_id to ids list if it doesn't already exist\n", - " # This allows checking for duplicate tweets\n", - " if tweet_id not in unique_ids :\n", - " unique_ids.append(tweet_id)\n", - " desiredTweets.append(tweet)\n", - " \n", - " # Reassign the the oldest tweet (i.e. the max_id) subtract 1 so the previous oldest isn't included\n", - " oldest_tweet = tweet_id - 1\n", - " return desiredTweets\n", - "\n", - "\n", - " \n", - "#oneTweet = SearchForData(\"kitty\", 100)\n", - "#oneTweet[0]\n" + "tweets['statuses'][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
followersfollowinggeohashtagsstatuses_counttext
0154060FalseNone[]22455@ryapee Hi Rya order ako ulit polyblender mejo...
183FalseNone[]3030Yung dating saling pusa naging aso bigla // 🎶
2643FalseNone[{'text': 'AdoptDontShop', 'indices': [38, 52]}]8529sana meron din dito sa Pilipinas yung #AdoptDo...
32729FalseNone[]28771Aso nga kasi ako, bantay ako dito hahaha 😂
425FalseNone[]14lakas mangahol ng kaklase ko dinaig pa aso nam...
5116FalseNone[]1993@LampanoElla Dun sa aso oo HAHA
6113FalseNone[]4495Me: labas mo dila para lumabas dila ng aso \\nS...
7130FalseNone[]1745cute kong aso https://t.co/twDprm7o5P
854FalseNone[]727me: pabili pong dog food\\ntindero: alin? ung p...
924FalseNone[]296@dsgalarpez hahahaha aso ka na ba ngayon?
10841FalseNone[]29025@DenniceRoselle Uy kawawa mga aso. Di naman si...
11260FalseNone[]8486Nagtanggal tuloy ako nang mga tae nang aso kai...
12260FalseNone[]8486Nakakapikon ung aso 😭😭
13176FalseNone[]4682May mga sakit aso namin hanep
141357FalseNone[]15736i hate when strangers esp. men look at you str...
15422FalseNone[]21045@dnnkthryn Ngek malas. Sa rosewood naman okay ...
16422FalseNone[]21045@dnnkthryn Yup. Iba talaga pag bahay super fre...
17365FalseNone[]2083Tang ina sobrang iba pala pakiramdam pag namat...
1875FalseNone[]243aso't pusa ❤\\ngoodmorning hubby babe ! https:/...
191021FalseNone[]11119RT @akoposimarcelo: Yung buti pa yung mga aso,...
2047FalseNone[]905napaka clingy ng aso ko, nebeyen hehe
21147FalseNone[]3406ang laki ng aso!!! panay naka tahol
22269FalseNone[]21310Alam ata ng aso ko na birthday ko ngayon. Iba ...
23548FalseNone[]3553Nagduduet nanaman yung dalawang aso hays
2488FalseNone[]1470Yung aso naming maligalig na palundag lundag pa
25394FalseNone[]67411Mukha talaga akong tanga kapag nakikipaglaro a...
26105FalseNone[]417RT @akoposimarcelo: Yung buti pa yung mga aso,...
27174FalseNone[]4033nagdisitahulan mga aso ang creepy huhu
2890FalseNone[]828@glbysrcmny aso
29850FalseNone[]11211Distemper virus. may ganyan plang sakit nang m...
.....................
7080FalseNone[]1208Plus two agad aso namen AAHAHAHAHAH saya
71180FalseNone[]6216@jhnlstrpgnsn Hahaha hindi ko aso yun sa ate k...
721280FalseNone[]5483RT @akoposimarcelo: Yung buti pa yung mga aso,...
7374FalseNone[]2055Mama: Tanga Di Mo Pa Pinapakain Yung Aso.\\n\\nT...
7472FalseNone[]456@cescamarii di yan kusa aso yan
75365FalseNone[]2083Nasagasaan aso ko 😢
76301FalseNone[]4788Ngayon ko nalang na appreciate ulit yung ganda...
77561FalseNone[]13927pag gantong nalulungkot ako imbis na maghanap ...
78180FalseNone[]6718creepy ng aso pero mahal ko kayo HAHAHHHAHAHA
79329FalseNone[]1259Pagod bebi ko ako inaantok na tagal ni aso HAH...
805FalseNone[]460So ayun diba may hamster kami si Luxus saka si...
81797FalseNone[]42407@Anniefernando6 @aldenAllTheWay Baka kayo ang ...
82293FalseNone[]3881hindi ako to pramis. hahahahahahaha hindi ako ...
83293FalseNone[]3881HUY ANG WEIRD TALAGA KASI NAGSESAVE AKO NG PIC...
8484FalseNone[]15631Fun fact about you — Sobrang love ko yung mga ...
85293FalseNone[]3881diko maintindihan bakit ako nagsesave ng pictu...
86440FalseNone[]1375Hayaan mo lang na husgahan ka nila.Hindi yung ...
87476FalseNone[]7158@pauiicosta lumabas na naman pagka aso mo haha...
8835FalseNone[]1902nagttampo ako kay potchi, tangina aso lang yon...
89183FalseNone[]1154Ako: pare iiyak ka pag namatay aso mo? \\nRoque...
9062FalseNone[]756Aso ko e pero di na maghahabol🐶 https://t.co/Y...
9144FalseNone[]64hi aso
92167FalseNone[]2591Nangapitbahay nako para sa aso. Happy pill! 😊 ...
93536FalseNone[]6135NP: Banal na aso, santong kabayo\\n\\nNatatawa a...
943228False{'type': 'Point', 'coordinates': [14.61941886,...[]10223Late post: Buti na lang talaga alert ako..kunc...
95242FalseNone[]4463tao,ahas at aso.
96473FalseNone[]5234ALAM NIYO BANG MUNTIKAN NG GAWING PAGKAIN NG A...
97220FalseNone[]10928Dang kyot ng aso nila Sir huhu i want 😍
98141FalseNone[]12781Hindi sa dinidepensahan pero may umuulol na na...
99200FalseNone[]8882RT @Itsmeearlbravo: Di naman siguro ako pinang...
\n", + "

100 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " followers following geo \\\n", + "0 154060 False None \n", + "1 83 False None \n", + "2 643 False None \n", + "3 2729 False None \n", + "4 25 False None \n", + "5 116 False None \n", + "6 113 False None \n", + "7 130 False None \n", + "8 54 False None \n", + "9 24 False None \n", + "10 841 False None \n", + "11 260 False None \n", + "12 260 False None \n", + "13 176 False None \n", + "14 1357 False None \n", + "15 422 False None \n", + "16 422 False None \n", + "17 365 False None \n", + "18 75 False None \n", + "19 1021 False None \n", + "20 47 False None \n", + "21 147 False None \n", + "22 269 False None \n", + "23 548 False None \n", + "24 88 False None \n", + "25 394 False None \n", + "26 105 False None \n", + "27 174 False None \n", + "28 90 False None \n", + "29 850 False None \n", + ".. ... ... ... \n", + "70 80 False None \n", + "71 180 False None \n", + "72 1280 False None \n", + "73 74 False None \n", + "74 72 False None \n", + "75 365 False None \n", + "76 301 False None \n", + "77 561 False None \n", + "78 180 False None \n", + "79 329 False None \n", + "80 5 False None \n", + "81 797 False None \n", + "82 293 False None \n", + "83 293 False None \n", + "84 84 False None \n", + "85 293 False None \n", + "86 440 False None \n", + "87 476 False None \n", + "88 35 False None \n", + "89 183 False None \n", + "90 62 False None \n", + "91 44 False None \n", + "92 167 False None \n", + "93 536 False None \n", + "94 3228 False {'type': 'Point', 'coordinates': [14.61941886,... \n", + "95 242 False None \n", + "96 473 False None \n", + "97 220 False None \n", + "98 141 False None \n", + "99 200 False None \n", + "\n", + " hashtags statuses_count \\\n", + "0 [] 22455 \n", + "1 [] 3030 \n", + "2 [{'text': 'AdoptDontShop', 'indices': [38, 52]}] 8529 \n", + "3 [] 28771 \n", + "4 [] 14 \n", + "5 [] 1993 \n", + "6 [] 4495 \n", + "7 [] 1745 \n", + "8 [] 727 \n", + "9 [] 296 \n", + "10 [] 29025 \n", + "11 [] 8486 \n", + "12 [] 8486 \n", + "13 [] 4682 \n", + "14 [] 15736 \n", + "15 [] 21045 \n", + "16 [] 21045 \n", + "17 [] 2083 \n", + "18 [] 243 \n", + "19 [] 11119 \n", + "20 [] 905 \n", + "21 [] 3406 \n", + "22 [] 21310 \n", + "23 [] 3553 \n", + "24 [] 1470 \n", + "25 [] 67411 \n", + "26 [] 417 \n", + "27 [] 4033 \n", + "28 [] 828 \n", + "29 [] 11211 \n", + ".. ... ... \n", + "70 [] 1208 \n", + "71 [] 6216 \n", + "72 [] 5483 \n", + "73 [] 2055 \n", + "74 [] 456 \n", + "75 [] 2083 \n", + "76 [] 4788 \n", + "77 [] 13927 \n", + "78 [] 6718 \n", + "79 [] 1259 \n", + "80 [] 460 \n", + "81 [] 42407 \n", + "82 [] 3881 \n", + "83 [] 3881 \n", + "84 [] 15631 \n", + "85 [] 3881 \n", + "86 [] 1375 \n", + "87 [] 7158 \n", + "88 [] 1902 \n", + "89 [] 1154 \n", + "90 [] 756 \n", + "91 [] 64 \n", + "92 [] 2591 \n", + "93 [] 6135 \n", + "94 [] 10223 \n", + "95 [] 4463 \n", + "96 [] 5234 \n", + "97 [] 10928 \n", + "98 [] 12781 \n", + "99 [] 8882 \n", + "\n", + " text \n", + "0 @ryapee Hi Rya order ako ulit polyblender mejo... \n", + "1 Yung dating saling pusa naging aso bigla // 🎶 \n", + "2 sana meron din dito sa Pilipinas yung #AdoptDo... \n", + "3 Aso nga kasi ako, bantay ako dito hahaha 😂 \n", + "4 lakas mangahol ng kaklase ko dinaig pa aso nam... \n", + "5 @LampanoElla Dun sa aso oo HAHA \n", + "6 Me: labas mo dila para lumabas dila ng aso \\nS... \n", + "7 cute kong aso https://t.co/twDprm7o5P \n", + "8 me: pabili pong dog food\\ntindero: alin? ung p... \n", + "9 @dsgalarpez hahahaha aso ka na ba ngayon? \n", + "10 @DenniceRoselle Uy kawawa mga aso. Di naman si... \n", + "11 Nagtanggal tuloy ako nang mga tae nang aso kai... \n", + "12 Nakakapikon ung aso 😭😭 \n", + "13 May mga sakit aso namin hanep \n", + "14 i hate when strangers esp. men look at you str... \n", + "15 @dnnkthryn Ngek malas. Sa rosewood naman okay ... \n", + "16 @dnnkthryn Yup. Iba talaga pag bahay super fre... \n", + "17 Tang ina sobrang iba pala pakiramdam pag namat... \n", + "18 aso't pusa ❤\\ngoodmorning hubby babe ! https:/... \n", + "19 RT @akoposimarcelo: Yung buti pa yung mga aso,... \n", + "20 napaka clingy ng aso ko, nebeyen hehe \n", + "21 ang laki ng aso!!! panay naka tahol \n", + "22 Alam ata ng aso ko na birthday ko ngayon. Iba ... \n", + "23 Nagduduet nanaman yung dalawang aso hays \n", + "24 Yung aso naming maligalig na palundag lundag pa \n", + "25 Mukha talaga akong tanga kapag nakikipaglaro a... \n", + "26 RT @akoposimarcelo: Yung buti pa yung mga aso,... \n", + "27 nagdisitahulan mga aso ang creepy huhu \n", + "28 @glbysrcmny aso \n", + "29 Distemper virus. may ganyan plang sakit nang m... \n", + ".. ... \n", + "70 Plus two agad aso namen AAHAHAHAHAH saya \n", + "71 @jhnlstrpgnsn Hahaha hindi ko aso yun sa ate k... \n", + "72 RT @akoposimarcelo: Yung buti pa yung mga aso,... \n", + "73 Mama: Tanga Di Mo Pa Pinapakain Yung Aso.\\n\\nT... \n", + "74 @cescamarii di yan kusa aso yan \n", + "75 Nasagasaan aso ko 😢 \n", + "76 Ngayon ko nalang na appreciate ulit yung ganda... \n", + "77 pag gantong nalulungkot ako imbis na maghanap ... \n", + "78 creepy ng aso pero mahal ko kayo HAHAHHHAHAHA \n", + "79 Pagod bebi ko ako inaantok na tagal ni aso HAH... \n", + "80 So ayun diba may hamster kami si Luxus saka si... \n", + "81 @Anniefernando6 @aldenAllTheWay Baka kayo ang ... \n", + "82 hindi ako to pramis. hahahahahahaha hindi ako ... \n", + "83 HUY ANG WEIRD TALAGA KASI NAGSESAVE AKO NG PIC... \n", + "84 Fun fact about you — Sobrang love ko yung mga ... \n", + "85 diko maintindihan bakit ako nagsesave ng pictu... \n", + "86 Hayaan mo lang na husgahan ka nila.Hindi yung ... \n", + "87 @pauiicosta lumabas na naman pagka aso mo haha... \n", + "88 nagttampo ako kay potchi, tangina aso lang yon... \n", + "89 Ako: pare iiyak ka pag namatay aso mo? \\nRoque... \n", + "90 Aso ko e pero di na maghahabol🐶 https://t.co/Y... \n", + "91 hi aso \n", + "92 Nangapitbahay nako para sa aso. Happy pill! 😊 ... \n", + "93 NP: Banal na aso, santong kabayo\\n\\nNatatawa a... \n", + "94 Late post: Buti na lang talaga alert ako..kunc... \n", + "95 tao,ahas at aso. \n", + "96 ALAM NIYO BANG MUNTIKAN NG GAWING PAGKAIN NG A... \n", + "97 Dang kyot ng aso nila Sir huhu i want 😍 \n", + "98 Hindi sa dinidepensahan pero may umuulol na na... \n", + "99 RT @Itsmeearlbravo: Di naman siguro ako pinang... \n", + "\n", + "[100 rows x 6 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tweets['Manila,Philippines']" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -150,18 +1908,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.4" } }, "nbformat": 4,