{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Manila , Philippines :  14.5995124,120.9842195,100mi\n",
      "[{'message': 'Rate limit exceeded', 'code': 88}] Trying again after 1 minute.\n",
      "[{'message': 'Rate limit exceeded', 'code': 88}] Trying again after 1 minute.\n",
      "[{'message': 'Rate limit exceeded', 'code': 88}] Trying again after 1 minute.\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mUnboundLocalError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-140-020a8135f50b>\u001b[0m in \u001b[0;36mGetTweetsByPopularCities\u001b[0;34m(search_term, numTweets, translateToLocalLanguage)\u001b[0m\n\u001b[1;32m    214\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 215\u001b[0;31m             \u001b[0mtweetsWorld\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtweetsWorld\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSearchForData\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtranslatedSearch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnumTweets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcityCountry\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    216\u001b[0m         \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mUnboundLocalError\u001b[0m: local variable 'tweetsWorld' referenced before assignment",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mRateLimitError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-140-020a8135f50b>\u001b[0m in \u001b[0;36mSearchForData\u001b[0;34m(search_term, nTweets, cityCountry, radius)\u001b[0m\n\u001b[1;32m    130\u001b[0m                     \u001b[0;32mif\u001b[0m \u001b[0mcityCountry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 131\u001b[0;31m                         \u001b[0mtweetsPerDay\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mapi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msearch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msearch_term\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcount\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnTweetsPerDay\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"recent\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moldest_tweet\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgeocode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcoords\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muntil\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mday\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    132\u001b[0m                     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/lib/python3.6/site-packages/tweepy/binder.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    249\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 250\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    251\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/lib/python3.6/site-packages/tweepy/binder.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    231\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mis_rate_limit_error_message\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror_msg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 232\u001b[0;31m                     \u001b[0;32mraise\u001b[0m \u001b[0mRateLimitError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror_msg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    233\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mRateLimitError\u001b[0m: [{'message': 'Rate limit exceeded', 'code': 88}]",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-140-020a8135f50b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m    245\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    246\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 247\u001b[0;31m \u001b[0mtweetsUS\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtweetsWorld\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGetTweetsByPopularCities\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'trump'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m200\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    248\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtweetsUS\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'location'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    249\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtweetsWorld\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'location'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-140-020a8135f50b>\u001b[0m in \u001b[0;36mGetTweetsByPopularCities\u001b[0;34m(search_term, numTweets, translateToLocalLanguage)\u001b[0m\n\u001b[1;32m    215\u001b[0m             \u001b[0mtweetsWorld\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtweetsWorld\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSearchForData\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtranslatedSearch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnumTweets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcityCountry\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    216\u001b[0m         \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 217\u001b[0;31m             \u001b[0mtweetsWorld\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSearchForData\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtranslatedSearch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnumTweets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcityCountry\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    218\u001b[0m         \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    219\u001b[0m         \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-140-020a8135f50b>\u001b[0m in \u001b[0;36mSearchForData\u001b[0;34m(search_term, nTweets, cityCountry, radius)\u001b[0m\n\u001b[1;32m    134\u001b[0m                 \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    135\u001b[0m                     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Trying again after 1 minute.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 136\u001b[0;31m                     \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    137\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    138\u001b[0m                     \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import wikipedia as wp\n",
    "from pygeocoder import Geocoder\n",
    "import time\n",
    "from googletrans import Translator\n",
    "# Import and Initialize Sentiment Analyzer\n",
    "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n",
    "analyzer = SentimentIntensityAnalyzer()\n",
    "import requests\n",
    "import pandas as pd\n",
    "from bs4 import BeautifulSoup\n",
    "import re\n",
    "from datetime import datetime\n",
    "from datetime import date, timedelta\n",
    "\n",
    "import tweepy; import json\n",
    "from apikeys import twitterAccessToken as access_token\n",
    "from apikeys import twitterAccessTokenSecret as access_token_secret\n",
    "from apikeys import twitterConsumerKey as consumer_key\n",
    "from apikeys import twitterConsumerSecretKey as consumer_secret\n",
    "\n",
    "def parse_url( url):\n",
    "    response = requests.get(url)\n",
    "    soup = BeautifulSoup(response.text, 'lxml')\n",
    "    listylist=[]\n",
    "    for table in soup.find_all('table'):\n",
    "        listylist.append(parse_html_table(table))\n",
    "    return listylist\n",
    "\n",
    "def parse_html_table( table):\n",
    "    n_columns = 0; n_rows=0; column_names = []\n",
    "\n",
    "    # Find number of rows and columns\n",
    "    # we also find the column titles if we can\n",
    "    for row in table.find_all('tr'):\n",
    "\n",
    "        # Determine the number of rows in the table\n",
    "        td_tags = row.find_all('td')\n",
    "        if len(td_tags) > 0:\n",
    "            n_rows+=1\n",
    "            if n_columns == 0:\n",
    "                # Set the number of columns for our table\n",
    "                n_columns = len(td_tags)\n",
    "\n",
    "        # Handle column names if we find them\n",
    "        th_tags = row.find_all('th') \n",
    "        if len(th_tags) > 0 and len(column_names) == 0:\n",
    "            for th in th_tags:\n",
    "                column_names.append(th.get_text())\n",
    "\n",
    "    # Safeguard on Column Titles\n",
    "    if len(column_names) > 0 and len(column_names) != n_columns:\n",
    "        raise Exception(\"Column titles do not match the number of columns\")\n",
    "\n",
    "    columns = column_names if len(column_names) > 0 else range(0,n_columns)\n",
    "    df = pd.DataFrame(columns = columns,\n",
    "                      index= range(0,n_rows))\n",
    "    row_marker = 0\n",
    "    for row in table.find_all('tr'):\n",
    "        column_marker = 0\n",
    "        columns = row.find_all('td')\n",
    "        for column in columns:\n",
    "            df.iat[row_marker,column_marker] = column.get_text()\n",
    "            column_marker += 1\n",
    "        if len(columns) > 0:\n",
    "            row_marker += 1\n",
    "\n",
    "    # Convert to float if possible\n",
    "    for col in df:\n",
    "        try:\n",
    "            df[col] = df[col].astype(float)\n",
    "        except ValueError:\n",
    "            pass\n",
    "\n",
    "    return df\n",
    "\n",
    "def getCountryLanguages():\n",
    "    #TODO: Use the .apply to just change the table to one dialect. Imrpove language scope later.\n",
    "    df = parse_url('https://www.infoplease.com/world/countries-world/languages-spoken-each-country-world')\n",
    "    countryLanguages = df[0].rename(columns={0:'country',1:'language'}).set_index('country')\n",
    "    countryLanguages['language'] = [re.sub('\\d+|%|\\(.*\\)|\\s','',i).split(',')[0].split(';')[0] for i in countryLanguages['language']]\n",
    "    return countryLanguages\n",
    "\n",
    "\n",
    "#returns hashtag, followers, following, text, geo, date\n",
    "#cityCountry example: 'paris,france'\n",
    "def SearchForData(search_term, nTweets, cityCountry='',radius=100):\n",
    "    \n",
    "    # Setup Tweepy API Authentication\n",
    "    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n",
    "    auth.set_access_token(access_token, access_token_secret)\n",
    "    api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())\n",
    "    \n",
    "    #--- Calculate geocordinates from cityCountry --- --- ---- ---- --- --- --- --\n",
    "    geoConvertTries = 0\n",
    "    while True:\n",
    "        try:\n",
    "            result = Geocoder.geocode(cityCountry)\n",
    "        except Exception as error:\n",
    "            #print('errrooooorrrrr: ',error.message)\n",
    "            if 'OVER_QUERY_LIMIT' in str(error):\n",
    "                print('Encountered an error:{0}\\nWaiting 30 seconds and trying again.'.format(error))\n",
    "                time.sleep(30)\n",
    "                if geoConvertTries>10:\n",
    "                    print(\"Could not convert geo. returning empty list\")\n",
    "                    return []\n",
    "            elif not (re.search('^\\w+,\\w+$',cityCountry)):\n",
    "                print(\"cityCountry input format is incorrect. It should be \\'city,Country\\' like \\'paris,france\\'\")\n",
    "                return []\n",
    "            else:\n",
    "                print(\"Could not convert geo. returning empty list\")\n",
    "                return []\n",
    "        else:\n",
    "            break\n",
    "        geoConvertTries+=1\n",
    "    # 34.0934,56.134,50mi\n",
    "    coords = str(result[0].coordinates).replace('(','').replace(')','') + f',{radius}mi'\n",
    "    coords=coords.replace(' ','')\n",
    "    print(cityCountry, \": \", coords)\n",
    "    #--- ---- ----- ---- ---- ---- ---- ---- --- ---- ---- --- ---- --- ---- --- --\n",
    "\n",
    "    #--- grab tweets --- ---- ---- ---- ---- ---- ---- ---- --- --- ---- ---- ----\n",
    "    maxTweets = 10000; oldest_tweet = None; unique_ids = []; desiredTweets = [];nTweetsPerDay=nTweets/8\n",
    "    for day,num in zip([str(date.today() - timedelta(i)).split()[0] for i in range(8)], range(1,9)):\n",
    "        tweetsPerDay=[]\n",
    "        while len(tweetsPerDay) < min(nTweetsPerDay,maxTweets/8):\n",
    "            #--- determine whether to grab tweets by geo or not --- ---- --- ----- --\n",
    "            while True:\n",
    "                try:\n",
    "                    if cityCountry:\n",
    "                        tweetsPerDay = api.search(search_term, count=nTweetsPerDay, result_type=\"recent\", max_id=oldest_tweet, geocode=coords, until=day)\n",
    "                    else:\n",
    "                        tweetsPerDay = api.search(search_term, count=nTweetsPerDay, result_type=\"recent\", max_id=oldest_tweet, until=day)\n",
    "                except Exception as error:\n",
    "                    print(error,'Trying again after 1 minute.')\n",
    "                    time.sleep(60)\n",
    "                else:\n",
    "                    break\n",
    "            #---- ----- ----- ---- ----- ---- ----- ---- ----- ---- ---- ---- ---- --\n",
    "\n",
    "            #--- Dont go through an infinite loop trying to fill tweets that don't exist -----\n",
    "            if len(tweetsPerDay['statuses'])==0:\n",
    "                print(f'No tweets returned while searching for \\'{search_term}\\'\\n',len(desiredTweets)\\\n",
    "                     ,'\\n',day)\n",
    "                return pd.DataFrame(desiredTweets)\n",
    "\n",
    "            #--- Append relevent tweets to output listy list ---- --- ---- ---- ---- --- ---\n",
    "            for tweet in tweetsPerDay['statuses']:\n",
    "                # Append tweet_id to ids list if it doesn't already exist. This allows checking for duplicate tweets\n",
    "                if tweet[\"id\"] not in unique_ids :\n",
    "                    unique_ids.append(tweet['id'])\n",
    "                    desiredTweets.append({'text':tweet['text'], 'vader':analyzer.polarity_scores(tweet['text'])['compound'],\n",
    "                                          'location':cityCountry,\n",
    "                                          'hashtags':tweet['entities']['hashtags'], 'followers':tweet['user']['followers_count'],\n",
    "                                         'friends_count':tweet['user']['friends_count'],'statuses_count':tweet['user']['statuses_count'],\n",
    "                                          'created_at':datetime.strptime(tweet['created_at'],'%a %b %d %H:%M:%S %z %Y')})\n",
    "                \n",
    "                # Reassign the the oldest tweet (i.e. the max_id) subtract 1 so the previous oldest isn't included\n",
    "                oldest_tweet = tweet[\"id\"] - 1\n",
    "            \n",
    "\n",
    "    #--- Print sample tweet --- --- ---- ---- --- ---- ---- --- ---- ---- ---\n",
    "    translator = Translator()\n",
    "    try:\n",
    "        print ('Sample Tweet:',translator.translate(desiredTweets[0]['text'], dest='en').text)\n",
    "    except:\n",
    "        print('there was an error translating sample tweet: ',desiredTweets[0]['text'])\n",
    "    return pd.DataFrame(desiredTweets)\n",
    "\n",
    "\n",
    "def GetTweetsByPopularCities(search_term, numTweets, translateToLocalLanguage = True):\n",
    "    #-- Get the most populated cities from wikipedia (Thank you wikipedia library!) --\n",
    "    html = wp.page(\"List_of_cities_by_population_density\").html().encode(\"UTF-8\")\n",
    "    worldCities = pd.read_html(html)[1]\n",
    "    worldCities = worldCities.drop([2,3,4],axis=1)\n",
    "    worldCities = worldCities.rename(columns={0:'city',1:'population',5:'density',6:'country'})\n",
    "    worldCities = worldCities.iloc[1:]\n",
    "    worldCities['population'] = [int(city.split('\\xa0')[-1].split('[')[0].replace(',','')) for city in worldCities['population']]\n",
    "    worldCities['density'] = [int(city.split('\\xa0')[-1].split('[')[0].replace(',','')) for city in worldCities['density']]\n",
    "    #--- ---- ----- ----- ---- ---- ----- ---- ---- ----- --- ---- ---- ----- ---- ---\n",
    "    \n",
    "    #--- population per cities in United States --- ---- ---- --- ---- --- --- --- ---\n",
    "    html = wp.page(\"List_of_United_States_cities_by_population_density\").html().encode(\"UTF-8\")\n",
    "    UScities = pd.read_html(html)[1]\n",
    "    UScities = UScities.drop([0,2,4,6,8],axis=1)\n",
    "    UScities = UScities.rename(columns={1:'city',3:'state',5: 'land area (mi^2)',7:'density'})\n",
    "    UScities = UScities.iloc[1:]\n",
    "    #df['population']=[int(city.split('\\xa0')[-1].split('[')[0].replace(',','')) for city in df['population']]\n",
    "    UScities['density'] = [float(city.split('\\xa0')[-1].split('[')[0].replace(',','')) for city in UScities['density']]\n",
    "    UScities['land area (mi^2)']=[float(area.split('\\xa0')[-1]) for area in UScities['land area (mi^2)']]\n",
    "    #--- ---- ----- ----- ---- ---- ----- ---- ---- ----- --- ---- ---- ----- ---- ---\n",
    "    \n",
    "    #--- Get tweets by Worlds most densily populated cities ---- --- ---- ---- --- ---\n",
    "    translator = Translator()\n",
    "    comparisons=pd.DataFrame(columns=['time density','sentiment'])\n",
    "    cityCount = 3\n",
    "    for index,row in worldCities.iterrows():\n",
    "        #-- location --- ----- --- ----\n",
    "        city,pop,density,country = row\n",
    "        cityCountry = city+' , '+country\n",
    "\n",
    "        #-- language conversion --- ---- --\n",
    "        languagesDf = getCountryLanguages()\n",
    "        if translateToLocalLanguage:\n",
    "            try:\n",
    "                translatedSearch = translator.translate(search_term, src='en', dest=languagesDf.loc[country,'language']).text\n",
    "            except ValueError:\n",
    "                print(\"could not translate \", languagesDf.loc[country,'language'])\n",
    "                translatedSearch=search_term\n",
    "                print('translated word: ',translatedSearch)\n",
    "        else:\n",
    "            translatedSearch=search_term\n",
    "        #--- --- --- ---- ---- --- --- ---\n",
    "        \n",
    "        try:\n",
    "            tweetsWorld = pd.concat([tweetsWorld, SearchForData(translatedSearch, numTweets, cityCountry, 100)], axis=0)\n",
    "        except:\n",
    "            tweetsWorld = SearchForData(translatedSearch, numTweets, cityCountry, 100)\n",
    "        print('\\n')\n",
    "        time.sleep(4)\n",
    "        #if cityCount==0:\n",
    "        #    break\n",
    "        #else:\n",
    "        #    cityCount-=1\n",
    "    \n",
    "    #--- Add US Cities --- ---- ---- ---- ---- ---\n",
    "    cityCount = 5\n",
    "    for index,row in UScities.iterrows():\n",
    "        #-- location --- ----- --- ----\n",
    "        city,state,area,density = row\n",
    "        cityCountry = state+' , '+city\n",
    "\n",
    "        try:\n",
    "            tweetsUS = pd.concat([tweetsUS, SearchForData(translatedSearch, numTweets, cityCountry, max(area,5))], axis=0)\n",
    "        except:\n",
    "            tweetsUS = SearchForData(translatedSearch, numTweets, cityCountry, max(area,5))\n",
    "        \n",
    "        print('\\n')\n",
    "        time.sleep(4)\n",
    "        #if cityCount==0:\n",
    "        #    break\n",
    "        #else:\n",
    "        #    cityCount-=1\n",
    "    return tweetsUS, tweetsWorld\n",
    "\n",
    "\n",
    "\n",
    "tweetsUS, tweetsWorld = GetTweetsByPopularCities('trump', 200, False)\n",
    "print(tweetsUS.groupby('location').mean())\n",
    "print(tweetsWorld.groupby('location').mean())\n",
    "\n",
    "\n",
    "                                                               #locations are not required inputs\n",
    "#tweets = SearchForData(search_term='baguettes', nTweets=100, cityCountry='paris,france',radius=100)\n",
    "#tweets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>created_at</th>\n",
       "      <th>followers</th>\n",
       "      <th>friends_count</th>\n",
       "      <th>hashtags</th>\n",
       "      <th>location</th>\n",
       "      <th>statuses_count</th>\n",
       "      <th>text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2018-06-29 23:17:03+00:00</td>\n",
       "      <td>683</td>\n",
       "      <td>2054</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>33583</td>\n",
       "      <td>I have a strong feeling  ....... that due to t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2018-06-29 22:47:07+00:00</td>\n",
       "      <td>196</td>\n",
       "      <td>1740</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>5359</td>\n",
       "      <td>@tedcruz It must be a cold day in hell, becaus...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2018-06-29 22:17:22+00:00</td>\n",
       "      <td>679</td>\n",
       "      <td>111</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>22461</td>\n",
       "      <td>Loads of respect for @AndrewCMcCarthy - but if...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2018-06-29 21:49:06+00:00</td>\n",
       "      <td>1291</td>\n",
       "      <td>2499</td>\n",
       "      <td>[{'text': 'TuckFrump', 'indices': [66, 76]}]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>62316</td>\n",
       "      <td>@BishTrumpsCray @zeitgeistbabe @IvankaTrump @S...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2018-06-29 21:41:01+00:00</td>\n",
       "      <td>76</td>\n",
       "      <td>108</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>5491</td>\n",
       "      <td>The media no longer has any credibility with p...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2018-06-29 21:38:04+00:00</td>\n",
       "      <td>1407</td>\n",
       "      <td>789</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>14301</td>\n",
       "      <td>Would you rather have Donald Trump or Kanye We...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2018-06-29 21:21:40+00:00</td>\n",
       "      <td>1371</td>\n",
       "      <td>1069</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>100819</td>\n",
       "      <td>Did Poland eventually lose World War II becaus...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2018-06-29 21:16:25+00:00</td>\n",
       "      <td>1464</td>\n",
       "      <td>2907</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>5879</td>\n",
       "      <td>RT @Starshadow: @dimobey @ananavarro \"He was j...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2018-06-29 21:11:44+00:00</td>\n",
       "      <td>49</td>\n",
       "      <td>135</td>\n",
       "      <td>[{'text': 'StutteringJohn', 'indices': [88, 10...</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>583</td>\n",
       "      <td>Hopefully the Donald has a sense of humor abou...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2018-06-29 20:56:39+00:00</td>\n",
       "      <td>1578</td>\n",
       "      <td>1004</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>35862</td>\n",
       "      <td>@flotus isn't it true when Melania Trump first...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>2018-06-28 23:43:53+00:00</td>\n",
       "      <td>134</td>\n",
       "      <td>356</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>26806</td>\n",
       "      <td>Oprah’s SICK S*x Vid Out And Completely Demoli...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2018-06-28 23:33:56+00:00</td>\n",
       "      <td>72</td>\n",
       "      <td>128</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>4945</td>\n",
       "      <td>@rob1cox  More from our unbiased MSM.   Need a...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>2018-06-28 23:20:09+00:00</td>\n",
       "      <td>151</td>\n",
       "      <td>1104</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>1073</td>\n",
       "      <td>RT @RinChupeco: White people, journalists who ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>2018-06-28 23:12:38+00:00</td>\n",
       "      <td>248</td>\n",
       "      <td>1989</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>4702</td>\n",
       "      <td>@ESSsubreddit @SenSanders No they didn't. Do y...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>2018-06-28 23:06:50+00:00</td>\n",
       "      <td>366677</td>\n",
       "      <td>201</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>201963</td>\n",
       "      <td>Trump-Putin meeting to follow NATO gathering a...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>2018-06-28 23:01:51+00:00</td>\n",
       "      <td>293</td>\n",
       "      <td>1382</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>8802</td>\n",
       "      <td>RT @dzIQ990: US President Donald Trump at Russ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>2018-06-28 22:55:40+00:00</td>\n",
       "      <td>1757</td>\n",
       "      <td>1685</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>37555</td>\n",
       "      <td>RT @RinChupeco: White people, journalists who ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>2018-06-28 22:54:25+00:00</td>\n",
       "      <td>3789</td>\n",
       "      <td>4907</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>26129</td>\n",
       "      <td>RT @RinChupeco: White people, journalists who ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>2018-06-27 23:57:35+00:00</td>\n",
       "      <td>21</td>\n",
       "      <td>208</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>917</td>\n",
       "      <td>RT @RinChupeco: White people, journalists who ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>2018-06-27 23:49:44+00:00</td>\n",
       "      <td>42</td>\n",
       "      <td>66</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>5268</td>\n",
       "      <td>@charliekirk11 @realDonaldTrump Trump has quit...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>2018-06-27 23:45:00+00:00</td>\n",
       "      <td>5863642</td>\n",
       "      <td>1304</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>602468</td>\n",
       "      <td>Trump trade uncertainty weighs on US stocks as...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>2018-06-27 23:38:20+00:00</td>\n",
       "      <td>207</td>\n",
       "      <td>1055</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>4507</td>\n",
       "      <td>RT @RinChupeco: White people, journalists who ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>2018-06-27 23:36:29+00:00</td>\n",
       "      <td>510</td>\n",
       "      <td>434</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>32256</td>\n",
       "      <td>What do you think of Trump's presidential run ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>2018-06-27 23:35:27+00:00</td>\n",
       "      <td>32</td>\n",
       "      <td>155</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>1744</td>\n",
       "      <td>RT @marieAnne0915: Im a bit disappointed sa mg...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>2018-06-27 23:34:16+00:00</td>\n",
       "      <td>141</td>\n",
       "      <td>528</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>3211</td>\n",
       "      <td>RT @RinChupeco: White people, journalists who ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>2018-06-27 23:10:01+00:00</td>\n",
       "      <td>544428</td>\n",
       "      <td>212</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>499788</td>\n",
       "      <td>EU chief says Europe must prepare for the wors...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>2018-06-26 23:59:58+00:00</td>\n",
       "      <td>7452</td>\n",
       "      <td>879</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>19731</td>\n",
       "      <td>RT @RinChupeco: White people, journalists who ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>2018-06-26 23:55:02+00:00</td>\n",
       "      <td>291</td>\n",
       "      <td>162</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>7863</td>\n",
       "      <td>RT @RinChupeco: White people, journalists who ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>2018-06-26 23:54:54+00:00</td>\n",
       "      <td>37</td>\n",
       "      <td>30</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>58215</td>\n",
       "      <td>Missouri Nail Manufacturer Loses Half Its Busi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>2018-06-26 23:53:21+00:00</td>\n",
       "      <td>41</td>\n",
       "      <td>451</td>\n",
       "      <td>[]</td>\n",
       "      <td>Manila , Philippines</td>\n",
       "      <td>14044</td>\n",
       "      <td>RT @RinChupeco: White people, journalists who ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>2018-06-26 23:59:58+00:00</td>\n",
       "      <td>906</td>\n",
       "      <td>721</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>107703</td>\n",
       "      <td>RT @kimguilfoyle: A HUGE win for President Tru...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>2018-06-26 23:59:58+00:00</td>\n",
       "      <td>1269</td>\n",
       "      <td>365</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>34076</td>\n",
       "      <td>RT @faiza_n_ali: This just happened. The highe...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>2018-06-26 23:59:58+00:00</td>\n",
       "      <td>6250</td>\n",
       "      <td>5658</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>22409</td>\n",
       "      <td>DOJ watchdog report sheds light on love lives ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>2018-06-26 23:59:58+00:00</td>\n",
       "      <td>2233</td>\n",
       "      <td>494</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>311601</td>\n",
       "      <td>RT @true_pundit: Barack Obama Concerned About ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>2018-06-26 23:59:58+00:00</td>\n",
       "      <td>4297</td>\n",
       "      <td>4376</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>42347</td>\n",
       "      <td>One NBA Legend Attended A Trump Rally And The ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>2018-06-26 23:59:58+00:00</td>\n",
       "      <td>290</td>\n",
       "      <td>1210</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>144325</td>\n",
       "      <td>RT @MrDane1982: Give me a fucking break, Berni...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>2018-06-25 23:59:59+00:00</td>\n",
       "      <td>145</td>\n",
       "      <td>205</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>35478</td>\n",
       "      <td>RT @maddow: NEW:  MSNBC has obtained the first...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>2018-06-25 23:59:58+00:00</td>\n",
       "      <td>2723</td>\n",
       "      <td>1986</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>42025</td>\n",
       "      <td>Related: for some reason I blocked @Nickelodeo...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>2018-06-25 23:59:58+00:00</td>\n",
       "      <td>4489</td>\n",
       "      <td>4998</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>27990</td>\n",
       "      <td>RT @Kipnis4Congress: .@PhilMurphyNJ @RepBonnie...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>2018-06-25 23:59:58+00:00</td>\n",
       "      <td>169</td>\n",
       "      <td>607</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>1583</td>\n",
       "      <td>RT @JoeNBC: Trump Administration report shows ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>2018-06-25 23:59:57+00:00</td>\n",
       "      <td>91</td>\n",
       "      <td>186</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>26056</td>\n",
       "      <td>RT @kylegriffin1: The Family Case Management P...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>2018-06-25 23:59:57+00:00</td>\n",
       "      <td>5145</td>\n",
       "      <td>3198</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>425496</td>\n",
       "      <td>RT @NBCNews: A 20-foot-tall inflatable orange ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>2018-06-25 23:59:57+00:00</td>\n",
       "      <td>805</td>\n",
       "      <td>1150</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>60897</td>\n",
       "      <td>RT @maddow: NEW:  MSNBC has obtained the first...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>2018-06-25 23:59:57+00:00</td>\n",
       "      <td>38</td>\n",
       "      <td>126</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>1362</td>\n",
       "      <td>RT @kylegriffin1: The Red Hen passed its most ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>2018-06-24 23:59:59+00:00</td>\n",
       "      <td>5824</td>\n",
       "      <td>5835</td>\n",
       "      <td>[{'text': 'TrumpConcentrationCamps', 'indices'...</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>478</td>\n",
       "      <td>RT @leecaly: Nice message Melania Trump wore o...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>2018-06-24 23:59:59+00:00</td>\n",
       "      <td>941</td>\n",
       "      <td>208</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>341641</td>\n",
       "      <td>RT @NBCNews: This Obama-era pilot program kept...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>2018-06-24 23:59:59+00:00</td>\n",
       "      <td>304</td>\n",
       "      <td>313</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>14166</td>\n",
       "      <td>RT @Jamierodr10: Thank you @RealJamesWoods for...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>2018-06-24 23:59:58+00:00</td>\n",
       "      <td>750</td>\n",
       "      <td>248</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>26266</td>\n",
       "      <td>RT @gr8tjude: Virginia Lawmakers Rebuke Anti-T...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>2018-06-24 23:59:58+00:00</td>\n",
       "      <td>2765</td>\n",
       "      <td>4921</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>69653</td>\n",
       "      <td>RT @Amy_Siskind: Some things you need to notic...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>2018-06-24 23:59:58+00:00</td>\n",
       "      <td>1744</td>\n",
       "      <td>228</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>22462</td>\n",
       "      <td>RT @TwitterMoments: California @RepMaxineWater...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>2018-06-24 23:59:58+00:00</td>\n",
       "      <td>216</td>\n",
       "      <td>361</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>12461</td>\n",
       "      <td>RT @JoeNBC: I cannot wait to hear Trump apolog...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>2018-06-24 23:59:58+00:00</td>\n",
       "      <td>33</td>\n",
       "      <td>176</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>94</td>\n",
       "      <td>RT @JoeNBC: Trump Administration report shows ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>2018-06-23 23:59:59+00:00</td>\n",
       "      <td>2143</td>\n",
       "      <td>2597</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>7256</td>\n",
       "      <td>RT @SenSchumer: The Special Counsel’s investig...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>2018-06-23 23:59:59+00:00</td>\n",
       "      <td>37177</td>\n",
       "      <td>6292</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>220250</td>\n",
       "      <td>RT @dicktofel: This story makes a big deal of ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>2018-06-23 23:59:59+00:00</td>\n",
       "      <td>43</td>\n",
       "      <td>110</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>3208</td>\n",
       "      <td>RT @DanRather: When it comes to the Trump Admi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>2018-06-23 23:59:58+00:00</td>\n",
       "      <td>228</td>\n",
       "      <td>507</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>15361</td>\n",
       "      <td>RT @SenSchumer: The Special Counsel’s investig...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>2018-06-23 23:59:58+00:00</td>\n",
       "      <td>40</td>\n",
       "      <td>151</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>290</td>\n",
       "      <td>RT @JoeNBC: “This is beyond narcissism. I used...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>2018-06-23 23:59:58+00:00</td>\n",
       "      <td>39</td>\n",
       "      <td>191</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>483</td>\n",
       "      <td>RT @RubenBaezJr: @PressSec @POTUS Today has be...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>2018-06-23 23:59:57+00:00</td>\n",
       "      <td>288</td>\n",
       "      <td>432</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>2079</td>\n",
       "      <td>RT @GrassrootsSpeak: Dear Kim Kardashian\\n\\nLa...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>2018-06-23 23:59:56+00:00</td>\n",
       "      <td>451</td>\n",
       "      <td>444</td>\n",
       "      <td>[]</td>\n",
       "      <td>New York City , New York</td>\n",
       "      <td>3615</td>\n",
       "      <td>@cswany2 @Lawrence I know you're a Trump perso...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>520 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  created_at  followers  friends_count  \\\n",
       "0  2018-06-29 23:17:03+00:00        683           2054   \n",
       "1  2018-06-29 22:47:07+00:00        196           1740   \n",
       "2  2018-06-29 22:17:22+00:00        679            111   \n",
       "3  2018-06-29 21:49:06+00:00       1291           2499   \n",
       "4  2018-06-29 21:41:01+00:00         76            108   \n",
       "5  2018-06-29 21:38:04+00:00       1407            789   \n",
       "6  2018-06-29 21:21:40+00:00       1371           1069   \n",
       "7  2018-06-29 21:16:25+00:00       1464           2907   \n",
       "8  2018-06-29 21:11:44+00:00         49            135   \n",
       "9  2018-06-29 20:56:39+00:00       1578           1004   \n",
       "10 2018-06-28 23:43:53+00:00        134            356   \n",
       "11 2018-06-28 23:33:56+00:00         72            128   \n",
       "12 2018-06-28 23:20:09+00:00        151           1104   \n",
       "13 2018-06-28 23:12:38+00:00        248           1989   \n",
       "14 2018-06-28 23:06:50+00:00     366677            201   \n",
       "15 2018-06-28 23:01:51+00:00        293           1382   \n",
       "16 2018-06-28 22:55:40+00:00       1757           1685   \n",
       "17 2018-06-28 22:54:25+00:00       3789           4907   \n",
       "18 2018-06-27 23:57:35+00:00         21            208   \n",
       "19 2018-06-27 23:49:44+00:00         42             66   \n",
       "20 2018-06-27 23:45:00+00:00    5863642           1304   \n",
       "21 2018-06-27 23:38:20+00:00        207           1055   \n",
       "22 2018-06-27 23:36:29+00:00        510            434   \n",
       "23 2018-06-27 23:35:27+00:00         32            155   \n",
       "24 2018-06-27 23:34:16+00:00        141            528   \n",
       "25 2018-06-27 23:10:01+00:00     544428            212   \n",
       "26 2018-06-26 23:59:58+00:00       7452            879   \n",
       "27 2018-06-26 23:55:02+00:00        291            162   \n",
       "28 2018-06-26 23:54:54+00:00         37             30   \n",
       "29 2018-06-26 23:53:21+00:00         41            451   \n",
       "..                       ...        ...            ...   \n",
       "26 2018-06-26 23:59:58+00:00        906            721   \n",
       "27 2018-06-26 23:59:58+00:00       1269            365   \n",
       "28 2018-06-26 23:59:58+00:00       6250           5658   \n",
       "29 2018-06-26 23:59:58+00:00       2233            494   \n",
       "30 2018-06-26 23:59:58+00:00       4297           4376   \n",
       "31 2018-06-26 23:59:58+00:00        290           1210   \n",
       "32 2018-06-25 23:59:59+00:00        145            205   \n",
       "33 2018-06-25 23:59:58+00:00       2723           1986   \n",
       "34 2018-06-25 23:59:58+00:00       4489           4998   \n",
       "35 2018-06-25 23:59:58+00:00        169            607   \n",
       "36 2018-06-25 23:59:57+00:00         91            186   \n",
       "37 2018-06-25 23:59:57+00:00       5145           3198   \n",
       "38 2018-06-25 23:59:57+00:00        805           1150   \n",
       "39 2018-06-25 23:59:57+00:00         38            126   \n",
       "40 2018-06-24 23:59:59+00:00       5824           5835   \n",
       "41 2018-06-24 23:59:59+00:00        941            208   \n",
       "42 2018-06-24 23:59:59+00:00        304            313   \n",
       "43 2018-06-24 23:59:58+00:00        750            248   \n",
       "44 2018-06-24 23:59:58+00:00       2765           4921   \n",
       "45 2018-06-24 23:59:58+00:00       1744            228   \n",
       "46 2018-06-24 23:59:58+00:00        216            361   \n",
       "47 2018-06-24 23:59:58+00:00         33            176   \n",
       "48 2018-06-23 23:59:59+00:00       2143           2597   \n",
       "49 2018-06-23 23:59:59+00:00      37177           6292   \n",
       "50 2018-06-23 23:59:59+00:00         43            110   \n",
       "51 2018-06-23 23:59:58+00:00        228            507   \n",
       "52 2018-06-23 23:59:58+00:00         40            151   \n",
       "53 2018-06-23 23:59:58+00:00         39            191   \n",
       "54 2018-06-23 23:59:57+00:00        288            432   \n",
       "55 2018-06-23 23:59:56+00:00        451            444   \n",
       "\n",
       "                                             hashtags  \\\n",
       "0                                                  []   \n",
       "1                                                  []   \n",
       "2                                                  []   \n",
       "3        [{'text': 'TuckFrump', 'indices': [66, 76]}]   \n",
       "4                                                  []   \n",
       "5                                                  []   \n",
       "6                                                  []   \n",
       "7                                                  []   \n",
       "8   [{'text': 'StutteringJohn', 'indices': [88, 10...   \n",
       "9                                                  []   \n",
       "10                                                 []   \n",
       "11                                                 []   \n",
       "12                                                 []   \n",
       "13                                                 []   \n",
       "14                                                 []   \n",
       "15                                                 []   \n",
       "16                                                 []   \n",
       "17                                                 []   \n",
       "18                                                 []   \n",
       "19                                                 []   \n",
       "20                                                 []   \n",
       "21                                                 []   \n",
       "22                                                 []   \n",
       "23                                                 []   \n",
       "24                                                 []   \n",
       "25                                                 []   \n",
       "26                                                 []   \n",
       "27                                                 []   \n",
       "28                                                 []   \n",
       "29                                                 []   \n",
       "..                                                ...   \n",
       "26                                                 []   \n",
       "27                                                 []   \n",
       "28                                                 []   \n",
       "29                                                 []   \n",
       "30                                                 []   \n",
       "31                                                 []   \n",
       "32                                                 []   \n",
       "33                                                 []   \n",
       "34                                                 []   \n",
       "35                                                 []   \n",
       "36                                                 []   \n",
       "37                                                 []   \n",
       "38                                                 []   \n",
       "39                                                 []   \n",
       "40  [{'text': 'TrumpConcentrationCamps', 'indices'...   \n",
       "41                                                 []   \n",
       "42                                                 []   \n",
       "43                                                 []   \n",
       "44                                                 []   \n",
       "45                                                 []   \n",
       "46                                                 []   \n",
       "47                                                 []   \n",
       "48                                                 []   \n",
       "49                                                 []   \n",
       "50                                                 []   \n",
       "51                                                 []   \n",
       "52                                                 []   \n",
       "53                                                 []   \n",
       "54                                                 []   \n",
       "55                                                 []   \n",
       "\n",
       "                    location  statuses_count  \\\n",
       "0       Manila , Philippines           33583   \n",
       "1       Manila , Philippines            5359   \n",
       "2       Manila , Philippines           22461   \n",
       "3       Manila , Philippines           62316   \n",
       "4       Manila , Philippines            5491   \n",
       "5       Manila , Philippines           14301   \n",
       "6       Manila , Philippines          100819   \n",
       "7       Manila , Philippines            5879   \n",
       "8       Manila , Philippines             583   \n",
       "9       Manila , Philippines           35862   \n",
       "10      Manila , Philippines           26806   \n",
       "11      Manila , Philippines            4945   \n",
       "12      Manila , Philippines            1073   \n",
       "13      Manila , Philippines            4702   \n",
       "14      Manila , Philippines          201963   \n",
       "15      Manila , Philippines            8802   \n",
       "16      Manila , Philippines           37555   \n",
       "17      Manila , Philippines           26129   \n",
       "18      Manila , Philippines             917   \n",
       "19      Manila , Philippines            5268   \n",
       "20      Manila , Philippines          602468   \n",
       "21      Manila , Philippines            4507   \n",
       "22      Manila , Philippines           32256   \n",
       "23      Manila , Philippines            1744   \n",
       "24      Manila , Philippines            3211   \n",
       "25      Manila , Philippines          499788   \n",
       "26      Manila , Philippines           19731   \n",
       "27      Manila , Philippines            7863   \n",
       "28      Manila , Philippines           58215   \n",
       "29      Manila , Philippines           14044   \n",
       "..                       ...             ...   \n",
       "26  New York City , New York          107703   \n",
       "27  New York City , New York           34076   \n",
       "28  New York City , New York           22409   \n",
       "29  New York City , New York          311601   \n",
       "30  New York City , New York           42347   \n",
       "31  New York City , New York          144325   \n",
       "32  New York City , New York           35478   \n",
       "33  New York City , New York           42025   \n",
       "34  New York City , New York           27990   \n",
       "35  New York City , New York            1583   \n",
       "36  New York City , New York           26056   \n",
       "37  New York City , New York          425496   \n",
       "38  New York City , New York           60897   \n",
       "39  New York City , New York            1362   \n",
       "40  New York City , New York             478   \n",
       "41  New York City , New York          341641   \n",
       "42  New York City , New York           14166   \n",
       "43  New York City , New York           26266   \n",
       "44  New York City , New York           69653   \n",
       "45  New York City , New York           22462   \n",
       "46  New York City , New York           12461   \n",
       "47  New York City , New York              94   \n",
       "48  New York City , New York            7256   \n",
       "49  New York City , New York          220250   \n",
       "50  New York City , New York            3208   \n",
       "51  New York City , New York           15361   \n",
       "52  New York City , New York             290   \n",
       "53  New York City , New York             483   \n",
       "54  New York City , New York            2079   \n",
       "55  New York City , New York            3615   \n",
       "\n",
       "                                                 text  \n",
       "0   I have a strong feeling  ....... that due to t...  \n",
       "1   @tedcruz It must be a cold day in hell, becaus...  \n",
       "2   Loads of respect for @AndrewCMcCarthy - but if...  \n",
       "3   @BishTrumpsCray @zeitgeistbabe @IvankaTrump @S...  \n",
       "4   The media no longer has any credibility with p...  \n",
       "5   Would you rather have Donald Trump or Kanye We...  \n",
       "6   Did Poland eventually lose World War II becaus...  \n",
       "7   RT @Starshadow: @dimobey @ananavarro \"He was j...  \n",
       "8   Hopefully the Donald has a sense of humor abou...  \n",
       "9   @flotus isn't it true when Melania Trump first...  \n",
       "10  Oprah’s SICK S*x Vid Out And Completely Demoli...  \n",
       "11  @rob1cox  More from our unbiased MSM.   Need a...  \n",
       "12  RT @RinChupeco: White people, journalists who ...  \n",
       "13  @ESSsubreddit @SenSanders No they didn't. Do y...  \n",
       "14  Trump-Putin meeting to follow NATO gathering a...  \n",
       "15  RT @dzIQ990: US President Donald Trump at Russ...  \n",
       "16  RT @RinChupeco: White people, journalists who ...  \n",
       "17  RT @RinChupeco: White people, journalists who ...  \n",
       "18  RT @RinChupeco: White people, journalists who ...  \n",
       "19  @charliekirk11 @realDonaldTrump Trump has quit...  \n",
       "20  Trump trade uncertainty weighs on US stocks as...  \n",
       "21  RT @RinChupeco: White people, journalists who ...  \n",
       "22  What do you think of Trump's presidential run ...  \n",
       "23  RT @marieAnne0915: Im a bit disappointed sa mg...  \n",
       "24  RT @RinChupeco: White people, journalists who ...  \n",
       "25  EU chief says Europe must prepare for the wors...  \n",
       "26  RT @RinChupeco: White people, journalists who ...  \n",
       "27  RT @RinChupeco: White people, journalists who ...  \n",
       "28  Missouri Nail Manufacturer Loses Half Its Busi...  \n",
       "29  RT @RinChupeco: White people, journalists who ...  \n",
       "..                                                ...  \n",
       "26  RT @kimguilfoyle: A HUGE win for President Tru...  \n",
       "27  RT @faiza_n_ali: This just happened. The highe...  \n",
       "28  DOJ watchdog report sheds light on love lives ...  \n",
       "29  RT @true_pundit: Barack Obama Concerned About ...  \n",
       "30  One NBA Legend Attended A Trump Rally And The ...  \n",
       "31  RT @MrDane1982: Give me a fucking break, Berni...  \n",
       "32  RT @maddow: NEW:  MSNBC has obtained the first...  \n",
       "33  Related: for some reason I blocked @Nickelodeo...  \n",
       "34  RT @Kipnis4Congress: .@PhilMurphyNJ @RepBonnie...  \n",
       "35  RT @JoeNBC: Trump Administration report shows ...  \n",
       "36  RT @kylegriffin1: The Family Case Management P...  \n",
       "37  RT @NBCNews: A 20-foot-tall inflatable orange ...  \n",
       "38  RT @maddow: NEW:  MSNBC has obtained the first...  \n",
       "39  RT @kylegriffin1: The Red Hen passed its most ...  \n",
       "40  RT @leecaly: Nice message Melania Trump wore o...  \n",
       "41  RT @NBCNews: This Obama-era pilot program kept...  \n",
       "42  RT @Jamierodr10: Thank you @RealJamesWoods for...  \n",
       "43  RT @gr8tjude: Virginia Lawmakers Rebuke Anti-T...  \n",
       "44  RT @Amy_Siskind: Some things you need to notic...  \n",
       "45  RT @TwitterMoments: California @RepMaxineWater...  \n",
       "46  RT @JoeNBC: I cannot wait to hear Trump apolog...  \n",
       "47  RT @JoeNBC: Trump Administration report shows ...  \n",
       "48  RT @SenSchumer: The Special Counsel’s investig...  \n",
       "49  RT @dicktofel: This story makes a big deal of ...  \n",
       "50  RT @DanRather: When it comes to the Trump Admi...  \n",
       "51  RT @SenSchumer: The Special Counsel’s investig...  \n",
       "52  RT @JoeNBC: “This is beyond narcissism. I used...  \n",
       "53  RT @RubenBaezJr: @PressSec @POTUS Today has be...  \n",
       "54  RT @GrassrootsSpeak: Dear Kim Kardashian\\n\\nLa...  \n",
       "55  @cswany2 @Lawrence I know you're a Trump perso...  \n",
       "\n",
       "[520 rows x 7 columns]"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tweets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "import matplotlib.pyplot as plt\n",
    "timeBetween=[]\n",
    "prevCreatedAt = datetime.strptime(tweets[0]['created_at'],'%a %b %d %H:%M:%S %z %Y')\n",
    "for tweet in tweets[1:]:\n",
    "    #print(tweet['created_at'])\n",
    "    timeBetween.append((prevCreatedAt - datetime.strptime(tweet['created_at'],'%a %b %d %H:%M:%S %z %Y')).total_seconds()/60)\n",
    "    prevCreatedAt = datetime.strptime(tweet['created_at'],'%a %b %d %H:%M:%S %z %Y')\n",
    "plt.plot(range(999),timeBetween)\n",
    "plt.show()\n",
    "plt.plot([datetime.strptime(tweet['created_at'],'%a %b %d %H:%M:%S %z %Y') for tweet in tweets][::-1],range(1000))\n",
    "#plt.x('date',rotation='vertical')\n",
    "plt.show()\n",
    "print((datetime.strptime(tweets[-1]['created_at'],'%a %b %d %H:%M:%S %z %Y')-datetime.strptime(tweets[0]['created_at'],'%a %b %d %H:%M:%S %z %Y')).total_seconds()//60//60)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#This code came from the following link:\n",
    "#https://srome.github.io/Parsing-HTML-Tables-in-Python-with-BeautifulSoup-and-pandas/\n",
    "import requests\n",
    "import pandas as pd\n",
    "from bs4 import BeautifulSoup\n",
    "import re\n",
    "\n",
    "class HTMLTableParser():\n",
    "\n",
    "    def parse_url(self, url):\n",
    "        response = requests.get(url)\n",
    "        soup = BeautifulSoup(response.text, 'lxml')\n",
    "        listylist=[]\n",
    "        for table in soup.find_all('table'):\n",
    "            listylist.append(self.parse_html_table(table))\n",
    "        return listylist\n",
    "\n",
    "    def parse_html_table(self, table):\n",
    "        n_columns = 0; n_rows=0; column_names = []\n",
    "\n",
    "        # Find number of rows and columns\n",
    "        # we also find the column titles if we can\n",
    "        for row in table.find_all('tr'):\n",
    "\n",
    "            # Determine the number of rows in the table\n",
    "            td_tags = row.find_all('td')\n",
    "            if len(td_tags) > 0:\n",
    "                n_rows+=1\n",
    "                if n_columns == 0:\n",
    "                    # Set the number of columns for our table\n",
    "                    n_columns = len(td_tags)\n",
    "\n",
    "            # Handle column names if we find them\n",
    "            th_tags = row.find_all('th') \n",
    "            if len(th_tags) > 0 and len(column_names) == 0:\n",
    "                for th in th_tags:\n",
    "                    column_names.append(th.get_text())\n",
    "\n",
    "        # Safeguard on Column Titles\n",
    "        if len(column_names) > 0 and len(column_names) != n_columns:\n",
    "            raise Exception(\"Column titles do not match the number of columns\")\n",
    "\n",
    "        columns = column_names if len(column_names) > 0 else range(0,n_columns)\n",
    "        df = pd.DataFrame(columns = columns,\n",
    "                          index= range(0,n_rows))\n",
    "        row_marker = 0\n",
    "        for row in table.find_all('tr'):\n",
    "            column_marker = 0\n",
    "            columns = row.find_all('td')\n",
    "            for column in columns:\n",
    "                df.iat[row_marker,column_marker] = column.get_text()\n",
    "                column_marker += 1\n",
    "            if len(columns) > 0:\n",
    "                row_marker += 1\n",
    "\n",
    "        # Convert to float if possible\n",
    "        for col in df:\n",
    "            try:\n",
    "                df[col] = df[col].astype(float)\n",
    "            except ValueError:\n",
    "                pass\n",
    "\n",
    "        return df\n",
    "\n",
    "#TODO: Use the .apply to just change the table to one dialect. Imrpove language scope later.\n",
    "obj = HTMLTableParser()\n",
    "df = obj.parse_url('https://www.infoplease.com/world/countries-world/languages-spoken-each-country-world')\n",
    "countryLanguages = df[0].rename(columns={0:'country',1:'language'}).set_index('country')\n",
    "newDict = []\n",
    "for index, row in countryLanguages.iterrows():\n",
    "    language = re.sub('\\d+|%|\\(.*\\)|\\s','',countryLanguages.loc[index].values[0]).split(',')[0].split(';')[0]\n",
    "    newDict.append([index, language]) #print(index,\": \",language)\n",
    "newDict\n",
    "\n",
    "newDf = pd.DataFrame(newDict)\n",
    "newDf = newDf.rename(columns={0:'country',1:'language'}).set_index('country')\n",
    "newDf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>language</th>\n",
       "      <th>languages</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>country</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Afghanistan</th>\n",
       "      <td>Dari Persian, Pashtu (both official), other Tu...</td>\n",
       "      <td>DariPersian</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Albania</th>\n",
       "      <td>Albanian (Tosk is the official dialect), Greek</td>\n",
       "      <td>Albanian</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Algeria</th>\n",
       "      <td>Arabic (official), French, Berber dialects</td>\n",
       "      <td>Arabic</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Andorra</th>\n",
       "      <td>Catalán (official), French, Castilian, Portuguese</td>\n",
       "      <td>Catalán</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Angola</th>\n",
       "      <td>Portuguese (official), Bantu and other African...</td>\n",
       "      <td>Portuguese</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Antigua and Barbuda</th>\n",
       "      <td>English (official), local dialects</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Argentina</th>\n",
       "      <td>Spanish (official), English, Italian, German, ...</td>\n",
       "      <td>Spanish</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Armenia</th>\n",
       "      <td>Armenian 98%, Yezidi, Russian</td>\n",
       "      <td>Armenian</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Australia</th>\n",
       "      <td>English 79%, native and other languages</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Austria</th>\n",
       "      <td>German (official nationwide); Slovene, Croatia...</td>\n",
       "      <td>German</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Azerbaijan</th>\n",
       "      <td>Azerbaijani Turkic 89%, Russian 3%, Armenian 2...</td>\n",
       "      <td>AzerbaijaniTurkic</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bahamas</th>\n",
       "      <td>English (official), Creole (among Haitian immi...</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bahrain</th>\n",
       "      <td>Arabic, English, Farsi, Urdu</td>\n",
       "      <td>Arabic</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bangladesh</th>\n",
       "      <td>Bangla (official), English</td>\n",
       "      <td>Bangla</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Barbados</th>\n",
       "      <td>English</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Belarus</th>\n",
       "      <td>Belorussian (White Russian), Russian, other</td>\n",
       "      <td>Belorussian</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Belgium</th>\n",
       "      <td>Dutch (Flemish) 60%, French 40%, German less t...</td>\n",
       "      <td>Dutch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Belize</th>\n",
       "      <td>English (official), Spanish, Mayan, Garifuna (...</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Benin</th>\n",
       "      <td>French (official), Fon, Yoruba, tribal languages</td>\n",
       "      <td>French</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bhutan</th>\n",
       "      <td>Dzongkha (official), Tibetan dialects (among B...</td>\n",
       "      <td>Dzongkha</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bolivia</th>\n",
       "      <td>Spanish, Quechua, Aymara (all official)</td>\n",
       "      <td>Spanish</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bosnia and Herzegovina</th>\n",
       "      <td>Bosnian, Croatian, Serbian</td>\n",
       "      <td>Bosnian</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Botswana</th>\n",
       "      <td>English 2% (official), Setswana 78%, Kalanga 8...</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Brazil</th>\n",
       "      <td>Portuguese (official), Spanish, English, French</td>\n",
       "      <td>Portuguese</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Brunei</th>\n",
       "      <td>Malay (official), English, Chinese</td>\n",
       "      <td>Malay</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bulgaria</th>\n",
       "      <td>Bulgarian 85%, Turkish 10%, Roma 4%</td>\n",
       "      <td>Bulgarian</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Burkina Faso</th>\n",
       "      <td>French (official); native African (Sudanic) la...</td>\n",
       "      <td>Frenchlanguages</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Burundi</th>\n",
       "      <td>Kirundi and French (official), Swahili</td>\n",
       "      <td>KirundiandFrench</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cambodia</th>\n",
       "      <td>Khmer 95% (official), French, English</td>\n",
       "      <td>Khmer</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cameroon</th>\n",
       "      <td>French, English (both official); 24 major Afri...</td>\n",
       "      <td>French</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Swaziland</th>\n",
       "      <td>English, siSwati (both official)</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sweden</th>\n",
       "      <td>Swedish, small Sami- and Finnish-speaking mino...</td>\n",
       "      <td>Swedish</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Switzerland</th>\n",
       "      <td>German 64%, French 20%, Italian 7% (all offici...</td>\n",
       "      <td>German</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Syria</th>\n",
       "      <td>Arabic (official); Kurdish, Armenian, Aramaic,...</td>\n",
       "      <td>Arabic</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Taiwan</th>\n",
       "      <td>Chinese (Mandarin, official), Taiwanese (Min),...</td>\n",
       "      <td>Chinese</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tajikistan</th>\n",
       "      <td>Tajik (official), Russian widely used in gover...</td>\n",
       "      <td>Tajik</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tanzania</th>\n",
       "      <td>Swahili, English (both official); Arabic; many...</td>\n",
       "      <td>Swahili</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Thailand</th>\n",
       "      <td>Thai (Siamese), English (secondary language of...</td>\n",
       "      <td>Thai</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Togo</th>\n",
       "      <td>French (official, commerce); Ewé, Mina (south)...</td>\n",
       "      <td>French</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tonga</th>\n",
       "      <td>Tongan (an Austronesian language), English</td>\n",
       "      <td>Tongan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Trinidad and Tobago</th>\n",
       "      <td>English (official), Hindi, French, Spanish, Ch...</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tunisia</th>\n",
       "      <td>Arabic (official, commerce), French (commerce)</td>\n",
       "      <td>Arabic</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Turkey</th>\n",
       "      <td>Turkish (official), Kurdish, Dimli, Azeri, Kab...</td>\n",
       "      <td>Turkish</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Turkmenistan</th>\n",
       "      <td>Turkmen 72%; Russian 12%; Uzbek 9%, other 7%</td>\n",
       "      <td>Turkmen</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tuvalu</th>\n",
       "      <td>Tuvaluan, English, Samoan, Kiribati (on the is...</td>\n",
       "      <td>Tuvaluan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Uganda</th>\n",
       "      <td>English (official), Ganda or Luganda, other Ni...</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ukraine</th>\n",
       "      <td>Ukrainian 67%, Russian 24%, Romanian, Polish, ...</td>\n",
       "      <td>Ukrainian</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>United Arab Emirates</th>\n",
       "      <td>Arabic (official), Persian, English, Hindi, Urdu</td>\n",
       "      <td>Arabic</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>United Kingdom</th>\n",
       "      <td>English, Welsh, Scots Gaelic</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>United States</th>\n",
       "      <td>English 82%, Spanish 11% (2000)</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Uruguay</th>\n",
       "      <td>Spanish, Portunol, or Brazilero</td>\n",
       "      <td>Spanish</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Uzbekistan</th>\n",
       "      <td>Uzbek 74.3%, Russian 14.2%, Tajik 4.4%, other ...</td>\n",
       "      <td>Uzbek.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Vanuatu</th>\n",
       "      <td>Bislama 23% (a Melanesian pidgin English), Eng...</td>\n",
       "      <td>Bislama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Vatican City (Holy See)</th>\n",
       "      <td>Italian, Latin, French, various other languages</td>\n",
       "      <td>Italian</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Venezuela</th>\n",
       "      <td>Spanish (official), numerous indigenous dialects</td>\n",
       "      <td>Spanish</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Vietnam</th>\n",
       "      <td>Vietnamese (official); English (increasingly f...</td>\n",
       "      <td>Vietnamese</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Western Sahara (proposed state)</th>\n",
       "      <td>Hassaniya Arabic, Moroccan Arabic</td>\n",
       "      <td>HassaniyaArabic</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yemen</th>\n",
       "      <td>Arabic</td>\n",
       "      <td>Arabic</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Zambia</th>\n",
       "      <td>English (official); major vernaculars: Bemba, ...</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Zimbabwe</th>\n",
       "      <td>English (official), Shona, Ndebele (Sindebele)...</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>198 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                          language  \\\n",
       "country                                                                              \n",
       "Afghanistan                      Dari Persian, Pashtu (both official), other Tu...   \n",
       "Albania                             Albanian (Tosk is the official dialect), Greek   \n",
       "Algeria                                 Arabic (official), French, Berber dialects   \n",
       "Andorra                          Catalán (official), French, Castilian, Portuguese   \n",
       "Angola                           Portuguese (official), Bantu and other African...   \n",
       "Antigua and Barbuda                             English (official), local dialects   \n",
       "Argentina                        Spanish (official), English, Italian, German, ...   \n",
       "Armenia                                              Armenian 98%, Yezidi, Russian   \n",
       "Australia                                  English 79%, native and other languages   \n",
       "Austria                          German (official nationwide); Slovene, Croatia...   \n",
       "Azerbaijan                       Azerbaijani Turkic 89%, Russian 3%, Armenian 2...   \n",
       "Bahamas                          English (official), Creole (among Haitian immi...   \n",
       "Bahrain                                               Arabic, English, Farsi, Urdu   \n",
       "Bangladesh                                              Bangla (official), English   \n",
       "Barbados                                                                   English   \n",
       "Belarus                                Belorussian (White Russian), Russian, other   \n",
       "Belgium                          Dutch (Flemish) 60%, French 40%, German less t...   \n",
       "Belize                           English (official), Spanish, Mayan, Garifuna (...   \n",
       "Benin                             French (official), Fon, Yoruba, tribal languages   \n",
       "Bhutan                           Dzongkha (official), Tibetan dialects (among B...   \n",
       "Bolivia                                    Spanish, Quechua, Aymara (all official)   \n",
       "Bosnia and Herzegovina                                  Bosnian, Croatian, Serbian   \n",
       "Botswana                         English 2% (official), Setswana 78%, Kalanga 8...   \n",
       "Brazil                             Portuguese (official), Spanish, English, French   \n",
       "Brunei                                          Malay (official), English, Chinese   \n",
       "Bulgaria                                       Bulgarian 85%, Turkish 10%, Roma 4%   \n",
       "Burkina Faso                     French (official); native African (Sudanic) la...   \n",
       "Burundi                                     Kirundi and French (official), Swahili   \n",
       "Cambodia                                     Khmer 95% (official), French, English   \n",
       "Cameroon                         French, English (both official); 24 major Afri...   \n",
       "...                                                                            ...   \n",
       "Swaziland                                         English, siSwati (both official)   \n",
       "Sweden                           Swedish, small Sami- and Finnish-speaking mino...   \n",
       "Switzerland                      German 64%, French 20%, Italian 7% (all offici...   \n",
       "Syria                            Arabic (official); Kurdish, Armenian, Aramaic,...   \n",
       "Taiwan                           Chinese (Mandarin, official), Taiwanese (Min),...   \n",
       "Tajikistan                       Tajik (official), Russian widely used in gover...   \n",
       "Tanzania                         Swahili, English (both official); Arabic; many...   \n",
       "Thailand                         Thai (Siamese), English (secondary language of...   \n",
       "Togo                             French (official, commerce); Ewé, Mina (south)...   \n",
       "Tonga                                   Tongan (an Austronesian language), English   \n",
       "Trinidad and Tobago              English (official), Hindi, French, Spanish, Ch...   \n",
       "Tunisia                             Arabic (official, commerce), French (commerce)   \n",
       "Turkey                           Turkish (official), Kurdish, Dimli, Azeri, Kab...   \n",
       "Turkmenistan                          Turkmen 72%; Russian 12%; Uzbek 9%, other 7%   \n",
       "Tuvalu                           Tuvaluan, English, Samoan, Kiribati (on the is...   \n",
       "Uganda                           English (official), Ganda or Luganda, other Ni...   \n",
       "Ukraine                          Ukrainian 67%, Russian 24%, Romanian, Polish, ...   \n",
       "United Arab Emirates              Arabic (official), Persian, English, Hindi, Urdu   \n",
       "United Kingdom                                        English, Welsh, Scots Gaelic   \n",
       "United States                                      English 82%, Spanish 11% (2000)   \n",
       "Uruguay                                            Spanish, Portunol, or Brazilero   \n",
       "Uzbekistan                       Uzbek 74.3%, Russian 14.2%, Tajik 4.4%, other ...   \n",
       "Vanuatu                          Bislama 23% (a Melanesian pidgin English), Eng...   \n",
       "Vatican City (Holy See)            Italian, Latin, French, various other languages   \n",
       "Venezuela                         Spanish (official), numerous indigenous dialects   \n",
       "Vietnam                          Vietnamese (official); English (increasingly f...   \n",
       "Western Sahara (proposed state)                  Hassaniya Arabic, Moroccan Arabic   \n",
       "Yemen                                                                       Arabic   \n",
       "Zambia                           English (official); major vernaculars: Bemba, ...   \n",
       "Zimbabwe                         English (official), Shona, Ndebele (Sindebele)...   \n",
       "\n",
       "                                         languages  \n",
       "country                                             \n",
       "Afghanistan                            DariPersian  \n",
       "Albania                                   Albanian  \n",
       "Algeria                                     Arabic  \n",
       "Andorra                                    Catalán  \n",
       "Angola                                  Portuguese  \n",
       "Antigua and Barbuda                        English  \n",
       "Argentina                                  Spanish  \n",
       "Armenia                                   Armenian  \n",
       "Australia                                  English  \n",
       "Austria                                     German  \n",
       "Azerbaijan                       AzerbaijaniTurkic  \n",
       "Bahamas                                    English  \n",
       "Bahrain                                     Arabic  \n",
       "Bangladesh                                  Bangla  \n",
       "Barbados                                   English  \n",
       "Belarus                                Belorussian  \n",
       "Belgium                                      Dutch  \n",
       "Belize                                     English  \n",
       "Benin                                       French  \n",
       "Bhutan                                    Dzongkha  \n",
       "Bolivia                                    Spanish  \n",
       "Bosnia and Herzegovina                     Bosnian  \n",
       "Botswana                                   English  \n",
       "Brazil                                  Portuguese  \n",
       "Brunei                                       Malay  \n",
       "Bulgaria                                 Bulgarian  \n",
       "Burkina Faso                       Frenchlanguages  \n",
       "Burundi                           KirundiandFrench  \n",
       "Cambodia                                     Khmer  \n",
       "Cameroon                                    French  \n",
       "...                                            ...  \n",
       "Swaziland                                  English  \n",
       "Sweden                                     Swedish  \n",
       "Switzerland                                 German  \n",
       "Syria                                       Arabic  \n",
       "Taiwan                                     Chinese  \n",
       "Tajikistan                                   Tajik  \n",
       "Tanzania                                   Swahili  \n",
       "Thailand                                      Thai  \n",
       "Togo                                        French  \n",
       "Tonga                                       Tongan  \n",
       "Trinidad and Tobago                        English  \n",
       "Tunisia                                     Arabic  \n",
       "Turkey                                     Turkish  \n",
       "Turkmenistan                               Turkmen  \n",
       "Tuvalu                                    Tuvaluan  \n",
       "Uganda                                     English  \n",
       "Ukraine                                  Ukrainian  \n",
       "United Arab Emirates                        Arabic  \n",
       "United Kingdom                             English  \n",
       "United States                              English  \n",
       "Uruguay                                    Spanish  \n",
       "Uzbekistan                                  Uzbek.  \n",
       "Vanuatu                                    Bislama  \n",
       "Vatican City (Holy See)                    Italian  \n",
       "Venezuela                                  Spanish  \n",
       "Vietnam                                 Vietnamese  \n",
       "Western Sahara (proposed state)    HassaniyaArabic  \n",
       "Yemen                                       Arabic  \n",
       "Zambia                                     English  \n",
       "Zimbabwe                                   English  \n",
       "\n",
       "[198 rows x 2 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = parse_url('https://www.infoplease.com/world/countries-world/languages-spoken-each-country-world')\n",
    "countryLanguages = df[0].rename(columns={0:'country',1:'language'}).set_index('country')\n",
    "countryLanguages['languages'] = [re.sub('\\d+|%|\\(.*\\)|\\s','',i).split(',')[0].split(';')[0] for i in countryLanguages['language']]\n",
    "countryLanguages    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "translator=Translator()\n",
    "translator.translate('Hola me llamo Jennifer 😜😜', dest='en').text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'contributors': None,\n",
       " 'coordinates': None,\n",
       " 'created_at': 'Sat Jun 30 19:33:55 +0000 2018',\n",
       " 'entities': {'hashtags': [{'indices': [87, 93], 'text': 'Tibet'},\n",
       "   {'indices': [94, 103], 'text': 'Tibetans'},\n",
       "   {'indices': [104, 114], 'text': 'FreeTibet'}],\n",
       "  'symbols': [],\n",
       "  'urls': [{'display_url': 'shar.es/anBniE',\n",
       "    'expanded_url': 'https://shar.es/anBniE',\n",
       "    'indices': [47, 70],\n",
       "    'url': 'https://t.co/zAKFBK5Mna'},\n",
       "   {'display_url': 'twitter.com/i/web/status/1…',\n",
       "    'expanded_url': 'https://twitter.com/i/web/status/1013143614623440896',\n",
       "    'indices': [116, 139],\n",
       "    'url': 'https://t.co/9GU9LWHwGp'}],\n",
       "  'user_mentions': [{'id': 23065876,\n",
       "    'id_str': '23065876',\n",
       "    'indices': [75, 85],\n",
       "    'name': 'Tsem Tulku Rinpoche',\n",
       "    'screen_name': 'tsemtulku'}]},\n",
       " 'favorite_count': 0,\n",
       " 'favorited': False,\n",
       " 'geo': None,\n",
       " 'id': 1013143614623440896,\n",
       " 'id_str': '1013143614623440896',\n",
       " 'in_reply_to_screen_name': None,\n",
       " 'in_reply_to_status_id': None,\n",
       " 'in_reply_to_status_id_str': None,\n",
       " 'in_reply_to_user_id': None,\n",
       " 'in_reply_to_user_id_str': None,\n",
       " 'is_quote_status': False,\n",
       " 'lang': 'en',\n",
       " 'metadata': {'iso_language_code': 'en', 'result_type': 'recent'},\n",
       " 'place': None,\n",
       " 'possibly_sensitive': False,\n",
       " 'retweet_count': 0,\n",
       " 'retweeted': False,\n",
       " 'source': '<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>',\n",
       " 'text': 'The Sixty-Four Yogini Temple of Hirapur, India https://t.co/zAKFBK5Mna via @tsemtulku\\n\\n#Tibet\\n#Tibetans\\n#FreeTibet… https://t.co/9GU9LWHwGp',\n",
       " 'truncated': True,\n",
       " 'user': {'contributors_enabled': False,\n",
       "  'created_at': 'Wed Nov 18 10:24:35 +0000 2015',\n",
       "  'default_profile': False,\n",
       "  'default_profile_image': False,\n",
       "  'description': '',\n",
       "  'entities': {'description': {'urls': []}},\n",
       "  'favourites_count': 7051,\n",
       "  'follow_request_sent': False,\n",
       "  'followers_count': 288,\n",
       "  'following': False,\n",
       "  'friends_count': 181,\n",
       "  'geo_enabled': True,\n",
       "  'has_extended_profile': True,\n",
       "  'id': 4277739734,\n",
       "  'id_str': '4277739734',\n",
       "  'is_translation_enabled': False,\n",
       "  'is_translator': False,\n",
       "  'lang': 'en',\n",
       "  'listed_count': 74,\n",
       "  'location': 'Bentung, Pahang',\n",
       "  'name': 'Chris Chong',\n",
       "  'notifications': False,\n",
       "  'profile_background_color': '000000',\n",
       "  'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png',\n",
       "  'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png',\n",
       "  'profile_background_tile': False,\n",
       "  'profile_banner_url': 'https://pbs.twimg.com/profile_banners/4277739734/1524960980',\n",
       "  'profile_image_url': 'http://pbs.twimg.com/profile_images/780584831935586304/GLHvkq-Q_normal.jpg',\n",
       "  'profile_image_url_https': 'https://pbs.twimg.com/profile_images/780584831935586304/GLHvkq-Q_normal.jpg',\n",
       "  'profile_link_color': '1B95E0',\n",
       "  'profile_sidebar_border_color': '000000',\n",
       "  'profile_sidebar_fill_color': '000000',\n",
       "  'profile_text_color': '000000',\n",
       "  'profile_use_background_image': False,\n",
       "  'protected': False,\n",
       "  'screen_name': 'chrischong90',\n",
       "  'statuses_count': 9983,\n",
       "  'time_zone': None,\n",
       "  'translator_type': 'none',\n",
       "  'url': None,\n",
       "  'utc_offset': None,\n",
       "  'verified': False}}"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Setup Tweepy API Authentication\n",
    "auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n",
    "auth.set_access_token(access_token, access_token_secret)\n",
    "api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())\n",
    "tweets = api.search('dalailama')\n",
    "tweets['statuses'][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>city</th>\n",
       "      <th>population</th>\n",
       "      <th>density</th>\n",
       "      <th>country</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Manila</td>\n",
       "      <td>1780148</td>\n",
       "      <td>107561</td>\n",
       "      <td>Philippines</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Ebeye</td>\n",
       "      <td>15000</td>\n",
       "      <td>107143</td>\n",
       "      <td>Marshall Islands</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Pateros (Municipality)</td>\n",
       "      <td>64147</td>\n",
       "      <td>79114</td>\n",
       "      <td>Philippines</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Mumbai</td>\n",
       "      <td>12478447</td>\n",
       "      <td>73837</td>\n",
       "      <td>India</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Dhaka</td>\n",
       "      <td>8523137</td>\n",
       "      <td>73583</td>\n",
       "      <td>Bangladesh</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Bnei Brak</td>\n",
       "      <td>200162</td>\n",
       "      <td>73159</td>\n",
       "      <td>Israel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Caloocan</td>\n",
       "      <td>1489040</td>\n",
       "      <td>72302</td>\n",
       "      <td>Philippines</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Levallois-Perret</td>\n",
       "      <td>63436</td>\n",
       "      <td>68458</td>\n",
       "      <td>France</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Le Pré-Saint-Gervais</td>\n",
       "      <td>18121</td>\n",
       "      <td>67047</td>\n",
       "      <td>France</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Chennai</td>\n",
       "      <td>4681087</td>\n",
       "      <td>66961</td>\n",
       "      <td>India</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Vincennes</td>\n",
       "      <td>48689</td>\n",
       "      <td>66371</td>\n",
       "      <td>France</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Saint-Mandé</td>\n",
       "      <td>22627</td>\n",
       "      <td>65115</td>\n",
       "      <td>France</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Bally</td>\n",
       "      <td>291972</td>\n",
       "      <td>64031</td>\n",
       "      <td>India</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Kolkata</td>\n",
       "      <td>4486679</td>\n",
       "      <td>62813</td>\n",
       "      <td>India</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Saint-Josse-ten-Noode</td>\n",
       "      <td>27548</td>\n",
       "      <td>62404</td>\n",
       "      <td>Belgium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Kathmandu</td>\n",
       "      <td>1183000</td>\n",
       "      <td>61972</td>\n",
       "      <td>Nepal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Subang Jaya</td>\n",
       "      <td>1683589</td>\n",
       "      <td>38482</td>\n",
       "      <td>Malaysia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Neapoli</td>\n",
       "      <td>27084</td>\n",
       "      <td>60186</td>\n",
       "      <td>Greece</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Montrouge</td>\n",
       "      <td>48410</td>\n",
       "      <td>59705</td>\n",
       "      <td>France</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>Malé</td>\n",
       "      <td>133412</td>\n",
       "      <td>59559</td>\n",
       "      <td>Maldives</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>Malabon</td>\n",
       "      <td>353337</td>\n",
       "      <td>58607</td>\n",
       "      <td>Philippines</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>Guttenberg, New Jersey</td>\n",
       "      <td>11481</td>\n",
       "      <td>58577</td>\n",
       "      <td>United States</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>Pasig</td>\n",
       "      <td>669773</td>\n",
       "      <td>55958</td>\n",
       "      <td>Philippines</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>Paris</td>\n",
       "      <td>2265886</td>\n",
       "      <td>55673</td>\n",
       "      <td>France</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>Mislata</td>\n",
       "      <td>43756</td>\n",
       "      <td>54695</td>\n",
       "      <td>Spain</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>Macau</td>\n",
       "      <td>643100</td>\n",
       "      <td>54790</td>\n",
       "      <td>China</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>Kallithea</td>\n",
       "      <td>100050</td>\n",
       "      <td>54733</td>\n",
       "      <td>Greece</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>Nea Smyrni</td>\n",
       "      <td>73090</td>\n",
       "      <td>53717</td>\n",
       "      <td>Greece</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>Howrah</td>\n",
       "      <td>1072161</td>\n",
       "      <td>53670</td>\n",
       "      <td>India</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>Pasay</td>\n",
       "      <td>392869</td>\n",
       "      <td>53554</td>\n",
       "      <td>Philippines</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>San Juan</td>\n",
       "      <td>121430</td>\n",
       "      <td>52946</td>\n",
       "      <td>Philippines</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>Colombo</td>\n",
       "      <td>323257</td>\n",
       "      <td>52871</td>\n",
       "      <td>Sri Lanka</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>L'Hospitalet de Llobregat</td>\n",
       "      <td>252171</td>\n",
       "      <td>52536</td>\n",
       "      <td>Spain</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>Union City, New Jersey</td>\n",
       "      <td>66455</td>\n",
       "      <td>51810</td>\n",
       "      <td>United States</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>Cairo</td>\n",
       "      <td>11742120</td>\n",
       "      <td>50180</td>\n",
       "      <td>Egypt</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>Makati</td>\n",
       "      <td>529039</td>\n",
       "      <td>50080</td>\n",
       "      <td>Philippines</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>West New York, New Jersey</td>\n",
       "      <td>49708</td>\n",
       "      <td>49362</td>\n",
       "      <td>United States</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>Saint-Gilles</td>\n",
       "      <td>46931</td>\n",
       "      <td>48234</td>\n",
       "      <td>Belgium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>Bandung</td>\n",
       "      <td>3075478</td>\n",
       "      <td>48054</td>\n",
       "      <td>Indonesia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>Monaco</td>\n",
       "      <td>36950</td>\n",
       "      <td>47372</td>\n",
       "      <td>Monaco</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>Boulogne-Billancourt</td>\n",
       "      <td>113085</td>\n",
       "      <td>47240</td>\n",
       "      <td>France</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>Quezon City</td>\n",
       "      <td>2936116</td>\n",
       "      <td>45999</td>\n",
       "      <td>Philippines</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         city  population  density           country\n",
       "1                      Manila     1780148   107561       Philippines\n",
       "2                       Ebeye       15000   107143  Marshall Islands\n",
       "3      Pateros (Municipality)       64147    79114       Philippines\n",
       "4                      Mumbai    12478447    73837             India\n",
       "5                       Dhaka     8523137    73583        Bangladesh\n",
       "6                   Bnei Brak      200162    73159            Israel\n",
       "7                    Caloocan     1489040    72302       Philippines\n",
       "8            Levallois-Perret       63436    68458            France\n",
       "9       Le Pré-Saint-Gervais       18121    67047            France\n",
       "10                    Chennai     4681087    66961             India\n",
       "11                  Vincennes       48689    66371            France\n",
       "12               Saint-Mandé       22627    65115            France\n",
       "13                      Bally      291972    64031             India\n",
       "14                    Kolkata     4486679    62813             India\n",
       "15      Saint-Josse-ten-Noode       27548    62404           Belgium\n",
       "16                  Kathmandu     1183000    61972             Nepal\n",
       "17                Subang Jaya     1683589    38482          Malaysia\n",
       "18                    Neapoli       27084    60186            Greece\n",
       "19                  Montrouge       48410    59705            France\n",
       "20                      Malé      133412    59559          Maldives\n",
       "21                    Malabon      353337    58607       Philippines\n",
       "22     Guttenberg, New Jersey       11481    58577     United States\n",
       "23                      Pasig      669773    55958       Philippines\n",
       "24                      Paris     2265886    55673            France\n",
       "25                    Mislata       43756    54695             Spain\n",
       "26                      Macau      643100    54790             China\n",
       "27                  Kallithea      100050    54733            Greece\n",
       "28                 Nea Smyrni       73090    53717            Greece\n",
       "29                     Howrah     1072161    53670             India\n",
       "30                      Pasay      392869    53554       Philippines\n",
       "31                   San Juan      121430    52946       Philippines\n",
       "32                    Colombo      323257    52871         Sri Lanka\n",
       "33  L'Hospitalet de Llobregat      252171    52536             Spain\n",
       "34     Union City, New Jersey       66455    51810     United States\n",
       "35                      Cairo    11742120    50180             Egypt\n",
       "36                     Makati      529039    50080       Philippines\n",
       "37  West New York, New Jersey       49708    49362     United States\n",
       "38               Saint-Gilles       46931    48234           Belgium\n",
       "39                    Bandung     3075478    48054         Indonesia\n",
       "40                     Monaco       36950    47372            Monaco\n",
       "41       Boulogne-Billancourt      113085    47240            France\n",
       "42                Quezon City     2936116    45999       Philippines"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import wikipedia as wp\n",
    "from pygeocoder import Geocoder\n",
    "import time\n",
    "import re\n",
    "    \n",
    "#-- Get the most populated cities from wikipedia (Thank you wikipedia library!) --\n",
    "html = wp.page(\"List_of_cities_by_population_density\").html().encode(\"UTF-8\")\n",
    "df = pd.read_html(html)[1]\n",
    "df=df.drop([2,3,4],axis=1)\n",
    "df=df.rename(columns={0:'city',1:'population',5:'density',6:'country'})\n",
    "df=df.iloc[1:]\n",
    "df['population']=[int(city.split('\\xa0')[-1].split('[')[0].replace(',','')) for city in df['population']]\n",
    "df['density']=[int(city.split('\\xa0')[-1].split('[')[0].replace(',','')) for city in df['density']]\n",
    "df\n",
    "#--- ---- ----- ----- ---- ---- ----- ---- ---- ----- --- ---- ---- ----- ---- ---"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[1780148,\n",
       " 15000,\n",
       " 64147,\n",
       " 12478447,\n",
       " 8523137,\n",
       " 200162,\n",
       " 1489040,\n",
       " 63436,\n",
       " 18121,\n",
       " 4681087,\n",
       " 48689,\n",
       " 22627,\n",
       " 291972,\n",
       " 4486679,\n",
       " 27548,\n",
       " 1183000,\n",
       " 1683589,\n",
       " 27084,\n",
       " 48410,\n",
       " 133412,\n",
       " 353337,\n",
       " 11481,\n",
       " 669773,\n",
       " 2265886,\n",
       " 43756,\n",
       " 643100,\n",
       " 100050,\n",
       " 73090,\n",
       " 1072161,\n",
       " 392869,\n",
       " 121430,\n",
       " 323257,\n",
       " 252171,\n",
       " 66455,\n",
       " 11742120,\n",
       " 529039,\n",
       " 49708,\n",
       " 46931,\n",
       " 3075478,\n",
       " 36950,\n",
       " 113085,\n",
       " 2936116]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[int(city.split('\\xa0')[-1].split('[')[0].replace(',','')) for city in df['population']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>followers</th>\n",
       "      <th>following</th>\n",
       "      <th>geo</th>\n",
       "      <th>hashtags</th>\n",
       "      <th>statuses_count</th>\n",
       "      <th>text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>154060</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>22455</td>\n",
       "      <td>@ryapee Hi Rya order ako ulit polyblender mejo...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>83</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>3030</td>\n",
       "      <td>Yung dating saling pusa naging aso bigla // 🎶</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>643</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[{'text': 'AdoptDontShop', 'indices': [38, 52]}]</td>\n",
       "      <td>8529</td>\n",
       "      <td>sana meron din dito sa Pilipinas yung #AdoptDo...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2729</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>28771</td>\n",
       "      <td>Aso nga kasi ako, bantay ako dito hahaha 😂</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>25</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>14</td>\n",
       "      <td>lakas mangahol ng kaklase ko dinaig pa aso nam...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>116</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>1993</td>\n",
       "      <td>@LampanoElla Dun sa aso oo HAHA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>113</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>4495</td>\n",
       "      <td>Me: labas mo dila para lumabas dila ng aso \\nS...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>130</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>1745</td>\n",
       "      <td>cute kong aso https://t.co/twDprm7o5P</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>54</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>727</td>\n",
       "      <td>me: pabili pong dog food\\ntindero: alin? ung p...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>24</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>296</td>\n",
       "      <td>@dsgalarpez hahahaha aso ka na ba ngayon?</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>841</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>29025</td>\n",
       "      <td>@DenniceRoselle Uy kawawa mga aso. Di naman si...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>260</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>8486</td>\n",
       "      <td>Nagtanggal tuloy ako nang mga tae nang aso kai...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>260</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>8486</td>\n",
       "      <td>Nakakapikon ung aso 😭😭</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>176</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>4682</td>\n",
       "      <td>May mga sakit aso namin hanep</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>1357</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>15736</td>\n",
       "      <td>i hate when strangers esp. men look at you str...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>422</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>21045</td>\n",
       "      <td>@dnnkthryn Ngek malas. Sa rosewood naman okay ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>422</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>21045</td>\n",
       "      <td>@dnnkthryn Yup. Iba talaga pag bahay super fre...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>365</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>2083</td>\n",
       "      <td>Tang ina sobrang iba pala pakiramdam pag namat...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>75</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>243</td>\n",
       "      <td>aso't pusa ❤\\ngoodmorning hubby babe ! https:/...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>1021</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>11119</td>\n",
       "      <td>RT @akoposimarcelo: Yung buti pa yung mga aso,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>47</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>905</td>\n",
       "      <td>napaka clingy ng aso ko, nebeyen hehe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>147</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>3406</td>\n",
       "      <td>ang laki ng aso!!! panay naka tahol</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>269</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>21310</td>\n",
       "      <td>Alam ata ng aso ko na birthday ko ngayon. Iba ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>548</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>3553</td>\n",
       "      <td>Nagduduet nanaman yung dalawang aso hays</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>88</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>1470</td>\n",
       "      <td>Yung aso naming maligalig na palundag lundag pa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>394</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>67411</td>\n",
       "      <td>Mukha talaga akong tanga kapag nakikipaglaro a...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>105</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>417</td>\n",
       "      <td>RT @akoposimarcelo: Yung buti pa yung mga aso,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>174</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>4033</td>\n",
       "      <td>nagdisitahulan mga aso ang creepy huhu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>90</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>828</td>\n",
       "      <td>@glbysrcmny aso</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>850</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>11211</td>\n",
       "      <td>Distemper virus. may ganyan plang sakit nang m...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70</th>\n",
       "      <td>80</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>1208</td>\n",
       "      <td>Plus two agad aso namen AAHAHAHAHAH saya</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>180</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>6216</td>\n",
       "      <td>@jhnlstrpgnsn Hahaha hindi ko aso yun sa ate k...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td>1280</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>5483</td>\n",
       "      <td>RT @akoposimarcelo: Yung buti pa yung mga aso,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>74</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>2055</td>\n",
       "      <td>Mama: Tanga Di Mo Pa Pinapakain Yung Aso.\\n\\nT...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74</th>\n",
       "      <td>72</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>456</td>\n",
       "      <td>@cescamarii di yan kusa aso yan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75</th>\n",
       "      <td>365</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>2083</td>\n",
       "      <td>Nasagasaan aso ko 😢</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>76</th>\n",
       "      <td>301</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>4788</td>\n",
       "      <td>Ngayon ko nalang na appreciate ulit yung ganda...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>77</th>\n",
       "      <td>561</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>13927</td>\n",
       "      <td>pag gantong nalulungkot ako imbis na maghanap ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>78</th>\n",
       "      <td>180</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>6718</td>\n",
       "      <td>creepy ng aso pero mahal ko kayo HAHAHHHAHAHA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79</th>\n",
       "      <td>329</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>1259</td>\n",
       "      <td>Pagod bebi ko ako inaantok na tagal ni aso HAH...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80</th>\n",
       "      <td>5</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>460</td>\n",
       "      <td>So ayun diba may hamster kami si Luxus saka si...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>81</th>\n",
       "      <td>797</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>42407</td>\n",
       "      <td>@Anniefernando6 @aldenAllTheWay Baka kayo ang ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82</th>\n",
       "      <td>293</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>3881</td>\n",
       "      <td>hindi ako to pramis. hahahahahahaha hindi ako ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>83</th>\n",
       "      <td>293</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>3881</td>\n",
       "      <td>HUY ANG WEIRD TALAGA KASI NAGSESAVE AKO NG PIC...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>84</th>\n",
       "      <td>84</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>15631</td>\n",
       "      <td>Fun fact about you — Sobrang love ko yung mga ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85</th>\n",
       "      <td>293</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>3881</td>\n",
       "      <td>diko maintindihan bakit ako nagsesave ng pictu...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>86</th>\n",
       "      <td>440</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>1375</td>\n",
       "      <td>Hayaan mo lang na husgahan ka nila.Hindi yung ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>87</th>\n",
       "      <td>476</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>7158</td>\n",
       "      <td>@pauiicosta lumabas na naman pagka aso mo haha...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>35</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>1902</td>\n",
       "      <td>nagttampo ako kay potchi, tangina aso lang yon...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>89</th>\n",
       "      <td>183</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>1154</td>\n",
       "      <td>Ako: pare iiyak ka pag namatay aso mo? \\nRoque...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>90</th>\n",
       "      <td>62</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>756</td>\n",
       "      <td>Aso ko e pero di na maghahabol🐶 https://t.co/Y...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>91</th>\n",
       "      <td>44</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>64</td>\n",
       "      <td>hi aso</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>92</th>\n",
       "      <td>167</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>2591</td>\n",
       "      <td>Nangapitbahay nako para sa aso. Happy pill! 😊 ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93</th>\n",
       "      <td>536</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>6135</td>\n",
       "      <td>NP: Banal na aso, santong kabayo\\n\\nNatatawa a...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>94</th>\n",
       "      <td>3228</td>\n",
       "      <td>False</td>\n",
       "      <td>{'type': 'Point', 'coordinates': [14.61941886,...</td>\n",
       "      <td>[]</td>\n",
       "      <td>10223</td>\n",
       "      <td>Late post: Buti na lang talaga alert ako..kunc...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>242</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>4463</td>\n",
       "      <td>tao,ahas at aso.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>473</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>5234</td>\n",
       "      <td>ALAM NIYO BANG MUNTIKAN NG GAWING PAGKAIN NG A...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>220</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>10928</td>\n",
       "      <td>Dang kyot ng aso nila Sir huhu i want 😍</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>141</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>12781</td>\n",
       "      <td>Hindi sa dinidepensahan pero may umuulol na na...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>200</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>[]</td>\n",
       "      <td>8882</td>\n",
       "      <td>RT @Itsmeearlbravo: Di naman siguro ako pinang...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>100 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    followers  following                                                geo  \\\n",
       "0      154060      False                                               None   \n",
       "1          83      False                                               None   \n",
       "2         643      False                                               None   \n",
       "3        2729      False                                               None   \n",
       "4          25      False                                               None   \n",
       "5         116      False                                               None   \n",
       "6         113      False                                               None   \n",
       "7         130      False                                               None   \n",
       "8          54      False                                               None   \n",
       "9          24      False                                               None   \n",
       "10        841      False                                               None   \n",
       "11        260      False                                               None   \n",
       "12        260      False                                               None   \n",
       "13        176      False                                               None   \n",
       "14       1357      False                                               None   \n",
       "15        422      False                                               None   \n",
       "16        422      False                                               None   \n",
       "17        365      False                                               None   \n",
       "18         75      False                                               None   \n",
       "19       1021      False                                               None   \n",
       "20         47      False                                               None   \n",
       "21        147      False                                               None   \n",
       "22        269      False                                               None   \n",
       "23        548      False                                               None   \n",
       "24         88      False                                               None   \n",
       "25        394      False                                               None   \n",
       "26        105      False                                               None   \n",
       "27        174      False                                               None   \n",
       "28         90      False                                               None   \n",
       "29        850      False                                               None   \n",
       "..        ...        ...                                                ...   \n",
       "70         80      False                                               None   \n",
       "71        180      False                                               None   \n",
       "72       1280      False                                               None   \n",
       "73         74      False                                               None   \n",
       "74         72      False                                               None   \n",
       "75        365      False                                               None   \n",
       "76        301      False                                               None   \n",
       "77        561      False                                               None   \n",
       "78        180      False                                               None   \n",
       "79        329      False                                               None   \n",
       "80          5      False                                               None   \n",
       "81        797      False                                               None   \n",
       "82        293      False                                               None   \n",
       "83        293      False                                               None   \n",
       "84         84      False                                               None   \n",
       "85        293      False                                               None   \n",
       "86        440      False                                               None   \n",
       "87        476      False                                               None   \n",
       "88         35      False                                               None   \n",
       "89        183      False                                               None   \n",
       "90         62      False                                               None   \n",
       "91         44      False                                               None   \n",
       "92        167      False                                               None   \n",
       "93        536      False                                               None   \n",
       "94       3228      False  {'type': 'Point', 'coordinates': [14.61941886,...   \n",
       "95        242      False                                               None   \n",
       "96        473      False                                               None   \n",
       "97        220      False                                               None   \n",
       "98        141      False                                               None   \n",
       "99        200      False                                               None   \n",
       "\n",
       "                                            hashtags  statuses_count  \\\n",
       "0                                                 []           22455   \n",
       "1                                                 []            3030   \n",
       "2   [{'text': 'AdoptDontShop', 'indices': [38, 52]}]            8529   \n",
       "3                                                 []           28771   \n",
       "4                                                 []              14   \n",
       "5                                                 []            1993   \n",
       "6                                                 []            4495   \n",
       "7                                                 []            1745   \n",
       "8                                                 []             727   \n",
       "9                                                 []             296   \n",
       "10                                                []           29025   \n",
       "11                                                []            8486   \n",
       "12                                                []            8486   \n",
       "13                                                []            4682   \n",
       "14                                                []           15736   \n",
       "15                                                []           21045   \n",
       "16                                                []           21045   \n",
       "17                                                []            2083   \n",
       "18                                                []             243   \n",
       "19                                                []           11119   \n",
       "20                                                []             905   \n",
       "21                                                []            3406   \n",
       "22                                                []           21310   \n",
       "23                                                []            3553   \n",
       "24                                                []            1470   \n",
       "25                                                []           67411   \n",
       "26                                                []             417   \n",
       "27                                                []            4033   \n",
       "28                                                []             828   \n",
       "29                                                []           11211   \n",
       "..                                               ...             ...   \n",
       "70                                                []            1208   \n",
       "71                                                []            6216   \n",
       "72                                                []            5483   \n",
       "73                                                []            2055   \n",
       "74                                                []             456   \n",
       "75                                                []            2083   \n",
       "76                                                []            4788   \n",
       "77                                                []           13927   \n",
       "78                                                []            6718   \n",
       "79                                                []            1259   \n",
       "80                                                []             460   \n",
       "81                                                []           42407   \n",
       "82                                                []            3881   \n",
       "83                                                []            3881   \n",
       "84                                                []           15631   \n",
       "85                                                []            3881   \n",
       "86                                                []            1375   \n",
       "87                                                []            7158   \n",
       "88                                                []            1902   \n",
       "89                                                []            1154   \n",
       "90                                                []             756   \n",
       "91                                                []              64   \n",
       "92                                                []            2591   \n",
       "93                                                []            6135   \n",
       "94                                                []           10223   \n",
       "95                                                []            4463   \n",
       "96                                                []            5234   \n",
       "97                                                []           10928   \n",
       "98                                                []           12781   \n",
       "99                                                []            8882   \n",
       "\n",
       "                                                 text  \n",
       "0   @ryapee Hi Rya order ako ulit polyblender mejo...  \n",
       "1       Yung dating saling pusa naging aso bigla // 🎶  \n",
       "2   sana meron din dito sa Pilipinas yung #AdoptDo...  \n",
       "3          Aso nga kasi ako, bantay ako dito hahaha 😂  \n",
       "4   lakas mangahol ng kaklase ko dinaig pa aso nam...  \n",
       "5                     @LampanoElla Dun sa aso oo HAHA  \n",
       "6   Me: labas mo dila para lumabas dila ng aso \\nS...  \n",
       "7               cute kong aso https://t.co/twDprm7o5P  \n",
       "8   me: pabili pong dog food\\ntindero: alin? ung p...  \n",
       "9           @dsgalarpez hahahaha aso ka na ba ngayon?  \n",
       "10  @DenniceRoselle Uy kawawa mga aso. Di naman si...  \n",
       "11  Nagtanggal tuloy ako nang mga tae nang aso kai...  \n",
       "12                             Nakakapikon ung aso 😭😭  \n",
       "13                      May mga sakit aso namin hanep  \n",
       "14  i hate when strangers esp. men look at you str...  \n",
       "15  @dnnkthryn Ngek malas. Sa rosewood naman okay ...  \n",
       "16  @dnnkthryn Yup. Iba talaga pag bahay super fre...  \n",
       "17  Tang ina sobrang iba pala pakiramdam pag namat...  \n",
       "18  aso't pusa ❤\\ngoodmorning hubby babe ! https:/...  \n",
       "19  RT @akoposimarcelo: Yung buti pa yung mga aso,...  \n",
       "20              napaka clingy ng aso ko, nebeyen hehe  \n",
       "21                ang laki ng aso!!! panay naka tahol  \n",
       "22  Alam ata ng aso ko na birthday ko ngayon. Iba ...  \n",
       "23           Nagduduet nanaman yung dalawang aso hays  \n",
       "24    Yung aso naming maligalig na palundag lundag pa  \n",
       "25  Mukha talaga akong tanga kapag nakikipaglaro a...  \n",
       "26  RT @akoposimarcelo: Yung buti pa yung mga aso,...  \n",
       "27             nagdisitahulan mga aso ang creepy huhu  \n",
       "28                                    @glbysrcmny aso  \n",
       "29  Distemper virus. may ganyan plang sakit nang m...  \n",
       "..                                                ...  \n",
       "70           Plus two agad aso namen AAHAHAHAHAH saya  \n",
       "71  @jhnlstrpgnsn Hahaha hindi ko aso yun sa ate k...  \n",
       "72  RT @akoposimarcelo: Yung buti pa yung mga aso,...  \n",
       "73  Mama: Tanga Di Mo Pa Pinapakain Yung Aso.\\n\\nT...  \n",
       "74                    @cescamarii di yan kusa aso yan  \n",
       "75                                Nasagasaan aso ko 😢  \n",
       "76  Ngayon ko nalang na appreciate ulit yung ganda...  \n",
       "77  pag gantong nalulungkot ako imbis na maghanap ...  \n",
       "78      creepy ng aso pero mahal ko kayo HAHAHHHAHAHA  \n",
       "79  Pagod bebi ko ako inaantok na tagal ni aso HAH...  \n",
       "80  So ayun diba may hamster kami si Luxus saka si...  \n",
       "81  @Anniefernando6 @aldenAllTheWay Baka kayo ang ...  \n",
       "82  hindi ako to pramis. hahahahahahaha hindi ako ...  \n",
       "83  HUY ANG WEIRD TALAGA KASI NAGSESAVE AKO NG PIC...  \n",
       "84  Fun fact about you — Sobrang love ko yung mga ...  \n",
       "85  diko maintindihan bakit ako nagsesave ng pictu...  \n",
       "86  Hayaan mo lang na husgahan ka nila.Hindi yung ...  \n",
       "87  @pauiicosta lumabas na naman pagka aso mo haha...  \n",
       "88  nagttampo ako kay potchi, tangina aso lang yon...  \n",
       "89  Ako: pare iiyak ka pag namatay aso mo? \\nRoque...  \n",
       "90  Aso ko e pero di na maghahabol🐶 https://t.co/Y...  \n",
       "91                                             hi aso  \n",
       "92  Nangapitbahay nako para sa aso. Happy pill! 😊 ...  \n",
       "93  NP: Banal na aso, santong kabayo\\n\\nNatatawa a...  \n",
       "94  Late post: Buti na lang talaga alert ako..kunc...  \n",
       "95                                   tao,ahas at aso.  \n",
       "96  ALAM NIYO BANG MUNTIKAN NG GAWING PAGKAIN NG A...  \n",
       "97            Dang kyot ng aso nila Sir huhu i want 😍  \n",
       "98  Hindi sa dinidepensahan pero may umuulol na na...  \n",
       "99  RT @Itsmeearlbravo: Di naman siguro ako pinang...  \n",
       "\n",
       "[100 rows x 6 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tweets['Manila,Philippines']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Delft, Holland'"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "\n",
    "# Setup Tweepy API Authentication\n",
    "auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n",
    "auth.set_access_token(access_token, access_token_secret)\n",
    "api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())\n",
    "tweetsa=api.search(q='trump', geocode='52.132633,5.29126,100km')\n",
    "tweetsa['statuses'][0]['user']['location']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "If using all scalar values, you must pass an index",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-114-f7373bd5071b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'b'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'c'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0mdf2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'b'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'c'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m+=\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[1;32m    328\u001b[0m                                  dtype=dtype, copy=copy)\n\u001b[1;32m    329\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 330\u001b[0;31m             \u001b[0mmgr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_init_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    331\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMaskedArray\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    332\u001b[0m             \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmrecords\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mmrecords\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_init_dict\u001b[0;34m(self, data, index, columns, dtype)\u001b[0m\n\u001b[1;32m    459\u001b[0m             \u001b[0marrays\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    460\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 461\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0m_arrays_to_mgr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marrays\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata_names\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    462\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    463\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_init_ndarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_arrays_to_mgr\u001b[0;34m(arrays, arr_names, index, columns, dtype)\u001b[0m\n\u001b[1;32m   6161\u001b[0m     \u001b[0;31m# figure out the index, if necessary\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6162\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 6163\u001b[0;31m         \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mextract_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marrays\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   6164\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6165\u001b[0m         \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_ensure_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mextract_index\u001b[0;34m(data)\u001b[0m\n\u001b[1;32m   6200\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6201\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mindexes\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mraw_lengths\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 6202\u001b[0;31m             raise ValueError('If using all scalar values, you must pass'\n\u001b[0m\u001b[1;32m   6203\u001b[0m                              ' an index')\n\u001b[1;32m   6204\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: If using all scalar values, you must pass an index"
     ]
    }
   ],
   "source": [
    "df=pd.DataFrame({'a':1,'b':2,'c':3})\n",
    "df2=pd.DataFrame({'a':1,'b':2,'c':3})\n",
    "df+=df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fri Jun 29 20:43:37 +0000 2018\n",
      "Thu Jun 28 14:21:15 +0000 2018\n",
      "Wed Jun 27 23:51:55 +0000 2018\n",
      "Tue Jun 26 23:12:07 +0000 2018\n",
      "Mon Jun 25 19:23:04 +0000 2018\n",
      "Sun Jun 24 23:58:25 +0000 2018\n",
      "Sat Jun 23 23:06:10 +0000 2018\n",
      "Fri Jun 22 17:51:26 +0000 2018\n",
      "Thu Jun 21 22:46:17 +0000 2018\n"
     ]
    },
    {
     "ename": "IndexError",
     "evalue": "list index out of range",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mIndexError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-82-e5c117ad83b9>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mday\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoday\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mtimedelta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m70\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m     \u001b[0mtweets\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mapi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msearch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'cat'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mgeocode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'34.0934,56.134,200mi'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muntil\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mday\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'1012848185839058950'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtweets\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'statuses'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'created_at'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mIndexError\u001b[0m: list index out of range"
     ]
    }
   ],
   "source": [
    "from datetime import date, timedelta\n",
    "date.today() - timedelta(0)\n",
    "for day in [str(date.today() - timedelta(i)).split()[0] for i in range(70)]:\n",
    "    tweets=api.search('cat',geocode = '34.0934,56.134,200mi', until=day, max_id='1012848185839058950')\n",
    "    print(tweets['statuses'][0]['created_at'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 2\n",
      "2 3\n"
     ]
    }
   ],
   "source": [
    "for a,b in zip([1,2],[2,3]):\n",
    "    print(a,b)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state</th>\n",
       "      <th>city</th>\n",
       "      <th>density</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>57116</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>51810</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>49362</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>39066</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New York</td>\n",
       "      <td>27788</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New York</td>\n",
       "      <td>27016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>24577</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>24060</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>23216</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>22437</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New York</td>\n",
       "      <td>21635</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Miami</td>\n",
       "      <td>Florida</td>\n",
       "      <td>21484</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>21254</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Miami</td>\n",
       "      <td>Florida</td>\n",
       "      <td>20518</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>19179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Boston</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>18431</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>18297</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>18218</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Louisville</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>18100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>18801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>San Francisco</td>\n",
       "      <td>California</td>\n",
       "      <td>18679</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>Miami</td>\n",
       "      <td>Florida</td>\n",
       "      <td>17023</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>16896</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>Philadelphia</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>16557</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>16377</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>Boston</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>16354</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>Providence</td>\n",
       "      <td>Rhode Island</td>\n",
       "      <td>16146</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>16093</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>Boston</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>16036</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>16036</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>104</th>\n",
       "      <td>Detroit</td>\n",
       "      <td>Michigan</td>\n",
       "      <td>10900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>105</th>\n",
       "      <td>Philadelphia</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>10897</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>106</th>\n",
       "      <td>Philadelphia</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>10882</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>10855</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>108</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New York</td>\n",
       "      <td>10847</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>109</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>10841</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>110</th>\n",
       "      <td>San Francisco</td>\n",
       "      <td>California</td>\n",
       "      <td>10752</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>111</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>10744</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>112</th>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>10667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>113</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>10590</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>114</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>10582</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115</th>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>10572</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>116</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>10556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>117</th>\n",
       "      <td>Miami</td>\n",
       "      <td>Florida</td>\n",
       "      <td>10474</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118</th>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>10398</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>119</th>\n",
       "      <td>Philadelphia</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>10397</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>120</th>\n",
       "      <td>San Francisco</td>\n",
       "      <td>California</td>\n",
       "      <td>10368</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>121</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>10358</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>122</th>\n",
       "      <td>Boston</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>10351</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>123</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New York</td>\n",
       "      <td>10337</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124</th>\n",
       "      <td>Philadelphia</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>10256</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125</th>\n",
       "      <td>Cleveland</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>10208</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New York</td>\n",
       "      <td>10188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>127</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New York</td>\n",
       "      <td>10187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>128</th>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>10178</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>129</th>\n",
       "      <td>New York City</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>10144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>130</th>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>10126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>131</th>\n",
       "      <td>Philadelphia</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>10107</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>132</th>\n",
       "      <td>Chicago</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>10094</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>133</th>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>10065</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>133 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             state           city  density\n",
       "1    New York City     New Jersey    57116\n",
       "2    New York City     New Jersey    51810\n",
       "3    New York City     New Jersey    49362\n",
       "4    New York City     New Jersey    39066\n",
       "5    New York City       New York    27788\n",
       "6    New York City       New York    27016\n",
       "7    New York City     New Jersey    24577\n",
       "8    New York City     New Jersey    24060\n",
       "9      Los Angeles     California    23216\n",
       "10   New York City     New Jersey    22437\n",
       "11   New York City       New York    21635\n",
       "12           Miami        Florida    21484\n",
       "13     Los Angeles     California    21254\n",
       "14           Miami        Florida    20518\n",
       "15     Los Angeles     California    19179\n",
       "16          Boston  Massachusetts    18431\n",
       "17     Los Angeles     California    18297\n",
       "18   New York City     New Jersey    18218\n",
       "19      Louisville       Kentucky    18100\n",
       "20   New York City     New Jersey    18801\n",
       "21   San Francisco     California    18679\n",
       "22           Miami        Florida    17023\n",
       "23     Los Angeles     California    16896\n",
       "24    Philadelphia   Pennsylvania    16557\n",
       "25   New York City     New Jersey    16377\n",
       "26          Boston  Massachusetts    16354\n",
       "27      Providence   Rhode Island    16146\n",
       "28   New York City     New Jersey    16093\n",
       "29          Boston  Massachusetts    16036\n",
       "30     Los Angeles     California    16036\n",
       "..             ...            ...      ...\n",
       "104        Detroit       Michigan    10900\n",
       "105   Philadelphia   Pennsylvania    10897\n",
       "106   Philadelphia   Pennsylvania    10882\n",
       "107  New York City     New Jersey    10855\n",
       "108  New York City       New York    10847\n",
       "109  New York City     New Jersey    10841\n",
       "110  San Francisco     California    10752\n",
       "111  New York City     New Jersey    10744\n",
       "112    Los Angeles     California    10667\n",
       "113  New York City     New Jersey    10590\n",
       "114  New York City     New Jersey    10582\n",
       "115    Los Angeles     California    10572\n",
       "116  New York City     New Jersey    10556\n",
       "117          Miami        Florida    10474\n",
       "118    Los Angeles     California    10398\n",
       "119   Philadelphia   Pennsylvania    10397\n",
       "120  San Francisco     California    10368\n",
       "121  New York City     New Jersey    10358\n",
       "122         Boston  Massachusetts    10351\n",
       "123  New York City       New York    10337\n",
       "124   Philadelphia   Pennsylvania    10256\n",
       "125      Cleveland           Ohio    10208\n",
       "126  New York City       New York    10188\n",
       "127  New York City       New York    10187\n",
       "128    Los Angeles     California    10178\n",
       "129  New York City     New Jersey    10144\n",
       "130    Los Angeles     California    10126\n",
       "131   Philadelphia   Pennsylvania    10107\n",
       "132        Chicago       Illinois    10094\n",
       "133    Los Angeles     California    10065\n",
       "\n",
       "[133 rows x 3 columns]"
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#-- Get the most populated cities from wikipedia (Thank you wikipedia library!) --\n",
    "html = wp.page(\"List_of_United_States_cities_by_population_density\").html().encode(\"UTF-8\")\n",
    "df = pd.read_html(html)[1]\n",
    "df=df.drop([0,1,4,5,6,8],axis=1)\n",
    "df=df.rename(columns={2:'state',3:'city',7:'density'})\n",
    "df=df.iloc[1:]\n",
    "#df['population']=[int(city.split('\\xa0')[-1].split('[')[0].replace(',','')) for city in df['population']]\n",
    "df['density']=[int(city.split('\\xa0')[-1].split('[')[0].replace(',','').split('.')[0]) for city in df['density']]\n",
    "df\n",
    "    #--- ---- ----- ----- ---- ---- ----- ---- ---- ----- --- ---- ---- ----- ---- ---"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}