From 17eccb01a199d1ca162048ec5dc4504668c2e327 Mon Sep 17 00:00:00 2001 From: zen-gineer Date: Sat, 30 Jun 2018 19:42:05 -0700 Subject: [PATCH] asdf --- DataSearch-Copy1.ipynb | 40 +++------------------------------------- 1 file changed, 3 insertions(+), 37 deletions(-) diff --git a/DataSearch-Copy1.ipynb b/DataSearch-Copy1.ipynb index 83856da..2c10ad0 100644 --- a/DataSearch-Copy1.ipynb +++ b/DataSearch-Copy1.ipynb @@ -2,45 +2,11 @@ "cells": [ { "cell_type": "code", - "execution_count": 140, + "execution_count": null, "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Manila , Philippines : 14.5995124,120.9842195,100mi\n", - "[{'message': 'Rate limit exceeded', 'code': 88}] Trying again after 1 minute.\n", - "[{'message': 'Rate limit exceeded', 'code': 88}] Trying again after 1 minute.\n", - "[{'message': 'Rate limit exceeded', 'code': 88}] Trying again after 1 minute.\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mUnboundLocalError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36mGetTweetsByPopularCities\u001b[0;34m(search_term, numTweets, translateToLocalLanguage)\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 215\u001b[0;31m \u001b[0mtweetsWorld\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtweetsWorld\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSearchForData\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtranslatedSearch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnumTweets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcityCountry\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 216\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mUnboundLocalError\u001b[0m: local variable 'tweetsWorld' referenced before assignment", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mRateLimitError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36mSearchForData\u001b[0;34m(search_term, nTweets, cityCountry, radius)\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcityCountry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 131\u001b[0;31m \u001b[0mtweetsPerDay\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mapi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msearch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msearch_term\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcount\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnTweetsPerDay\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"recent\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moldest_tweet\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgeocode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcoords\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muntil\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mday\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 132\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/anaconda3/lib/python3.6/site-packages/tweepy/binder.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 250\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 251\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/anaconda3/lib/python3.6/site-packages/tweepy/binder.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 231\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_rate_limit_error_message\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror_msg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 232\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mRateLimitError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror_msg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 233\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mRateLimitError\u001b[0m: [{'message': 'Rate limit exceeded', 'code': 88}]", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 246\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 247\u001b[0;31m \u001b[0mtweetsUS\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtweetsWorld\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGetTweetsByPopularCities\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'trump'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m200\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 248\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtweetsUS\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'location'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtweetsWorld\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'location'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36mGetTweetsByPopularCities\u001b[0;34m(search_term, numTweets, translateToLocalLanguage)\u001b[0m\n\u001b[1;32m 215\u001b[0m \u001b[0mtweetsWorld\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtweetsWorld\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSearchForData\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtranslatedSearch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnumTweets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcityCountry\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 216\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 217\u001b[0;31m \u001b[0mtweetsWorld\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSearchForData\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtranslatedSearch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnumTweets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcityCountry\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 218\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36mSearchForData\u001b[0;34m(search_term, nTweets, cityCountry, radius)\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 135\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Trying again after 1 minute.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 136\u001b[0;31m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 137\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], + "outputs": [], "source": [ "import pandas as pd\n", "import wikipedia as wp\n", @@ -167,7 +133,7 @@ " maxTweets = 10000; oldest_tweet = None; unique_ids = []; desiredTweets = [];nTweetsPerDay=nTweets/8\n", " for day,num in zip([str(date.today() - timedelta(i)).split()[0] for i in range(8)], range(1,9)):\n", " tweetsPerDay=[]\n", - " while len(tweetsPerDay) < min(nTweetsPerDay,maxTweets/8):\n", + " while len(desiredTweets) < min(nTweetsPerDay*num,maxTweets/8):\n", " #--- determine whether to grab tweets by geo or not --- ---- --- ----- --\n", " while True:\n", " try:\n",