Skip to content

Commit

Permalink
Merge pull request rhiever#21 from nyoung85/master
Browse files Browse the repository at this point in the history
To resolve rhiever#20
  • Loading branch information
Randy Olson committed Mar 10, 2016
2 parents 897727a + 2e8bfdd commit ecd7b87
Show file tree
Hide file tree
Showing 2 changed files with 284 additions and 17 deletions.
240 changes: 240 additions & 0 deletions us-weather-history/KSAF.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
date,actual_mean_temp,actual_min_temp,actual_max_temp,average_min_temp,average_max_temp,record_min_temp,record_max_temp,record_min_temp_year,record_max_temp_year,actual_precipitation,average_precipitation,record_precipitation
2014-7-1,73,57,86,61,95,51,97,2004,1999,0.02
2014-7-2,70,57,82,62,91,51,96,2004,2012,0.17
2014-7-3,70,57,84,51,88,51,98,1980,2003,0.03
2014-7-4,68,52,84,52,92,51,99,2005,2003,0.00
2014-7-5,74,61,87,57,94,48,94,2004,1980,0.00
2014-7-6,74,57,91,59,98,53,98,2004,1980,0.00
2014-7-7,74,57,91,56,94,55,96,1997,2003,0.35
2014-7-8,72,57,86,55,92,51,96,2005,2005,0.26
2014-7-9,70,59,84,57,93,52,98,2015,2003,0.08
2014-7-10,74,62,87,58,94,52,94,2008,1980,0.00
2014-7-11,74,62,90,55,96,48,96,1999,1980,0.01
2014-7-12,71,55,89,58,96,53,96,2002,1980,0.00
2014-7-13,70,57,82,53,91,53,99,1980,2003,0.14
2014-7-14,73,60,87,54,88,54,98,1980,2003,0.05
2014-7-15,66,57,75,54,90,51,97,2002,2010,0.22
2014-7-16,68,57,81,60,95,51,96,2005,2007,0.21
2014-7-17,70,57,84,61,96,51,98,2013,2006,0.00
2014-7-18,74,60,89,56,97,52,97,1998,1980,0.00
2014-7-19,76,63,90,57,98,53,100,2000,2010,0.00
2014-7-20,74,57,95,57,94,54,99,1999,2010,0.13
2014-7-21,78,64,93,58,91,52,95,1999,2005,0.01
2014-7-22,78,62,93,54,91,51,95,2000,1996,0.06
2014-7-23,72,60,84,53,89,53,93,1980,2003,0.43
2014-7-24,74,61,89,52,88,52,96,1980,2003,0.00
2014-7-25,77,61,93,59,90,55,96,2004,2003,0.00
2014-7-26,80,64,96,59,96,51,96,2004,1980,0.00
2014-7-27,74,60,89,60,95,53,95,2004,1980,1.14
2014-7-28,70,59,81,61,96,52,96,2004,1980,0.01
2014-7-29,68,60,75,54,92,52,93,2004,2001,0.12
2014-7-30,72,60,84,57,92,50,93,2004,2008,0.00
2014-7-31,66,57,78,62,93,54,95,2003,2012,0.02
2014-8-1,64,55,73,52,95,52,95,1980,1980,0.27
2014-8-2,66,59,73,65,96,54,96,1998,1980,0.02
2014-8-3,66,55,78,57,91,54,95,1998,2008,0.00
2014-8-4,68,60,78,57,89,55,93,1998,2003,1.26
2014-8-5,68,55,82,61,91,53,95,1998,2003,0.00
2014-8-6,70,55,86,63,92,51,95,1998,2015,0.00
2014-8-7,68,57,80,53,92,53,95,1980,2011,0.00
2014-8-8,67,50,84,59,84,50,93,2014,2003,0.00
2014-8-9,70,55,84,58,93,52,93,1999,1980,0.00
2014-8-10,68,53,84,55,84,51,93,2000,2011,0.07
2014-8-11,66,52,80,58,85,51,96,2004,2012,0.00
2014-8-12,69,57,82,53,84,53,96,1980,2002,0.00
2014-8-13,71,63,80,56,76,51,95,1999,1996,0.01
2014-8-14,70,57,82,59,86,51,96,1998,1996,0.09
2014-8-16,72,55,89,51,82,51,93,1980,2002,0.00
2014-8-17,71,55,87,48,83,48,93,1980,2002,0.00
2014-8-18,72,57,87,53,88,46,93,2008,2015,0.00
2014-8-19,70,57,87,57,90,53,95,1997,2007,0.00
2014-8-20,72,61,84,50,82,50,95,1980,2007,0.00
2014-8-21,70,55,86,45,84,45,96,1980,2007,0.00
2014-8-22,62,55,68,47,90,47,95,1980,2007,0.10
2014-8-23,63,48,79,52,89,48,93,2014,2010,0.00
2014-8-24,66,50,84,54,92,48,92,2002,1980,0.00
2014-8-25,70,55,87,52,87,45,93,2004,2007,0.00
2014-8-26,66,55,79,56,84,46,93,2001,2002,0.22
2014-8-27,64,55,77,48,83,48,93,1980,2011,0.16
2014-8-28,62,48,78,47,84,46,91,2006,1997,0.00
2014-8-29,65,50,80,47,86,46,91,2005,1997,0.00
2014-8-30,68,53,86,48,85,48,95,1980,2011,0.00
2014-8-31,72,55,89,53,88,48,95,2002,2011,0.00
2014-9-1,70,51,89,46,85,45,91,1980,2013,0.00
2014-9-2,73,57,89,46,86,42,93,1980,2012,0.00
2014-9-3,72,55,90,47,88,44,90,1980,2011,0.00
2014-9-5,68,57,80,48,89,44,91,1980,2010,0.00
2014-9-6,65,55,77,52,84,48,90,1978,2010,0.00
2014-9-7,66,51,82,52,81,46,89,2003,2000,0.00
2014-9-8,70,57,82,48,81,39,87,2001,2007,0.00
2014-9-10,68,57,80,53,77,41,89,2001,2015,0.00
2014-9-12,56,46,66,45,78,43,88,1978,2004,0.00
2014-9-14,63,46,82,46,82,39,91,2005,1997,0.00
2014-9-15,68,53,84,48,81,37,89,2005,2010,0.00
2014-9-16,66,57,77,48,83,42,90,2002,2000,0.00
2014-9-17,60,55,66,45,84,37,89,2006,2010,0.00
2014-9-18,66,50,84,42,78,34,91,2006,2010,0.00
2014-9-19,69,57,82,40,83,30,89,2006,2010,0.00
2014-9-20,71,54,89,42,78,39,89,1978,2014,0.00
2014-9-21,68,55,82,47,76,39,88,2006,2010,0.00
2014-9-22,68,60,75,42,76,37,84,1980,2012,0.04
2014-9-23,70,59,81,42,75,36,84,2006,1998,0.00
2014-9-24,66,50,82,44,78,36,86,2006,1998,0.00
2014-9-25,66,52,81,43,71,28,86,2000,1998,0.00
2014-9-26,63,48,79,49,69,33,87,2000,1999,0.00
2014-9-27,64,53,77,48,72,32,88,1996,2010,0.00
2014-9-29,58,46,73,39,79,30,90,1999,2010,0.02
2014-9-30,56,42,72,40,83,37,84,1999,1978,0.00
2014-10-1,56,42,73,39,84,37,89,1980,2010,0.00
2014-10-2,52,39,68,42,84,37,87,2001,1980,0.00
2014-10-3,57,41,73,37,79,34,84,1980,2012,0.00
2014-10-4,58,37,78,34,77,33,80,1980,2007,0.00
2014-10-5,58,42,75,35,81,33,83,1980,1980,0.00
2014-10-7,60,46,77,33,79,33,81,1978,1980,0.00
2014-10-8,59,48,71,34,79,30,79,2012,1978,0.00
2014-10-10,51,48,54,35,77,32,81,2001,1999,0.26
2014-10-11,56,46,66,33,77,26,82,2001,2015,0.00
2014-10-12,52,39,66,33,77,28,81,1997,2007,0.01
2014-10-13,48,37,60,37,76,23,82,1997,2015,0.00
2014-10-14,50,32,68,29,66,27,81,1978,2015,0.00
2014-10-15,54,37,73,33,73,32,80,1997,2015,0.00
2014-10-16,58,42,75,33,69,30,80,1997,2011,0.00
2014-10-17,57,43,73,28,68,24,78,1980,1978,0.00
2014-10-18,56,48,64,28,66,21,82,1980,2003,0.01
2014-10-19,54,39,70,27,65,17,80,1980,2003,0.00
2014-10-20,56,42,73,25,72,21,81,1980,2003,0.08
2014-10-21,54,46,63,28,69,21,81,1996,2003,0.00
2014-10-22,58,46,70,30,59,5,80,1996,2003,0.00
2014-10-23,56,41,72,30,64,21,79,1996,2003,0.00
2014-10-25,60,42,79,29,57,19,79,1980,2014,0.00
2014-10-26,56,41,73,28,61,21,77,1997,2007,0.00
2014-10-27,57,45,69,32,55,21,70,2012,2001,0.00
2014-10-28,51,39,64,28,54,26,72,2012,2008,0.00
2014-10-29,52,35,69,25,54,22,73,1980,2001,0.00
2014-10-30,54,37,71,24,65,21,73,1980,2010,0.00
2014-10-31,52,39,64,32,65,26,104,1980,1996,0.00
2014-11-1,52,39,66,34,63,23,73,1977,2008,0.00
2014-11-3,42,32,53,34,64,18,69,2011,2010,0.00
2014-11-5,45,28,62,29,68,24,72,2003,1977,0.00
2014-11-6,46,30,61,35,67,19,72,2008,1977,0.00
2014-11-7,47,28,66,34,68,19,71,2000,1977,0.00
2014-11-8,44,28,60,31,52,15,71,2000,2007,0.00
2014-11-9,48,34,61,20,58,9,69,1977,2006,0.00
2014-11-10,52,37,66,21,52,10,70,1977,2005,0.00
2014-11-11,43,27,59,29,54,16,70,2012,1999,0.00
2014-11-12,23,15,30,28,52,15,70,2014,1999,0.00
2014-11-14,40,28,53,27,58,15,71,1997,1999,0.00
2014-11-15,44,34,55,27,54,9,66,1997,1999,0.00
2014-11-16,32,19,46,25,48,12,66,1997,1999,0.03
2014-11-17,24,12,37,24,53,12,68,2014,2008,0.00
2014-11-19,34,19,48,23,58,17,69,2000,2007,0.00
2014-11-20,38,23,53,26,57,20,66,1978,1996,0.00
2014-11-21,38,26,51,26,51,19,62,1998,1996,0.00
2014-11-22,40,26,55,31,49,17,66,1999,2006,0.00
2014-11-23,36,24,44,24,54,12,63,1999,1998,0.00
2014-11-24,26,12,39,29,54,12,62,2014,1998,0.00
2014-11-25,26,10,43,29,51,10,60,2014,1998,0.00
2014-11-26,42,30,53,24,59,10,63,2000,1998,0.00
2014-11-27,44,28,59,25,50,10,64,2001,1998,0.00
2014-11-28,42,26,57,21,46,10,62,2001,1999,0.00
2014-11-29,44,28,60,20,48,6,60,2004,2014,0.00
2014-11-30,40,21,60,24,47,1,63,2004,2003,0.00
2014-12-3,40,28,53,24,48,14,59,2004,2012,0.00
2014-12-4,41,36,46,20,54,12,62,2006,1977,0.61
2014-12-5,42,33,52,20,54,0,57,2011,2012,0.00
2014-12-6,42,32,51,14,52,-2,57,2011,2012,0.03
2014-12-8,42,30,55,18,55,-2,61,2005,1977,0.00
2014-12-9,42,32,52,16,58,6,59,2012,1977,0.00
2014-12-10,42,28,55,18,57,-2,60,2012,1977,0.00
2014-12-11,44,30,57,20,55,3,57,1997,1993,0.00
2014-12-12,44,32,55,18,48,3,57,1997,1977,0.00
2014-12-13,42,30,55,18,47,10,57,2001,2010,0.00
2014-12-14,34,25,43,18,49,9,59,1999,2010,0.02
2014-12-15,32,23,42,16,51,9,59,1999,2010,0.00
2014-12-16,32,25,39,10,35,10,54,1993,1998,0.00
2014-12-17,30,26,34,13,40,7,53,2005,1998,0.17
2014-12-18,29,19,39,14,39,1,53,1996,1998,0.00
2014-12-19,30,21,39,10,45,10,51,1993,2003,0.00
2014-12-20,30,21,39,16,41,10,54,1999,1998,0.00
2014-12-21,38,32,45,11,35,9,54,2012,2005,0.00
2014-12-22,41,28,52,17,34,3,55,1999,2005,0.00
2014-12-23,26,18,35,7,30,5,60,2004,2005,0.00
2014-12-25,35,25,45,9,43,3,55,1997,2005,0.00
2014-12-26,20,10,30,13,44,0,55,1997,2005,0.00
2014-12-27,16,3,30,18,41,1,51,1997,1999,0.00
2014-12-28,23,12,34,22,50,6,54,2007,2005,0.00
2014-12-29,23,9,37,15,48,5,57,2012,2005,0.00
2014-12-30,16,10,21,11,48,10,57,2014,1998,0.00
2014-12-31,16,8,25,17,49,0,57,2010,1996,0.00
2015-1-10,36,24,46,26,53,10,57,2000,2005,0.00
2015-1-11,36,24,48,32,50,8,54,2011,2000,0.00
2015-1-12,32,25,39,29,43,3,57,2013,2000,0.00
2015-1-13,30,26,33,22,48,-9,55,1999,2000,0.06
2015-1-14,29,24,34,19,47,3,55,2013,2003,0.05
2015-1-15,34,21,46,22,47,1,57,2013,2000,0.00
2015-1-16,34,23,46,32,36,3,61,2007,2000,0.00
2015-1-17,39,23,53,28,44,1,64,2008,2000,0.00
2015-1-18,34,19,50,28,40,6,57,2001,1999,0.00
2015-1-20,40,30,51,21,29,-4,61,2012,2005,0.00
2015-1-21,32,27,36,21,31,10,57,2006,2000,0.01
2015-1-22,21,12,30,22,38,3,55,2007,2003,0.00
2015-1-24,31,18,44,10,48,8,57,2014,2013,0.00
2015-1-25,38,23,52,7,30,7,55,1978,1999,0.00
2015-1-26,43,26,60,11,38,11,60,1978,2015,0.00
2015-1-28,46,36,57,23,47,10,62,2014,2003,0.00
2015-1-29,42,37,48,23,51,9,57,2000,2011,0.00
2015-1-30,32,28,37,23,53,10,59,2013,2016,0.11
2015-4-19,49,36,63,33,56,22,75,1973,1954,0.00
2015-5-1,62,46,81,37,66,28,81,2004,2015,0.00
2015-5-2,64,48,81,29,67,1,81,2001,2006,0.00
2015-5-3,59,43,75,36,69,21,84,2011,2000,0.00
2015-5-4,60,48,73,29,72,29,87,1980,2000,0.04
2015-5-6,56,46,70,36,75,25,84,1999,2000,0.00
2015-5-7,56,41,72,41,76,30,84,2007,2000,0.00
2015-5-9,48,37,55,32,67,27,84,1964,2004,0.00
2015-5-10,47,30,64,40,73,30,88,2003,2000,0.00
2015-5-11,54,37,71,38,76,28,82,2003,2000,0.00
2015-5-12,54,46,63,36,73,32,84,2005,2006,0.00
2015-5-13,56,45,69,34,74,29,84,1980,2003,0.02
2015-5-14,57,45,69,42,77,27,85,2014,1978,0.00
2015-5-15,50,42,57,41,79,28,89,2014,1978,0.00
2015-5-17,52,37,66,38,71,37,87,1978,2003,0.00
2015-5-18,57,42,73,36,67,35,84,1978,1998,0.00
2015-5-19,56,46,66,38,74,36,89,1978,2006,0.00
2015-5-20,56,42,72,40,81,30,90,2011,2005,0.00
2015-5-21,50,45,57,45,69,35,90,2003,2005,0.00
2015-5-22,55,41,70,44,76,32,91,2001,2005,0.00
2015-5-23,55,44,68,43,82,36,95,2008,2000,0.00
2015-5-24,50,39,64,39,80,37,93,1998,2000,0.00
2015-5-25,53,42,66,37,79,33,90,1980,2006,0.00
2015-5-26,55,37,75,38,80,37,90,2015,2001,0.00
2015-5-27,62,48,77,42,81,39,88,1998,2001,0.00
2015-5-28,64,48,80,35,79,33,91,1980,2000,0.00
2015-5-29,62,46,80,40,80,37,95,1980,2000,0.00
2015-5-31,66,52,81,39,85,34,95,1980,2002,0.00
2015-6-1,70,55,87,47,87,41,93,1980,2002,0.00
2015-6-2,68,50,86,44,84,35,91,1980,1998,0.00
2015-6-3,67,48,86,39,79,33,91,1980,2006,0.00
2015-6-5,70,61,80,44,86,36,96,1980,2006,0.00
2015-6-6,72,57,84,42,83,39,93,1999,2006,0.00
2015-6-7,68,53,84,42,82,37,91,2007,2004,0.00
2015-6-8,70,53,88,48,87,39,93,2007,2013,0.00
2015-6-9,71,60,84,55,85,39,91,1998,2001,0.00
2015-6-10,70,60,82,49,86,41,98,1999,2013,0.00
2015-6-12,65,48,82,50,90,43,97,1998,2013,0.00
2015-6-14,66,51,81,45,91,39,95,2001,2006,0.00
2015-6-15,66,51,82,45,92,39,97,1980,2008,0.00
2015-6-16,68,55,82,49,90,42,93,1997,2002,0.00
2015-6-17,68,51,86,55,91,46,96,2005,2008,0.00
2015-6-18,72,55,90,50,90,39,95,1998,2002,0.00
2015-6-20,74,55,93,44,89,42,97,1980,2012,0.00
2015-6-21,80,63,97,52,91,43,97,2003,2015,0.00
2015-6-22,78,60,99,46,90,45,99,2003,2015,0.00
2015-6-23,75,60,91,47,92,43,96,1980,2011,0.00
2015-6-24,75,61,89,53,95,47,99,1980,2012,0.00
2015-6-25,76,62,91,54,94,48,96,2003,2007,0.00
2015-6-26,73,55,91,49,95,48,97,1978,1980,0.00
2015-6-27,74,61,84,56,94,48,102,2006,2013,0.00
2015-6-28,72,55,91,54,91,46,98,2010,1998,0.00
2015-6-29,72,57,90,58,92,50,99,2005,1998,0.00
2015-6-30,72,55,91,53,84,50,99,1978,2012,0.00
61 changes: 44 additions & 17 deletions us-weather-history/wunderground_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,45 +33,72 @@ def parse_station(station):
current_date.day)) as in_file:
soup = BeautifulSoup(in_file.read(), 'html.parser')

weather_data = soup.find(id='historyTable').find_all('span', class_='wx-value')
weather_data_rows = soup.find(id='historyTable').find_all('tr')
weather_data = []
for i in range(len(weather_data_rows)):
soup1 = weather_data_rows[i]
weather_data.append(soup1.find_all('span', class_='wx-value'))
weather_data = [x for x in weather_data if x != []]

if len(weather_data[4]) < 2:
weather_data[4].append(None)
weather_data[4].append(None)


weather_data_units = soup.find(id='historyTable').find_all('td')


try:
actual_mean_temp = weather_data[0].text
actual_max_temp = weather_data[2].text
average_max_temp = weather_data[3].text
record_max_temp = weather_data[4].text
actual_min_temp = weather_data[5].text
average_min_temp = weather_data[6].text
record_min_temp = weather_data[7].text
actual_mean_temp = weather_data[0][0].text
actual_max_temp = weather_data[1][0].text
average_max_temp = weather_data[1][1].text
if weather_data[1][2]:
record_max_temp = weather_data[1][2].text
actual_min_temp = weather_data[2][0].text
average_min_temp = weather_data[2][1].text
record_min_temp = weather_data[2][2].text
record_max_temp_year = weather_data_units[
9].text.split('(')[-1].strip(')')
record_min_temp_year = weather_data_units[
13].text.split('(')[-1].strip(')')

actual_precipitation = weather_data[9].text
actual_precipitation = weather_data[4][0].text
if actual_precipitation == 'T':
actual_precipitation = '0.0'
average_precipitation = weather_data[10].text
record_precipitation = weather_data[11].text
actual_precipitation = '0.0'

#Test whether station collects average or record precipitation data
if weather_data[4][1]:
average_precipitation = weather_data[4][1].text
else:
average_precipitation = None
if weather_data[4][2]:
record_precipitation = weather_data[4][2].text
else:
record_precipitation = None

# Verify that the parsed data is valid
if (record_max_temp_year == '-1' or record_min_temp_year == '-1' or
int(record_max_temp) < max(int(actual_max_temp), int(average_max_temp)) or
int(record_min_temp) > min(int(actual_min_temp), int(average_min_temp)) or
float(actual_precipitation) > float(record_precipitation) or
float(average_precipitation) > float(record_precipitation)):
((record_precipitation is not None or average_precipitation is not None) and
(float(actual_precipitation) > float(record_precipitation) or
float(average_precipitation) > float(record_precipitation)))):
raise Exception

out_file.write('{}-{}-{},'.format(current_date.year, current_date.month, current_date.day))
out_file.write(','.join([actual_mean_temp, actual_min_temp, actual_max_temp,
average_min_temp, average_max_temp,
record_min_temp, record_max_temp,
record_min_temp_year, record_max_temp_year,
actual_precipitation, average_precipitation,
record_precipitation]))
actual_precipitation,]))
if average_precipitation:
out_file.write(',{}'.format(average_precipitation))
if record_precipitation:
out_file.write(',{}'.format(record_precipitation))

out_file.write('\n')
current_date += timedelta(days=1)

except:
# If the web page is formatted improperly, signal that the page may need
# to be downloaded again.
Expand Down Expand Up @@ -105,5 +132,5 @@ def parse_station(station):

# Parse the stations used in this article
for station in ['KCLT', 'KCQT', 'KHOU', 'KIND', 'KJAX',
'KMDW', 'KNYC', 'KPHL', 'KPHX', 'KSEA']:
'KMDW', 'KNYC', 'KPHL', 'KPHX', 'KSEA', 'KSAF']:
parse_station(station)

0 comments on commit ecd7b87

Please sign in to comment.