-
Notifications
You must be signed in to change notification settings - Fork 8
/
parse-postcodes.rb
64 lines (49 loc) · 1.54 KB
/
parse-postcodes.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env ruby
# Make the project's bundled libraries (./lib next to this script) loadable.
$:.unshift "#{File.dirname(__FILE__)}/lib"

# CSV is used below to read the post office data. Require it explicitly so
# this script does not depend on a project library having loaded it already.
require 'csv'

require 'mechanize_proxy'
require 'configuration'
require 'people'

# NOTE(review): `conf` is not referenced in the visible part of this script;
# kept in case Configuration.new has side effects -- confirm before removing.
conf = Configuration.new

# NOTE(review): presumably MechanizeProxy stores fetched pages under this
# subdirectory -- confirm in lib/mechanize_proxy.
agent = MechanizeProxy.new
agent.cache_subdirectory = "parse-postcodes"

puts "Reading Australia post office data..."
data = CSV.readlines("data/pc-full_20080529.csv")
# Ignore header
data.shift

# The first column of every remaining row is taken as the postcode.
# `compact` drops the nil that a blank line in the CSV would produce, which
# would otherwise make `sort` raise when comparing nil with a String.
valid_postcodes = data.map { |row| row.first }.compact.uniq.sort
# Collects the link text found in the fourth cell of every row of the first
# table on +page+. The caller treats these values as electoral division names.
#
# page - a parsed page object responding to +search+ (as returned by the
#        agent's +get+).
#
# Returns an Array of Strings in row order (duplicates are not removed).
# Returns an empty Array when the page contains no table at all.
# Rows with fewer than four cells (e.g. a header row) are ignored, and rows
# whose fourth cell has no link text are skipped with a warning instead of
# contributing a blank entry.
def extract_divisions_from_page(page)
  results_table = page.search('table').first
  return [] unless results_table

  results_table.search('> tr').each_with_object([]) do |row_tag, divisions|
    td_tag = row_tag.search('> td')[3]
    next unless td_tag

    # inner_text on a search result is a String (possibly empty), never nil,
    # so "missing" has to be detected by checking for blank text.
    division = td_tag.search('a').inner_text
    if division.strip.empty?
      puts "Blank division name in a results row; skipping it"
      next
    end

    divisions << division
  end
end
# Reports whether the second table on +page+ contains any links
# (matched by '> tr > td > a'). The caller uses this as a sign that the
# results continue on further pages.
#
# page - a parsed page object responding to +search+.
#
# Returns true when at least one such link exists, false otherwise.
# A page with fewer than two tables yields false rather than raising.
def other_pages?(page)
  pager_table = page.search('table')[1]
  return false unless pager_table

  !pager_table.search('> tr > td > a').empty?
end
# Look up every known postcode on the AEC locality search and write one
# "postcode,division" line per distinct division to data/postcodes.csv.
#
# The block form of File.open guarantees the output is flushed and closed,
# even if a page fetch raises part-way through the run.
File.open("data/postcodes.csv", "w") do |file|
  file.puts("Postcode,Electoral division name")
  # NOTE(review): this writes a row with two empty fields right after the
  # header; kept as-is because consumers of the file may rely on it -- confirm.
  file.puts(",")

  valid_postcodes.each do |postcode|
    page = agent.get("http://apps.aec.gov.au/esearch/LocalitySearchResults.aspx?filter=#{postcode}&filterby=Postcode")
    divisions = extract_divisions_from_page(page)

    # Only the first results page is read, so flag postcodes whose results
    # continue on further pages for manual checking.
    if other_pages?(page)
      puts "WARNING: Multiple pages of data for postcode #{postcode}"
      file.puts("*** Double check data for postcode #{postcode} by hand ***")
    end

    if divisions.empty?
      puts "No divisions for postcode #{postcode}"
    else
      divisions.uniq.sort.each do |division|
        file.puts "#{postcode},#{division}"
      end
    end
  end
end