forked from bruno/openaustralia-parser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse-member-links.rb
executable file
·97 lines (82 loc) · 2.92 KB
/
parse-member-links.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env ruby
$:.unshift "#{File.dirname(__FILE__)}/lib"
require 'mechanize_proxy'
require 'name'
require 'people'
require 'configuration'
# Script-wide objects used by everything below: the configuration, the
# page-fetching agent and the set of known people.
conf = Configuration.new
agent = MechanizeProxy.new
# NOTE(review): presumably keeps this script's cached pages separate from
# other scripts' caches - confirm against MechanizeProxy.
agent.cache_subdirectory = "parse-member-links"
puts "Reading member data..."
people = PeopleCSVReader.read_members
# Progress message for the first phase (the websites.xml section further down).
puts "Personal home page & Contact Details (Gov website)..."
# Emits one <personinfo> element for the person called +name+.
#
# name   - name object; looked up in +people+ and asked for +full_name+ when
#          the lookup fails.
# people - responds to find_person_by_name_current_on_date(name, date).
# agent  - responds to click(link) and returns the page behind +link+.
# link   - link to the person's contact-details page.
# x      - XML builder; receives personinfo(attributes).
#
# The page behind +link+ supplies :mp_contactdetails (its own URI) and, when
# it contains a link whose text matches "personal home page" (case
# insensitive), :mp_website. If nobody current today matches +name+, a warning
# is printed and nothing is written.
#
# Returns whatever x.personinfo returns, or nil when the person is unknown.
def extract_links(name, people, agent, link, x)
  person = people.find_person_by_name_current_on_date(name, Date.today)
  unless person
    puts "WARNING: Could not find person with name #{name.full_name}"
    return
  end

  contact_page = agent.click(link)
  website_link = contact_page.links.find { |candidate| candidate.text =~ /personal home page/i }
  attributes = {:id => person.id, :mp_contactdetails => contact_page.uri}
  attributes[:mp_website] = website_link.uri if website_link
  x.personinfo(attributes)
end
# Write websites.xml: one <personinfo> element (via extract_links) for every
# matching link on the current house members page and the current senators
# page, each section gated by its configuration flag.
#
# The block form of File.open closes the file even when fetching or parsing a
# page raises part-way through, so the handle is never leaked.
File.open("#{conf.members_xml_path}/websites.xml", 'w') do |websites_file|
  websites_xml = Builder::XmlMarkup.new(:target => websites_file, :indent => 1)
  websites_xml.instruct!
  websites_xml.publicwhip do
    if conf.write_xml_representatives
      agent.get(conf.alternative_current_house_members_url).links.each do |link|
        # Only links whose text mentions "Member for" are treated as members.
        next unless link.to_s =~ /Member for/
        # The name is the first two comma-separated pieces of the link text.
        name = Name.last_title_first(link.text.split(',')[0..1].join(','))
        extract_links(name, people, agent, link, websites_xml)
      end
    end
    if conf.write_xml_senators
      agent.get(conf.alternative_current_senate_members_url).links.each do |link|
        # Only links whose text mentions "Senator" are treated as senators.
        next unless link.to_s =~ /Senator/
        # The name is everything before the last '-' in the link text.
        name = Name.last_title_first(link.to_s.split('-')[0..-2].join('-'))
        extract_links(name, people, agent, link, websites_xml)
      end
    end
  end
end
# Write links-abc-qanda.xml: for every current member of the House of
# Representatives, a <personinfo> element carrying the ABC Q&A profile URL of
# the member's division. Skipped entirely unless representatives are enabled.
if conf.write_xml_representatives
  puts "Q&A Links..."
  # First get mapping between constituency name and web page
  page = agent.get(conf.qanda_electorate_url)
  map = {}
  # NOTE(review): the 35..184 slice hard-codes where the electorate links sit
  # in the page's link list - confirm it still matches the page layout.
  page.links[35..184].each do |link|
    map[link.text.downcase] = (page.uri + link.uri).to_s
  end
  # Hack to deal with "Flynn" constituency incorrectly spelled as "Flyn"
  map["flynn"] = "http://www.abc.net.au/tv/qanda/mp-profiles/flyn.htm"
  # Check that the links point to valid pages and drop the ones that do not
  map.delete_if do |division, url|
    begin
      agent.get(url)
      false
    rescue WWW::Mechanize::ResponseCodeError
      puts "ERROR: Invalid url #{url} for division #{division}"
      true
    end
  end
  # The block form of File.open closes the file even if a lookup below raises.
  File.open("#{conf.members_xml_path}/links-abc-qanda.xml", 'w') do |qanda_file|
    qanda_xml = Builder::XmlMarkup.new(:target => qanda_file, :indent => 1)
    qanda_xml.instruct!
    qanda_xml.publicwhip do
      people.find_current_members(House.representatives).each do |member|
        link = map[member.division.downcase]
        # A missing division is reported, but the element is still written
        # (with a nil link), as before.
        puts "ERROR: Couldn't lookup division #{member.division}" if link.nil?
        qanda_xml.personinfo(:id => member.person.id, :mp_biography_qanda => link)
      end
    end
  end
end
# Run twfy/scripts/mpinfoin.pl (under conf.web_root) with the argument "links".
# NOTE(review): presumably this loads the XML files written above - confirm.
mpinfoin_command = conf.web_root + "/twfy/scripts/mpinfoin.pl links"
# Kernel#system returns false for a non-zero exit status and nil when the
# command could not be run at all; report both instead of ignoring them.
warn "ERROR: Command failed: #{mpinfoin_command}" unless system(mpinfoin_command)