trade
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv
import re
# Create instance of a driver
driver = webdriver.Chrome()
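# Assumes a local Chrome install; recent Selenium releases resolve a matching
# chromedriver automatically (Selenium Manager), older ones need it on the PATH.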
# Navigate to the coffee listing page
driver.get('https://www.drinktrade.com/coffee/all-coffee/')
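# Optional robustness step (not in the original script): use the WebDriverWait and
# expected_conditions imports above to let the React page finish rendering before
# the product count is read.
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, '//span[@class="filter-qty text-body-2-desktop"]'))
)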
# Find the number of products shown on the listing page
number = driver.find_element(By.XPATH, '//span[@class="filter-qty text-body-2-desktop"]').get_attribute('innerText').split(' ')[0]
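# The element text is expected to start with the count (e.g. "123 coffees"),
# so only the first whitespace-separated token is kept.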
# Build an XPath to each product card's detail link
detail_xpath = ['//div[@data-reactid="318"]/div[{}]/div/a'.format(x) for x in range(1, int(number) + 1)]
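# Note: data-reactid values are generated by React and can change whenever the
# site is rebuilt, so these selectors are tied to this specific page version.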
# List comprehension to construct all the detail urls
urls = [driver.find_element(By.XPATH, path).get_attribute('href') for path in detail_xpath]
# Create a csv file to save the scraped data (newline='' is what the csv module expects)
csv_file = open('trade.csv', 'w', encoding='utf-8', newline='')
writer = csv.writer(csv_file)
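# Optional (not in the original script): a header row would make the CSV
# self-describing, e.g.:
#   writer.writerow(['name', 'roaster', 'descriptor', 'price', 'weight',
#                    'location', 'roasternotes', 'blend_single', 'roastlevel',
#                    'classification', 'flavornotes', 'extra'])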
# Visit each product page and scrape its details
i = 0
for url in urls:
    driver.get(url)
    # Initialize an empty dictionary for each coffee
    coffee = {}
    # Use relative XPaths to locate each product field
    name = driver.find_element(By.XPATH, '//h1[@class="product-name text-display-2-mobile text-display-2-desktop"]').text
    roaster = driver.find_element(By.XPATH, '//h2[@class="roaster-name text-title-mobile text-title-desktop"]').text
    descriptor = driver.find_element(By.XPATH, '//div[@class="description-container text-body-2-mobile text-body-2-desktop"]').text
    price = driver.find_element(By.XPATH, '//span[@class="product-price"]').text
    weight = driver.find_element(By.XPATH, '//span[@class="product-size"]').text
    location = driver.find_element(By.XPATH, '//div[@class="location"]/div[2]').text
    roasternotes = driver.find_element(By.XPATH, '//div[@class="roaster-notes-body"]').text
    blend_single = driver.find_element(By.XPATH, '//a[@data-reactid="121"]').text
    roastlevel = driver.find_element(By.XPATH, '//div[@data-reactid="209"]').text
    classification = driver.find_element(By.XPATH, '//div[@data-reactid="216"]').text
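    # Flavor notes may be missing on some product pages, so guard the lookup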
    try:
        flavornotes = driver.find_element(By.XPATH, '//ul[@class="taste-features"]/li').text
    except NoSuchElementException:
        flavornotes = 'none'
    add_info = driver.find_elements(By.XPATH, '//div[@class="coffee-info-list"]/ul/li/div')
    extra = [info.text for info in add_info]
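    # `extra` keeps the text of every remaining coffee-info item as a single
    # Python list; it is written to one CSV cell below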
    coffee['name'] = name
    coffee['roaster'] = roaster
    coffee['descriptor'] = descriptor
    coffee['price'] = price
    coffee['weight'] = weight
    coffee['location'] = location
    coffee['roasternotes'] = roasternotes
    coffee['blend_single'] = blend_single
    coffee['roastlevel'] = roastlevel
    coffee['classification'] = classification
    coffee['flavornotes'] = flavornotes
    coffee['extra'] = extra
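    # Dict values come back in insertion order (guaranteed since Python 3.7),
    # so the columns line up with the keys assigned above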
    writer.writerow(coffee.values())
    i += 1
    print('Coffee ' + str(i) + ' of ' + str(number))
    print('-' * 50)
csv_file.close()
driver.close()
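# driver.quit() would also shut down the chromedriver process; close() only
# closes the current browser window.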