I need to get the availability for each postcode and prices for a category of product - cafetera de goteo - and can’t find the way. Would someone guide me on this?
@kasalo I’m working on extracting product details from Alcampo using Selenium and BeautifulSoup4. I’ve optimized my code, and it’s working fine. Feel free to use it for testing.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from time import sleep
import csv
from bs4 import BeautifulSoup
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json

# Scrape the Alcampo "cafeteras de goteo" category page and write each
# product's title and availability flag to products.csv.
driver = webdriver.Chrome()
driver.get('https://www.compraonline.alcampo.es/categories/electrodom%C3%A9sticos/cafeteras/cafeteras-de-goteo/OC2312?source=navigation')

try:
    # Dismiss the OneTrust cookie banner before touching the page.
    WebDriverWait(driver, 60).until(
        EC.presence_of_element_located((By.ID, "onetrust-accept-btn-handler")))
    driver.find_element(By.ID, "onetrust-accept-btn-handler").click()

    with open('products.csv', mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Product Title', 'Available'])

        # The product catalogue is embedded as JSON in a script tag
        # (window.__INITIAL_STATE__), so no per-product DOM scraping is needed.
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        product_initial = soup.find_all('script', attrs={'data-test': 'initial-state-script'})
        if product_initial:
            raw = product_initial[0].get_text(strip=True).replace('window.__INITIAL_STATE__=', '')
            data = json.loads(raw)['data']['products']['productEntities']
            # BUG FIX: original printed len(product_elements), an undefined
            # name that raised NameError; the dict of products is `data`.
            print(f'Found {len(data)} product elements.')
            for product in data.values():
                try:
                    writer.writerow([product['name'], str(product['available'])])
                except KeyError as exc:
                    # A product entry missing an expected field is skipped,
                    # but no longer silently (original had a bare `except:`).
                    print(f'Skipping product, missing field: {exc}')
except (json.JSONDecodeError, KeyError, TypeError) as exc:
    # Narrowed from a bare `except:` so genuine bugs are not swallowed.
    print(f'error: {exc}')
finally:
    # Always release the browser, even when the scrape fails.
    driver.quit()
@MrWhite can we get product details like name, price, availability for specific post_code?
Greetings @Fireberg, I have updated the code above and added the postcode-selection part. I tested it with postcode “08001”; you can try any other.
Also consider running it headless. Here is the code.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import csv
from bs4 import BeautifulSoup
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json

# Postcode used to select the delivery area; change this (or loop over a
# list of codes) to scrape availability/prices for other areas.
POSTCODE = '08001'

# Select a delivery postcode on Alcampo, then scrape the "cafeteras de
# goteo" category and write title + availability to products.csv.
driver = webdriver.Chrome()

try:
    driver.get('https://www.compraonline.alcampo.es/')

    # 1. Dismiss the OneTrust cookie banner.
    WebDriverWait(driver, 60).until(
        EC.presence_of_element_located((By.ID, "onetrust-accept-btn-handler")))
    driver.find_element(By.ID, "onetrust-accept-btn-handler").click()
    time.sleep(2)

    # 2. Close the promotional overlay box.
    WebDriverWait(driver, 60).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "span.box-close")))
    time.sleep(5)  # overlay animates in; clicking too early misses it
    driver.find_element(By.CSS_SELECTOR, "span.box-close").click()

    # 3. Open the delivery-destination dialog and switch to address entry.
    WebDriverWait(driver, 60).until(EC.presence_of_element_located(
        (By.CSS_SELECTOR, "div[data-test=delivery-destination-header-button]")))
    driver.find_element(
        By.CSS_SELECTOR, "div[data-test=delivery-destination-header-button]").click()
    WebDriverWait(driver, 60).until(EC.presence_of_element_located(
        (By.CSS_SELECTOR, "button[data-test=delivery-method-change-address-button]")))
    driver.find_element(
        By.CSS_SELECTOR, "button[data-test=delivery-method-change-address-button]").click()

    # 4. Type the postcode and confirm the suggested temporary address.
    WebDriverWait(driver, 60).until(
        EC.presence_of_element_located((By.ID, "search-postcode-input")))
    time.sleep(1)
    postcode_input = driver.find_element(By.ID, "search-postcode-input")
    postcode_input.click()
    time.sleep(1)
    postcode_input.send_keys(POSTCODE + Keys.RETURN)
    time.sleep(2)
    driver.find_element(By.CSS_SELECTOR, "button[data-test=address-search-button]").click()
    WebDriverWait(driver, 60).until(EC.presence_of_element_located(
        (By.CSS_SELECTOR, "button[data-test=select-temporary-home-address]")))
    driver.find_element(
        By.CSS_SELECTOR, "button[data-test=select-temporary-home-address]").click()
    WebDriverWait(driver, 60).until(
        EC.presence_of_element_located((By.ID, "change-destination-confirmation-button")))
    time.sleep(1)
    driver.find_element(By.ID, "change-destination-confirmation-button").click()

    # 5. With the postcode applied, load the category page and extract the
    #    embedded window.__INITIAL_STATE__ JSON (no per-product DOM walking).
    driver.get('https://www.compraonline.alcampo.es/categories/electrodom%C3%A9sticos/cafeteras/cafeteras-de-goteo/OC2312?source=navigation')

    with open('products.csv', mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Product Title', 'Available'])

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        product_initial = soup.find_all('script', attrs={'data-test': 'initial-state-script'})
        if product_initial:
            raw = product_initial[0].get_text(strip=True).replace('window.__INITIAL_STATE__=', '')
            data = json.loads(raw)['data']['products']['productEntities']
            print(f'Found {len(data)} product elements.')
            for product in data.values():
                try:
                    writer.writerow([product['name'], str(product['available'])])
                except KeyError as exc:
                    # Skip malformed entries, but report them (original had
                    # a bare `except:` that hid the reason).
                    print(f'Skipping product, missing field: {exc}')
except (json.JSONDecodeError, KeyError, TypeError) as exc:
    # Narrowed from a bare `except:` so real bugs still surface.
    print(f'error: {exc}')
finally:
    # Guarantee the browser is released even if any wait/click above fails;
    # the original only quit on the success path, leaking Chrome on error.
    driver.quit()
@MrWhite thanks its working as expected. Will it also work with multiple post_code and categories?
Yes — add a loop over the postcodes and over the category URLs, and it will work fine. For large-scale scraping, please use proxies so the site’s security doesn’t detect that it’s a bot.
Great, thanks. I will follow the steps.
Thank you guys! I’ll check it out and see if it works for me
python_test.sh (4.4 KB)