How to scrape otto.de?

darkrace · November 28, 2024, 3:47pm

@proxyrackubair I have added a wait for the element, using:

from selenium.webdriver.support.ui import WebDriverWait

I checked it and it is working fine. I tested it with a “football” search, and a total of 814 items were found.

I am attaching the code here; it works fine, including pagination.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from time import sleep
import csv
from bs4 import BeautifulSoup
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json

# Start a Chrome session, accept the cookie banner on otto.de and submit a
# search for "football". Leaves `driver` on the first search-result page.
driver = webdriver.Chrome()
try:
    driver.get('https://www.otto.de/')

    # Wait until the consent button can actually be clicked (not merely present
    # in the DOM); until() returns the element, so no second lookup is needed.
    cookies = WebDriverWait(driver, 60).until(
        EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler")))
    cookies.click()

    # NOTE(review): the trailing "svelte-..." class looks build-generated and
    # may change when the site is redeployed -- confirm if the wait times out.
    search_input_selector = ".squirrel_searchfield.js_squirrel_searchbar__input.svelte-11jrfxz"
    search_bar = WebDriverWait(driver, 60).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, search_input_selector)))
    search_bar.click()
    search_bar.send_keys("football" + Keys.RETURN)

    # Fixed pause to let the search-result page load before scraping starts.
    sleep(8)
except BaseException:
    # Do not leave an orphaned browser process behind when setup fails;
    # the original error is re-raised unchanged.
    driver.quit()
    raise


# Class strings under which a tile's retail price may appear; tried in order.
_PRICE_CLASSES = (
    'find_tile__retailPrice pl_headline50 find_tile__priceValue',
    'find_tile__retailPrice pl_headline50 find_tile__priceValue find_tile__priceValue--red',
)


def _scroll_page(browser, total_scrolls=25, duration=7):
    """Scroll the current page top to bottom in equal steps.

    The page height is read once up front; the scroll is spread over roughly
    `duration` seconds so content further down has a chance to render.
    """
    page_height = browser.execute_script("return document.body.scrollHeight")
    for step in range(1, total_scrolls + 1):
        browser.execute_script(
            f"window.scrollTo(0, {page_height * step / total_scrolls});")
        sleep(duration / total_scrolls)


def _extract_products(page_source):
    """Return a list of (title, price) tuples parsed from the result-page HTML.

    Tiles without a matching title/price element yield the placeholder
    strings 'No Title Found' / 'No Price Found'.
    """
    soup = BeautifulSoup(page_source, 'html.parser')
    tiles = soup.find_all(
        'article', attrs={'data-product-listing-type': 'SearchResultPage'})

    products = []
    for tile in tiles:
        title_element = tile.find('p', class_='find_tile__name pl_copy100')
        title = title_element.get_text(strip=True) if title_element else 'No Title Found'

        price_element = None
        for price_class in _PRICE_CLASSES:
            price_element = tile.find('span', class_=price_class)
            if price_element:
                break
        price = price_element.get_text(strip=True) if price_element else 'No Price Found'

        products.append((title, price))
    return products


def _next_page_url(browser):
    """Return the URL of the next result page, or None on the last page.

    The "next" button carries a JSON `data-page` attribute whose `o` entry is
    used as the `o` query parameter of the next page.
    """
    buttons = browser.find_elements(
        By.CSS_SELECTOR, 'li#reptile-paging-bottom-next > button')
    if not buttons:
        return None

    page_data = buttons[0].get_attribute('data-page')
    if not page_data:
        return None

    offset = json.loads(page_data).get('o')
    if offset is None:
        # Without an offset the URL would end in "o=None" and could reload the
        # same page forever.
        return None

    base_url = browser.current_url.split("?")[0]
    return f"{base_url}?l=gq&o={offset}"


# Walk every search-result page, writing one CSV row per product tile.
try:
    with open('products.csv', mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Product Number', 'Product Title', 'Product Price'])

        # Running count across all pages so "Product Number" stays unique.
        product_number = 0
        while True:
            try:
                _scroll_page(driver)
                sleep(8)  # let lazily loaded tiles finish rendering

                products = _extract_products(driver.page_source)
                print(f'Found {len(products)} product elements.')

                for title, price in products:
                    product_number += 1
                    writer.writerow([product_number, title, price])
                    print(f"{product_number}. Title: {title}")
                    print(f"{product_number}. Price: {price}")

                next_url = _next_page_url(driver)
                if next_url is None:
                    break
                driver.get(next_url)
            except Exception as exc:
                # Best effort: keep the rows written so far, but say why the
                # crawl stopped instead of swallowing the error silently.
                print(f"Stopping scrape after error: {exc!r}")
                break
finally:
    # Always close the browser, even on Ctrl-C or an unexpected failure.
    driver.quit()

Please check now.

proxyrackubair · November 28, 2024, 4:05pm

I can confirm this works. Thanks

Topic		Replies	Views
How can I scrape alcampo.es if I want to know the availability of the products? Scraping Help	8	230	December 20, 2024
About the Scraping Help category Scraping Help	0	62	November 2, 2024
See the amount of traffic that is going through like 50kb so you know its working Mobile Proxies	1	149	April 28, 2025
Feature requests for Mobile Proxies Application Mobile Proxies	1	127	January 20, 2025
Unique ip feature and pof support? Mobile Proxies	1	149	April 28, 2025

How to scrape otto.de?

Related topics