clemfromspace / scrapy-selenium

Scrapy middleware to handle javascript pages using selenium

Question about following Requests

KauSaal opened this issue · comments

Hello,
I have a short question about following Requests in scrapy-selenium.
In my code, I loop through all the options in nested select elements. When an option in the last select element is chosen, the page reloads, and I then want to build a Scrapy Request out of a subpage/div ('canvas_post') so I can parse it:

import bs4
from scrapy import Request
from scrapy import Spider
from scrapy_selenium import SeleniumRequest
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from time import sleep

from firmware.items import FirmwareImage
from firmware.loader import FirmwareLoader

class EdimaxDESpider(Spider):
    name = "edimax_de"
    vendor = "Edimax"

    start_urls = ["https://www.edimax.com/edimax/download/download/data/edimax/de/download/"]

    def start_requests(self):
        url = "https://www.edimax.com/edimax/download/download/data/edimax/de/download/"
        yield SeleniumRequest(url=url, callback=self.parse, wait_time=10)

    def parse(self, response):
        driver = response.request.meta['driver']
        # Find the select element by its class name
        solution_box = driver.find_element(By.CLASS_NAME, 'step1_select_cb')
        solution_select = Select(solution_box)
        # Get all option elements within the select element
        option_elements = solution_box.find_elements(By.TAG_NAME, 'option')
        # Extract the value attribute from each option element
        options = [option_element.get_attribute('value') for option_element in option_elements]
        for option in options:
            if option != '':
                solution_select.select_by_value(option)
                sleep(1)
                # find the category box and select an option
                category_element = driver.find_element(By.CLASS_NAME, 'step2_select_cb')
                category_box = Select(category_element)
                # Get all option elements within the category element
                option_elements = category_element.find_elements(By.TAG_NAME, 'option')
                # Extract the value attribute from each option element
                options = [option_element.get_attribute('value') for option_element in option_elements]
                # loop through option
                for option in options:
                    if option != "":
                        category_box.select_by_value(option)
                        sleep(1)
                        # find the modelNo box and select an option
                        modelNo_element = driver.find_element(By.CLASS_NAME, 'step3_select_cb')
                        modelNo_box = Select(modelNo_element)
                        # Get all option elements within the modelNo element
                        option_elements = modelNo_element.find_elements(By.TAG_NAME, 'option')
                        # Extract the value attribute from each option element
                        options = [option_element.get_attribute('value') for option_element in option_elements]
                        # loop through options
                        for option in options:
                            if option != '':
                                modelNo_box.select_by_value(option)
                                sleep(5)
                                html_from_page = driver.page_source
                                soup = bs4.BeautifulSoup(html_from_page, 'html.parser')
                                # hand the reloaded page over to parse_product
                                # (this is the part that never seems to fire)
                                yield Request(soup, callback=self.parse_product)

    def parse_product(self, response):
        print("IM HERE")
        canvas = response.css("#side2 > div.canvas_post").getall()
        print("ELEMENT CANVAS POST")

The print statements in parse_product are never printed, and I also don't get Scrapy request log messages like I do when I'm using Scrapy without Selenium.
Hope someone can give me a hint, and thanks in advance
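For comparison, in a plain Scrapy spider without Selenium something like this works for me and produces the usual 'Crawled (200)' debug lines (minimal sketch; the selector here is made up):

def parse(self, response):
    # follow every extracted link and let parse_product handle the response
    for href in response.css('a::attr(href)').getall():
        yield response.follow(href, callback=self.parse_product)
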
KauSaal