Not able to Login using Scrapy

随声附和 提交于 2021-01-29 13:39:38

问题


I'm trying to log in to a website using Python and Scrapy, but it is not working.

For reference, here is my code:

import quotes as q
import loginspidernew as login
import scrapy
from scrapy.crawler import CrawlerProcess
class ValidateURL:
    """Start the login spider for every configured URL that has login details."""

    def checkURL(self, urls):
        """Schedule and run a login crawl for each entry that has login details.

        Args:
            urls: Mapping whose values are dicts containing at least the
                keys ``'login_details'`` and ``'url'``.

        Returns:
            ``None`` when there was nothing to do or all crawls ran, and
            ``False`` when anything went wrong (the error is logged).
        """
        import logging  # function-scope import keeps the class self-contained

        try:
            if not urls:
                return None
            targets = [entry for entry in urls.values() if entry['login_details']]
            if not targets:
                return None
            # The Twisted reactor behind CrawlerProcess cannot be restarted,
            # so every crawl is scheduled on ONE process that is started once.
            # Starting a fresh process per URL fails from the second URL on.
            process = CrawlerProcess()
            for entry in targets:
                self.runScrap(entry, process=process)
            process.start()
        except Exception:
            # Best effort, as before, but no longer silent and no longer
            # swallowing KeyboardInterrupt / SystemExit.
            logging.getLogger(__name__).exception("Login crawl failed")
            return False
        return None

    def runScrap(self, data, process=None):
        """Schedule a login crawl for one URL entry.

        Args:
            data: Dict for one site; ``data['url']`` becomes the spider's
                start URL and the whole dict is passed as ``credentials``.
            process: Optional shared ``CrawlerProcess``. When given, the crawl
                is only scheduled and the caller must call ``start()``.
                When omitted, a process is created and started here.
        """
        if not data:
            return
        owns_process = process is None
        if owns_process:
            process = CrawlerProcess()
        process.crawl(
            login.LoginSpider,
            login_url='http://quotes.toscrape.com/login',
            start_urls=[data['url']],
            credentials=data,
        )
        if owns_process:
            process.start()

from scrapy.http import Request, FormRequest
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
# from scrapy.selector import HtmlXPathSelector
from scrapy.http import FormRequest
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.utils.response import open_in_browser
from bs4 import BeautifulSoup

class LoginSpider(CrawlSpider):
    """Spider that submits a login form and then checks the resulting page.

    The spider reads these attributes, which are supplied as keyword
    arguments when the crawl is scheduled:

    * ``login_url``: URL of the page containing the login form.
    * ``credentials``: dict with the keys ``'fields_in_response'`` (strings
      that must appear in the login page), ``'login_details'`` (dict with
      ``'name'`` and ``'pwd'``) and ``'fields_in_main_page'`` (strings that
      must appear in the page shown after a successful login).
    """

    name = 'loginspider'

    def init_request(self):
        """Return the request for the login page.

        NOTE(review): this looks like a leftover InitSpider hook; nothing in
        this class calls it. The callback now points at ``login``, because
        ``start_requests`` takes no response argument and cannot be used as
        a callback.
        """
        return Request(url=self.login_url, callback=self.login, dont_filter=True)

    def start_requests(self):
        """Begin the crawl by fetching the login page."""
        print ("\n start_request is here \n")
        yield Request(
            url=self.login_url,
            callback=self.login,
            dont_filter=True,
        )

    def fetch_form_data(self, response):
        """Build the form fields to submit from the inputs of the login page.

        Text inputs receive the user name, password inputs receive the
        password, and hidden inputs keep the value found in the page.

        Returns:
            A dict mapping input names to values, or ``None`` when the page
            does not contain every string in
            ``credentials['fields_in_response']``.
        """
        expected = self.credentials['fields_in_response']
        if not all(field in response.text for field in expected):
            self.logger.warning(
                "Login page %s does not contain all expected fields", response.url
            )
            return None

        login_cred = self.credentials['login_details']
        form_data = {}
        for markup in response.xpath('//form//input').extract():
            tag = BeautifulSoup(markup, 'html.parser').input
            inp_name = tag.get('name')
            if inp_name is None:
                # Inputs without a name are never submitted with the form.
                continue
            inp_type = tag.get('type')
            if inp_type == 'text':
                form_data[inp_name] = login_cred['name']
            elif inp_type == 'password':
                form_data[inp_name] = login_cred['pwd']
            elif inp_type == 'hidden':
                form_data[inp_name] = tag.get('value')
        return form_data

    def login(self, response):
        """Submit the login form found in ``response``."""
        print ("\n Login is here! \n")
        formdata = self.fetch_form_data(response)
        # formdata MUST be passed by keyword: the second positional parameter
        # of from_response is ``formname``, so passing the dict positionally
        # meant the credentials were never sent and the login always failed.
        return FormRequest.from_response(
            response,
            formdata=formdata,
            callback=self.check_login_response,
        )

    def check_login_response(self, response):
        """Report whether the page returned after login looks logged in."""
        open_in_browser(response)  # debugging aid: shows the received page
        print(type(self.credentials['fields_in_main_page']))
        print ("\n Check_login_response \n")
        expected = self.credentials['fields_in_main_page']
        if all(field in response.text for field in expected):
            print("Worked, logged in")
        else:
            print("Not logged in")

The code executes without any errors from the spider, so I think the problem is related to the website. What am I missing, and how can I overcome this issue? Even when I then try to log in from the browser, it shows an "invalid CSRF token" error. Please let me know if anything extra needs to be added.

来源:https://stackoverflow.com/questions/61317215/not-able-to-login-using-scrapy

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!