Logging in to LinkedIn with Python requests sessions

Asked by 傲寒 on 2020-12-02 19:57

I'm trying to log into LinkedIn using Python requests:

import sys
import requests
from BeautifulSoup import BeautifulSoup


payload = {
    'session-key':
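
The snippet above is cut off, but for reference a requests.Session version of the same idea looks roughly like this. This is a minimal sketch, not the asker's code: the form field names, the hidden CSRF inputs, and the login-submit URL are taken from the answer below, the credentials are placeholders, and LinkedIn may require additional fields or reject scripted logins.

import requests
from bs4 import BeautifulSoup

session = requests.Session()
session.headers.update({"User-Agent": "Mozilla/5.0"})

# Load the login page so the Session picks up cookies and the hidden form fields
login_page = session.get("https://www.linkedin.com/login")
soup = BeautifulSoup(login_page.text, "html.parser")

payload = {
    "session_key": "user@example.com",        # placeholder credentials
    "session_password": "password",
    "loginCsrfParam": soup.find("input", {"name": "loginCsrfParam"})["value"],
    "csrfToken": soup.find("input", {"name": "csrfToken"})["value"],
}

# The Session carries the cookies from the GET into the POST and any later requests
response = session.post("https://www.linkedin.com/checkpoint/lg/login-submit", data=payload)
print(response.status_code)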


        
5 Answers

Answered by 自闭症患者 on 2020-12-02 20:24

    2020 version of @garromark's accepted solution:

    import http.cookiejar as cookielib
    import os
    import urllib.parse
    import urllib.request
    from bs4 import BeautifulSoup
    
    username = ""
    password = ""
    
    cookie_filename = "parser.cookies.txt"
    
    
    class LinkedInParser(object):
    
        def __init__(self, login, password):
            """ Start up... """
            self.login = login
            self.password = password
    
            # Simulate browser with cookies enabled
            self.cj = cookielib.MozillaCookieJar(cookie_filename)
            if os.access(cookie_filename, os.F_OK):
                self.cj.load()
            self.opener = urllib.request.build_opener(
                urllib.request.HTTPRedirectHandler(),
                urllib.request.HTTPHandler(debuglevel=0),
                urllib.request.HTTPSHandler(debuglevel=0),
                urllib.request.HTTPCookieProcessor(self.cj)
            )
            self.opener.addheaders = [
                ('User-agent', 'Mozilla/5.0')
            ]
    
            # Login
            self.loginPage()
    
            title = self.loadTitle()
            print(title)
    
            # self.cj.save()
    
        def loadPage(self, url, data=None):
            """
            Utility function to load HTML from a URL, with a hack to keep going despite load failures
            """
            # Print the URL to help debug a potential infinite retry loop
            # print("Loading URL: %s" % url)
            try:
                if data is not None:
                    response = self.opener.open(url, data)
                else:
                    response = self.opener.open(url)
                # Decode the response body instead of str()-joining raw bytes lines
                content = response.read().decode("utf-8", errors="replace")
                print("Page loaded: %s \n Content: %s \n" % (url, content))
                return content
            except Exception as e:
                # If URL doesn't load for ANY reason, try again...
                # Quick and dirty solution for 404 returns because of network problems
                # However, this could infinite loop if there's an actual problem
                print("Exception on %s load: %s" % (url, e))
                # return self.loadPage(url, data)
    
        def loadSoup(self, url, data=None):
            """
            Combine loading of URL, HTML, and parsing with BeautifulSoup
            """
            html = self.loadPage(url, data)
            soup = BeautifulSoup(html, "html5lib")
            return soup
    
        def loginPage(self):
            """
            Handle login. This should populate our cookie jar.
            """
            soup = self.loadSoup("https://www.linkedin.com/login")
            loginCsrfParam = soup.find("input", {"name": "loginCsrfParam"})['value']
            csrfToken = soup.find("input", {"name": "csrfToken"})['value']
            sIdString = soup.find("input", {"name": "sIdString"})['value']
            print("loginCsrfParam: %s" % loginCsrfParam)
            print("csrfToken: %s" % csrfToken)
            print("sIdString: %s" % sIdString)
            login_data = urllib.parse.urlencode({
                'session_key': self.login,
                'session_password': self.password,
                'loginCsrfParam': loginCsrfParam,
                'csrfToken': csrfToken,
                'sIdString': sIdString
            }).encode('utf8')
    
            self.loadPage("https://www.linkedin.com/checkpoint/lg/login-submit", login_data)
    
        def loadTitle(self):
            soup = self.loadSoup("https://www.linkedin.com/feed/")
            return soup.find("title")
    
    
    parser = LinkedInParser(username, password)
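
    To reuse cookies across runs, the commented-out self.cj.save() call can be enabled after a successful login. A small usage sketch, assuming the login above succeeded (MozillaCookieJar.save() and load() are standard-library calls):

    # Persist cookies so a later run can reuse them instead of logging in again;
    # note that self.cj.load() would also need ignore_discard=True to read
    # session cookies back in on the next start-up.
    parser.cj.save(ignore_discard=True, ignore_expires=True)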
    
    
