I'm trying to log into LinkedIn using Python requests:
import sys
import requests
from BeautifulSoup import BeautifulSoup
payload={
\'session-key\' :
2020 version of @garromark's accepted solution:
import http.cookiejar as cookielib
import os
import re
import string
import urllib
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup
# Credentials handed to LinkedInParser at the bottom of the script; empty by default.
username = ""
password = ""
# File path given to MozillaCookieJar in LinkedInParser.__init__; the jar is
# loaded from it at start-up only when the file already exists.
cookie_filename = "parser.cookies.txt"
class LinkedInParser(object):
    """Log into LinkedIn through a cookie-enabled urllib opener.

    Constructing an instance immediately submits the login form and then
    prints the ``<title>`` element of the feed page.
    """

    def __init__(self, login, password):
        """Build the opener, log in, and print the feed page title.

        Args:
            login: Value submitted as the ``session_key`` form field.
            password: Value submitted as the ``session_password`` form field.
        """
        self.login = login
        self.password = password

        # Simulate a browser with cookies enabled. An existing cookie file
        # is loaded so its cookies are sent with the requests below.
        self.cj = cookielib.MozillaCookieJar(cookie_filename)
        if os.access(cookie_filename, os.F_OK):
            self.cj.load()

        self.opener = urllib.request.build_opener(
            urllib.request.HTTPRedirectHandler(),
            urllib.request.HTTPHandler(debuglevel=0),
            urllib.request.HTTPSHandler(debuglevel=0),
            urllib.request.HTTPCookieProcessor(self.cj),
        )
        self.opener.addheaders = [
            ('User-agent', 'Mozilla/5.0'),
        ]

        # Login, then fetch the feed title as a visible check of the result.
        self.loginPage()
        title = self.loadTitle()
        print(title)
        # NOTE: the cookie jar is intentionally not written back to disk
        # here; call self.cj.save() to persist the session.

    def loadPage(self, url, data=None):
        """Fetch *url* and return the response body as text.

        Args:
            url: URL to open with ``self.opener``.
            data: Optional request body as bytes; when given it is passed
                to the opener along with the URL.

        Returns:
            The decoded body as ``str``, or ``None`` when the request (or
            decoding) fails for any reason. The failure is printed, not
            raised, so callers must be prepared for ``None``.
        """
        try:
            # The context manager closes the response even if read/decode fails.
            with self.opener.open(url, data) as response:
                raw = response.read()
                # Responses are bytes: decode them instead of calling str(),
                # which would yield the "b'...'" repr rather than the markup.
                charset = response.headers.get_content_charset() or "utf-8"
            content = raw.decode(charset, errors="replace")
            print("Page loaded: %s \n Content: %s \n" % (url, content))
            return content
        except Exception as e:
            # Deliberate best-effort: report any load problem and return
            # None. Retrying here could loop forever on a real failure.
            print("Exception on %s load: %s" % (url, e))
            return None

    def loadSoup(self, url, data=None):
        """Load *url* via ``loadPage`` and parse it with BeautifulSoup.

        Args:
            url: URL to fetch.
            data: Optional request body forwarded to ``loadPage``.

        Returns:
            A ``BeautifulSoup`` object built with the ``html5lib`` parser.
            If ``loadPage`` returned ``None``, that ``None`` is passed to
            BeautifulSoup unchanged.
        """
        html = self.loadPage(url, data)
        return BeautifulSoup(html, "html5lib")

    def loginPage(self):
        """Submit the login form; this should populate the cookie jar.

        Reads the ``loginCsrfParam``, ``csrfToken`` and ``sIdString`` input
        values from the login page, prints them, and posts them together
        with the stored credentials to the login-submit endpoint.

        Raises:
            TypeError: presumably when one of the inputs is absent from the
                page, since the lookup result is subscripted directly.
        """
        soup = self.loadSoup("https://www.linkedin.com/login")

        # Collect every field before printing so nothing is printed when
        # one of them is missing.
        hidden_fields = {
            name: soup.find("input", {"name": name})['value']
            for name in ("loginCsrfParam", "csrfToken", "sIdString")
        }
        for name, value in hidden_fields.items():
            print("%s: %s" % (name, value))

        form = {
            'session_key': self.login,
            'session_password': self.password,
        }
        form.update(hidden_fields)
        login_data = urllib.parse.urlencode(form).encode('utf8')

        self.loadPage(
            "https://www.linkedin.com/checkpoint/lg/login-submit", login_data
        )

    def loadTitle(self):
        """Return the ``<title>`` element of the feed page.

        Returns:
            The result of ``soup.find("title")`` on the feed page.
        """
        soup = self.loadSoup("https://www.linkedin.com/feed/")
        return soup.find("title")
# Script entry point: constructing the parser performs the login and prints
# the feed page title (both happen inside LinkedInParser.__init__).
parser = LinkedInParser(username, password)