I am trying to log in to an ASP.NET website using the requests
module in Python.
When logging in manually on the website, I can see the following header:
I think this is cleaner and more generic.
import requests
from bs4 import BeautifulSoup
# Log in to an ASP.NET WebForms page: fetch the login form, copy its hidden
# state fields, then post them back together with the credentials.
url = "http://www11.davidsonsinc.com/Login/Login.aspx"
username = "username"
password = "password"

# One Session is used for both requests so the POST goes through the same
# session object that fetched the form.
session = requests.Session()
# Custom headers are deliberately left out at first; if the site turns out to
# need them, set them with session.headers.update(...).

# Fetch the login page so the hidden ASP.NET state inputs can be read from it.
response = session.get(url)
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(response.content, "html.parser")

login_data = {}
# Hidden ASP.NET state inputs that are echoed back when present on the page.
aspnetstates = [
    '__VIEWSTATE',
    '__VIEWSTATEGENERATOR',
    '__EVENTVALIDATION',
    '__EVENTTARGET',
    '__EVENTARGUMENT',
    '__VIEWSTATEENCRYPTED',
]
for aspnetstate in aspnetstates:
    result = soup.find('input', {'name': aspnetstate})
    # Not every page emits every state field, so missing ones are skipped.
    if result is not None:
        # An input without a value attribute is posted as an empty string
        # rather than aborting the login with a KeyError.
        login_data[aspnetstate] = result.get('value', '')

# Credentials and the submit button, keyed by the form's control names.
login_data.update({
    "ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$UserName": username,
    "ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$Password": password,
    "ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$LoginButton": "Log In",
})
response = session.post(url, data=login_data)
import requests
from bs4 import BeautifulSoup
# Log in to an ASP.NET WebForms page with a browser-like User-Agent: fetch the
# form, read its three hidden state fields, and post them with the credentials.
URL = "http://www11.davidsonsinc.com/Login/Login.aspx"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36"}
username = "username"
password = "password"

# The same Session (with the headers above) is used for the GET and the POST.
s = requests.Session()
s.headers.update(headers)
r = s.get(URL)
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(r.content, "html.parser")

# These lookups assume all three hidden inputs exist on the page; a missing
# one makes find() return None and the subscript fail.
VIEWSTATE = soup.find(id="__VIEWSTATE")['value']
VIEWSTATEGENERATOR = soup.find(id="__VIEWSTATEGENERATOR")['value']
EVENTVALIDATION = soup.find(id="__EVENTVALIDATION")['value']

login_data = {
    "__VIEWSTATE": VIEWSTATE,
    "__VIEWSTATEGENERATOR": VIEWSTATEGENERATOR,
    "__EVENTVALIDATION": EVENTVALIDATION,
    "ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$UserName": username,
    "ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$Password": password,
    "ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$LoginButton": "Log In",
}
r = s.post(URL, data=login_data)
# Show the URL of the final response. The call form of print works on both
# Python 2 and Python 3.
print(r.url)
I was initially using requests+bs4 as well; however, I was running into similar issues with the ASPX site I'm scraping. I found another library called robobrowser that wraps requests+bs4. With it, you no longer have to manually set items such as "__VIEWSTATE" and friends when interacting with ASPX sites.
from robobrowser import RoboBrowser
# Log in through RoboBrowser, which fills in the form's hidden ASP.NET state
# fields itself so only the credentials have to be set by hand.
url = 'http://www11.davidsonsinc.com'  # no leading space in the literal
login_url = url + '/Login/Login.aspx'
username = "username"
password = "password"

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
browser = RoboBrowser(history=True, parser='html.parser')
# This retrieves __VIEWSTATE and friends
browser.open(login_url)

signin = browser.get_form(id='aspnetForm')
signin["ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$UserName"].value = username
signin["ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$Password"].value = password
signin["ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$LoginButton"].value = "Log In"
browser.submit_form(signin)
# Show the URL the browser is on after submitting. The call form of print
# works on both Python 2 and Python 3.
print(browser.url)