# coding:utf-8
# Log in with webdriver to obtain cookies, then send requests with the requests library (Douban is used as the example).
3 from selenium import webdriver
4 import requests
5 import time
6 import json
7 import sys
# Python 2 only: force the interpreter-wide default str<->unicode codec to
# UTF-8. reload(sys) is required there because setdefaultencoding is not
# reachable on the initially imported sys module. Python 3 has neither a
# builtin reload() nor sys.setdefaultencoding(), so the hack is skipped.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')
10
def main():
    """Log in to Douban through Chrome, then fetch a login-only page with requests.

    Reads the user name and password from ``sys.argv[1]`` and ``sys.argv[2]``,
    fills in the login form in a Chrome window, copies the browser's cookies
    into a ``requests.Session``, requests the account page with that session,
    prints the HTTP status code and writes the response text to ``html.txt``
    in the current working directory.

    Raises:
        SystemExit: if fewer than two command-line arguments were supplied.
    """
    # Local import: only needed for the encoding-aware file write at the end.
    import io

    # Fail with a usage message instead of a bare IndexError.
    if len(sys.argv) < 3:
        prog = sys.argv[0] if sys.argv else 'script'
        sys.exit('usage: {0} USER_NAME PASSWORD'.format(prog))
    user_name = sys.argv[1]
    password = sys.argv[2]

    # Douban login page, and a page that is only visible when logged in.
    login_url = 'https://www.douban.com/accounts/login'
    page_url = 'https://www.douban.com/accounts/'

    opt = webdriver.ChromeOptions()
    # Headless mode (opt.set_headless()) is left disabled, presumably because
    # the visible window is needed to type a CAPTCHA by hand (see below).

    driver = webdriver.Chrome(chrome_options=opt)
    try:
        driver.get(login_url)
        print('opened login page...')

        # Fill in the credentials.
        driver.find_element_by_name('form_email').send_keys(user_name)
        driver.find_element_by_name('form_password').send_keys(password)
        # Repeated logins trigger a CAPTCHA; leave time to enter it manually.
        time.sleep(6)
        driver.find_element_by_class_name('btn-submit').submit()
        print('submited...')
        # Give the login request a moment to complete before reading cookies.
        time.sleep(2)

        # A list of dicts, one per cookie.
        cookies = driver.get_cookies()
    finally:
        # quit() (not close()) ends the whole WebDriver session, and the
        # finally clause guarantees it even when a step above raises.
        driver.quit()

    # Copy the browser cookies into a requests session. Keeping each cookie's
    # domain and path scopes it the way the browser did, so the login cookies
    # are not sent to unrelated hosts if a request gets redirected.
    s = requests.Session()
    for cookie in cookies:
        s.cookies.set(
            cookie['name'],
            cookie['value'],
            domain=cookie.get('domain') or '',
            path=cookie.get('path') or '/',
        )

    # Fetch the login-only page with the authenticated session.
    resp = s.get(page_url)
    resp.encoding = 'utf-8'
    print('status_code = {0}'.format(resp.status_code))

    # Save the page; the explicit encoding makes the write independent of the
    # interpreter's default codec (works on Python 2 and 3 alike).
    with io.open('html.txt', 'w', encoding='utf-8') as fout:
        fout.write(resp.text)

    print('end')
62
# Run the login-and-fetch flow only when executed as a script, not on import.
if __name__ == '__main__':
    main()
# Source: https://www.cnblogs.com/cmbobo/p/12298204.html