昨日作业:自动登录抽屉新热榜
from selenium import webdriver
import time

# Script: open dig.chouti.com in Chrome and log in through the login dialog.
# NOTE(review): the account phone number and password are hard-coded below;
# consider loading them from outside the source file.
driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')

# Maximize the browser window.
driver.maximize_window()

try:
    # Configure the implicit wait before the first request so that it is
    # already in effect for every element lookup that follows.
    driver.implicitly_wait(10)
    driver.get('https://dig.chouti.com/')
    time.sleep(5)

    # 1. Click the login button to open the login dialog.
    login_btn = driver.find_element_by_id('login_btn')
    login_btn.click()
    time.sleep(2)

    # 2. Type the user name (phone number).
    phone = driver.find_element_by_class_name('login-phone')
    phone.send_keys('15622792660')

    # 3. Type the password.
    pwd = driver.find_element_by_class_name('pwd-password-input')
    pwd.send_keys('kermit46709394')

    # 4. Submit the login form.
    login_submit = driver.find_element_by_class_name('btn-large')
    login_submit.click()

    # Keep the browser open for a while so the result can be inspected.
    time.sleep(20)

# Catch any failure and print it instead of letting the script crash.
except Exception as e:
    print(e)

finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window.
    driver.quit()
今日内容:
注意:selenium驱动的浏览器是干净的,没有任何缓存。
1、selenium剩余用法
2、selenium万能登录破解
3、selenium爬取京东商品信息
4、破解极验滑动验证码
Xpath语法:
今日作业:
1、总结课堂知识点,写博客
2、爬取京东商品信息
3、滑动验证(提高题)
1.selenium选择器之Xpath:
from selenium import webdriver

# Script: demonstrate locating elements with XPath expressions.
driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')


try:
    # Implicit wait: written before the get() request.
    driver.implicitly_wait(5)

    driver.get('https://doc.scrapy.org/en/latest/_static/selectors-sample1.html')

    # Explicit wait: written after the get() request.
    # wait.until(...)

    # Sample markup the XPath expressions below are written against
    # (presumably the content of the page requested above - verify).
    '''

    <html>
     <head>
      <base href='http://example.com/' />
      <title>Example website</title>
     </head>
     <body>
      <div id='images'>
       <a href='image1.html'>Name: My image 1 <br /><img src='image1_thumb.jpg' /></a>
       <a href='image2.html'>Name: My image 2 <br /><img src='image2_thumb.jpg' /></a>
       <a href='image3.html'>Name: My image 3 <br /><img src='image3_thumb.jpg' /></a>
       <a href='image4.html'>Name: My image 4 <br /><img src='image4_thumb.jpg' /></a>
       <a href='image5.html'>Name: My image 5 <br /><img src='image5_thumb.jpg' /></a>
      </div>
     </body>
    </html>
    '''
    # Locate elements with XPath syntax.
    # "/" selects starting from the root node.
    html = driver.find_element_by_xpath('/html')
    # html = driver.find_element_by_xpath('/head')  # raises an error
    print(html.tag_name)

    # "//" matches a node anywhere in the document.
    div = driver.find_element_by_xpath('//div')
    print(div.tag_name)

    # "@" filters on an attribute:
    # find the div node whose id is "images".
    div = driver.find_element_by_xpath('//div[@id="images"]')
    print(div.tag_name)
    print(div.text)

    # Find the first <a> node.
    first_a = driver.find_element_by_xpath('//a')
    print(first_a.tag_name)

    # Find all <a> nodes.
    a_s = driver.find_elements_by_xpath('//a')
    print(a_s)

    # Read the href attribute of the first <a> node.
    # get_attribute: fetch one attribute of a node.
    href = driver.find_element_by_xpath('//a').get_attribute('href')
    print(href)

finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window.
    driver.quit()
2.selenium剩余操作:
1 ''''''
2 '''
3 点击、清除操作
4 '''
5 # from selenium import webdriver
6 # from selenium.webdriver.common.keys import Keys
7 # import time
8 #
9 # driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')
10 #
11 # try:
12 # driver.implicitly_wait(10)
13 # # 1、往jd发送请求
14 # driver.get('https://www.jd.com/')
15 # # 找到输入框输入围城
16 # input_tag = driver.find_element_by_id('key')
17 # input_tag.send_keys('围城')
18 # # 键盘回车
19 # input_tag.send_keys(Keys.ENTER)
20 # time.sleep(2)
21 # # 找到输入框输入墨菲定律
22 # input_tag = driver.find_element_by_id('key')
23 # input_tag.clear()
24 # input_tag.send_keys('墨菲定律')
25 # # 找到搜索按钮点击搜索
26 # button = driver.find_element_by_class_name('button')
27 # button.click()
28 # time.sleep(10)
29 #
30 # finally:
31 # driver.close()
32
33
34 '''
35 获取cookies (了解)
36 '''
37 # from selenium import webdriver
38 # import time
39 #
40 # driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')
41 #
42 # try:
43 # driver.implicitly_wait(10)
44 # driver.get('https://www.zhihu.com/explore')
45 # print(driver.get_cookies())
46 #
47 # time.sleep(10)
48 # finally:
49 # driver.close()
50
51 '''
52 选项卡
53 '''
54 # 选项卡管理:切换选项卡,有js的方式window.open,也有windows快捷键:
55 # ctrl+t等,最通用的就是js的方式
56 # import time
57 # from selenium import webdriver
58 #
59 # browser = webdriver.Chrome()
60 # try:
61 # browser.get('https://www.baidu.com')
62 #
63 # # execute_script: 执行javascript代码
64 # # 弹窗操作
65 # # browser.execute_script('alert("tank")')
66 # # 新建浏览器窗口
67 # browser.execute_script(
68 # '''
69 # window.open();
70 # '''
71 # )
72 # time.sleep(1)
73 # print(browser.window_handles) # 获取所有的选项卡
74 # # 切换到第二个窗口
75 # # 新:
76 # browser.switch_to.window(browser.window_handles[1])
77 # # 旧:
78 # # browser.switch_to_window(browser.window_handles[1])
79 #
80 # # 第二个窗口往淘宝发送请求
81 # browser.get('https://www.taobao.com')
82 # time.sleep(5)
83 #
84 # # 切换到第一个窗口
85 # browser.switch_to.window(browser.window_handles[0])
86 # browser.get('https://www.sina.com.cn')
87 #
88 # time.sleep(10)
89 # finally:
90 # browser.close()
91
92
93 '''
94 ActionChains动作链
95 '''
96 # from selenium import webdriver
97 # from selenium.webdriver import ActionChains
98 # import time
99 #
100 # driver = webdriver.Chrome()
101 # driver.implicitly_wait(10)
102 # driver.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
103 #
104 # try:
105 #
106 # # driver.switch_to_frame('iframeResult')
107 # # 切换到id为iframeResult的窗口内
108 # driver.switch_to.frame('iframeResult')
109 #
110 # # 源位置
111 # draggable = driver.find_element_by_id('draggable')
112 #
113 # # 目标位置
114 # droppable = driver.find_element_by_id('droppable')
115 #
116 # # 调用ActionChains,必须把驱动对象传进去
117 # # 得到一个动作链对象,赋值给一个变量
118 # actions = ActionChains(driver)
119 #
120 # # 方式一: 机器人
121 # # 瞬间把源图片位置秒移到目标图片位置
122 # # actions.drag_and_drop(draggable, droppable) # 编写一个行为
123 # # actions.perform() # 执行编写好的行为
124 #
125 #
126 # # 方式二: 模拟人的行为
127 # source = draggable.location['x']
128 # target = droppable.location['x']
129 # print(source, target)
130 #
131 # distance = target - source
132 # print(distance)
133 #
134 # # perform:每个动作都要调用perform执行
135 #
136 # # 点击并摁住源图片
137 # ActionChains(driver).click_and_hold(draggable).perform()
138 #
139 # s = 0
140 # while s < distance:
141 # # 执行位移操作
142 # ActionChains(driver).move_by_offset(xoffset=2, yoffset=0).perform()
143 # s += 2
144 #
145 # # 释放动作链
146 # ActionChains(driver).release().perform()
147 #
148 # time.sleep(10)
149 #
150 #
151 # finally:
152 # driver.close()
153
154
155 '''
156 前进、后退
157 '''
158 # from selenium import webdriver
159 # import time
160 #
161 # driver = webdriver.Chrome()
162 #
163 # try:
164 # driver.implicitly_wait(10)
165 # driver.get('https://www.jd.com/')
166 # driver.get('https://www.baidu.com/')
167 # driver.get('https://www.cnblogs.com/')
168 #
169 # time.sleep(2)
170 #
171 # # 回退操作
172 # driver.back()
173 # time.sleep(1)
174 # # 前进操作
175 # driver.forward()
176 # time.sleep(1)
177 # driver.back()
178 # time.sleep(10)
179 #
180 # finally:
181 # driver.close()
3.破解登录:
from selenium import webdriver
from selenium.webdriver import ChromeOptions
import time

# Script: start Chrome with an existing user profile and inject a login cookie.
#
# Preparation steps:
#   1. In the file explorer's view settings, show hidden files.
#   2. Locate C:\Users\administortra\AppData\Local\Google\Chrome\User Data
#      and delete the "Default" folder.
#   3. Reopen the browser and log in to the Baidu account
#      - this creates a fresh "Default" cache folder.
#   4. Add the cookies (see below).
#   5. Close Chrome, then run this program.

# Build the options object that carries the browser start-up arguments.
options = ChromeOptions()

# Location of the Chrome profile that stores the cookies:
# 'C:\Users\administortra\AppData\Local\Google\Chrome\User Data'
profile_directory = r'--user-data-dir=C:\Users\administortra\AppData\Local\Google\Chrome\User Data'

# Register the user profile directory as a start-up argument.
options.add_argument(profile_directory)

# Hand the options to the driver; the chrome_options keyword receives the
# options object.
driver = webdriver.Chrome(chrome_options=options)

try:
    driver.implicitly_wait(10)
    driver.get('https://www.baidu.com/')

    # Cookie to supply:  BDUSS:*****
    # Add the user's cookie; the keys "name" and "value" must be lowercase.
    driver.add_cookie({"name": "BDUSS", "value": "用户session字符串"})

    # Reload the page so the request is sent again with the cookie.
    driver.refresh()

    time.sleep(10)

finally:
    # quit() ends the whole session, including the driver process; close()
    # only closes the window, and a surviving process would keep the shared
    # profile directory in use.
    driver.quit()
4.selenium爬取京东商品信息:
1 # ''''''
2 # '''
3 # 爬取京东商品信息:
4 # 请求url:
5 # https://www.jd.com/
6 # 提取商品信息:
7 # 1.商品详情页
8 # 2.商品名称
9 # 3.商品价格
10 # 4.评价人数
11 # 5.商品商家
12 # '''
13 # from selenium import webdriver
14 # from selenium.webdriver.common.keys import Keys
15 # import time
16 #
17 # driver = webdriver.Chrome()
18 #
19 # try:
20 # driver.implicitly_wait(10)
21 # # 1、往京东主页发送请求
22 # driver.get('https://www.jd.com/')
23 #
24 # # 2、输入商品名称,并回车搜索
25 # input_tag = driver.find_element_by_id('key')
26 # input_tag.send_keys('macbook')
27 # input_tag.send_keys(Keys.ENTER)
28 # time.sleep(2)
29 #
30 # # 通过JS控制滚轮滑动获取所有商品信息
31 # js_code = '''
32 # window.scrollTo(0,5000);
33 # '''
34 # driver.execute_script(js_code) # 执行js代码
35 #
36 # # 等待数据加载
37 # time.sleep(2)
38 #
39 # # 3、查找所有商品div
40 # # good_div = driver.find_element_by_id('J_goodsList')
41 # good_list = driver.find_elements_by_class_name('gl-item')
42 # n = 1
43 # for good in good_list:
44 # # 根据属性选择器查找
45 # # 商品链接
46 # good_url = good.find_element_by_css_selector(
47 # '.p-img a').get_attribute('href')
48 #
49 # # 商品名称
50 # good_name = good.find_element_by_css_selector(
51 # '.p-name em').text.replace("\n", "--")
52 #
53 # # 商品价格
54 # good_price = good.find_element_by_class_name(
55 # 'p-price').text.replace("\n", ":")
56 #
57 # # 评价人数
58 # good_commit = good.find_element_by_class_name(
59 # 'p-commit').text.replace("\n", " ")
60 #
61 # # 商品商家
62 # good_from = good.find_element_by_class_name(
63 # 'J_im_icon').text.replace("\n", " ")
64 #
65 # good_content = f'''
66 # 商品链接: {good_url}
67 # 商品名称: {good_name}
68 # 商品价格: {good_price}
69 # 评价人数: {good_commit}
70 # 商品商家: {good_from}
71 # \n
72 # '''
73 # print(good_content)
74 # with open('jd.txt', 'a', encoding='utf-8') as f:
75 # f.write(good_content)
76 #
77 # next_tag = driver.find_element_by_link_text('下一页')
78 #
79 # next_tag.click()
80 #
81 # time.sleep(10)
82 #
83 #
84 # finally:
85 # driver.close()
86
87
88
89 ''''''
90 '''
91 爬取京东商品信息:
92 请求url:
93 https://www.jd.com/
94 提取商品信息:
95 1.商品详情页
96 2.商品名称
97 3.商品价格
98 4.评价人数
99 5.商品商家
100 '''
101 from selenium import webdriver
102 from selenium.webdriver.common.keys import Keys
103 import time
104
105
def get_good(driver):
    """Scrape the JD search results shown in ``driver`` page by page.

    For every result page the function scrolls down, reads each element with
    class ``gl-item`` (link, name, price, number of reviews), prints the record
    and appends it to ``jd.txt``, then clicks the ``pn-next`` button to move to
    the following page.

    The pages are walked in a loop rather than by recursion, so the driver is
    shut down exactly once and the call depth no longer grows with the number
    of pages.

    Args:
        driver: a WebDriver whose current page is a JD search result list.
            The driver is always shut down before this function returns or
            raises.
    """
    # Scroll down via JS so that all goods on the page are available.
    js_code = '''
window.scrollTo(0,5000);
'''

    try:
        while True:
            driver.execute_script(js_code)

            # Wait for the data to load.
            time.sleep(2)

            # 3. Collect every goods container on the current page.
            good_list = driver.find_elements_by_class_name('gl-item')

            # Open the output file once per page instead of once per item.
            with open('jd.txt', 'a', encoding='utf-8') as f:
                for good in good_list:
                    # Goods link.
                    good_url = good.find_element_by_css_selector(
                        '.p-img a').get_attribute('href')

                    # Goods name.
                    good_name = good.find_element_by_css_selector(
                        '.p-name em').text.replace("\n", "--")

                    # Goods price.
                    good_price = good.find_element_by_class_name(
                        'p-price').text.replace("\n", ":")

                    # Number of reviews.
                    good_commit = good.find_element_by_class_name(
                        'p-commit').text.replace("\n", " ")

                    good_content = f'''
商品链接: {good_url}
商品名称: {good_name}
商品价格: {good_price}
评价人数: {good_commit}
\n
'''
                    print(good_content)
                    f.write(good_content)

            # Stop cleanly when there is no further page.
            next_tags = driver.find_elements_by_class_name('pn-next')
            if not next_tags:
                break
            next_tag = next_tags[0]
            # NOTE(review): presumably JD marks the button on the last page
            # with an extra "disabled" class - verify against the live page.
            if 'disabled' in (next_tag.get_attribute('class') or '').split():
                break

            next_tag.click()
            time.sleep(2)

    finally:
        # quit() ends the whole WebDriver session (browser and driver process).
        driver.quit()
163
164
if __name__ == '__main__':
    # Entry point: ask for a keyword, search it on JD and scrape the results.
    good_name = input('请输入爬取商品信息:').strip()

    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(10)
        # 1. Request the JD home page.
        driver.get('https://www.jd.com/')

        # 2. Type the goods name into the search box and press Enter.
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys(good_name)
        input_tag.send_keys(Keys.ENTER)
        time.sleep(2)
    except Exception:
        # get_good() shuts the driver down itself, but it has not been called
        # yet; without this the browser would stay open after a failed setup.
        driver.quit()
        raise

    get_good(driver)
5.破解极验滑动验证:
1 ''''''
2 '''
3 破解极验滑动验证
4 博客园登录url:
5 https://account.cnblogs.com/signin?returnUrl=https%3A%2F%2Fwww.cnblogs.com%2F
6 1、输入用户名与密码,并点击登录
7 2、弹出滑动验证,获取有缺口与完整的图片
8 3、通过像素点进行比对,获取滑动位移距离
9 4、模拟人的行为轨迹
10 5、开始滑动
11 '''
12 from selenium import webdriver # 用来驱动浏览器的
13 from selenium.webdriver import ActionChains # 破解滑动验证码的时候用的 可以拖动图片
14 import time
15 from PIL import Image # pip3 install pillow
16 import random
17
# Chrome start-up options; 'disable-infobars' is passed to the browser as an argument.
option = webdriver.ChromeOptions()
option.add_argument('disable-infobars')

# Module-level driver instance; main() reads this global.
driver = webdriver.Chrome(chrome_options=option)
22
23
def get_snap(driver):
    """Return the captcha area cropped out of a full-page screenshot.

    The page is saved to ``snap.png``; the element with class
    ``geetest_canvas_img`` supplies the crop rectangle through its location
    and size.

    Args:
        driver: the WebDriver showing the page with the captcha.

    Returns:
        The image object produced by cropping ``snap.png`` to that rectangle.
    """
    # Selenium's built-in screenshot of the page.
    driver.save_screenshot('snap.png')

    captcha = driver.find_element_by_class_name('geetest_canvas_img')
    origin = captcha.location
    extent = captcha.size

    # (left, upper, right, lower) of the captcha inside the screenshot.
    box = (
        origin['x'],
        origin['y'],
        origin['x'] + extent['width'],
        origin['y'] + extent['height'],
    )

    # Cut the captcha picture out of the screenshot.
    return Image.open('snap.png').crop(box)
44
45
def get_image1(driver):
    """Show the complete background canvas and return a snapshot of it.

    Sets ``display`` of the first ``geetest_canvas_fullbg`` element to
    ``"block"`` and then captures the captcha area with ``get_snap``.

    The one-second pause now comes after the script runs, matching
    ``get_image2``, so the page has time to apply the style change before the
    screenshot is taken.

    Args:
        driver: the WebDriver showing the captcha.

    Returns:
        The cropped image returned by ``get_snap``.
    """
    time.sleep(0.2)
    js_code = '''
var x = document.getElementsByClassName('geetest_canvas_fullbg')[0].style.display="block";
console.log(x)
'''

    driver.execute_script(js_code)

    # Give the page time to apply the style change before capturing it.
    time.sleep(1)

    # Capture the picture.
    img_obj = get_snap(driver)

    return img_obj
60
61
def get_image2(driver):
    """Hide the complete background canvas and return a snapshot of the captcha.

    Sets ``display`` of the first ``geetest_canvas_fullbg`` element to
    ``"none"``, waits one second and captures the captcha area with
    ``get_snap``.

    Args:
        driver: the WebDriver showing the captcha.

    Returns:
        The cropped image returned by ``get_snap``.
    """
    hide_fullbg = '''
var x = document.getElementsByClassName('geetest_canvas_fullbg')[0].style.display="none";
console.log(x)
'''
    time.sleep(0.2)
    driver.execute_script(hide_fullbg)

    # Pause before taking the screenshot.
    time.sleep(1)

    return get_snap(driver)
78
79
def get_distance(image1, image2, start=60, color_num=60, offset=7):
    """Find the x position where two equally sized images first differ.

    Columns are scanned left to right starting at ``start``; inside a column
    the pixels are compared top to bottom. The first pixel whose red, green or
    blue channel differs by ``color_num`` or more ends the search.

    The pixel accessors are fetched once up front instead of calling
    ``load()`` twice for every pixel.

    Args:
        image1: first image; must provide ``size`` as (width, height) and
            ``load()`` returning an object indexable as ``pixels[x, y]``.
        image2: second image with the same interface; it is indexed with the
            coordinates of ``image1``.
        start: first column to inspect (columns left of it are ignored).
        color_num: minimum per-channel difference that counts as a mismatch.
        offset: value subtracted from the matching column before returning.

    Returns:
        ``x - offset`` for the first differing column ``x``, or ``None`` when
        no pixel differs by at least ``color_num``.
    """
    pixels1 = image1.load()
    pixels2 = image2.load()
    width, height = image1.size

    for x in range(start, width):
        for y in range(height):
            rgb1 = pixels1[x, y]
            rgb2 = pixels2[x, y]

            # Only the first three channels (R, G, B) are compared.
            if any(abs(c1 - c2) >= color_num
                   for c1, c2 in zip(rgb1[:3], rgb2[:3])):
                return x - offset

    return None
100
101
def get_stacks(distance):
    """Build a human-like drag track for a slider that must travel ``distance``.

    The forward track overshoots the target by 20 and the fixed backward track
    (which sums to -20) brings the slider back, so both tracks together add up
    to exactly ``distance``.

    Motion model (uniform acceleration, then deceleration)::

        v = v0 + a * t
        s = v * t + 0.5 * a * (t**2)

    The slider accelerates during the first 3/5 of the way and decelerates
    afterwards; the acceleration is drawn at random from ``[3, 4, 5]`` for
    every time step of 0.2.

    Changes from the previous version: the last forward step is trimmed so the
    track cannot overshoot its target, and while decelerating each step is at
    least 1 and the speed never drops below 0, so the track always moves
    towards the target.

    Args:
        distance: number of pixels the slider has to travel; expected to be
            an integer.

    Returns:
        A dict with the keys ``'forward_stacks'`` and ``'back_stacks'``, each a
        list of x offsets to apply one after another.
    """
    # Overshoot on purpose; back_stacks compensates for it.
    target = distance + 20

    # Initial speed.
    v0 = 0

    # Magnitudes to pick the acceleration / deceleration from.
    a_list = [3, 4, 5]

    # Duration of one step.
    t = 0.2

    # Distance covered so far.
    s = 0

    # Forward track.
    forward_stacks = []

    mid = target * 3 / 5

    while s < target:
        accelerating = s < mid
        a = random.choice(a_list)
        if not accelerating:
            a = -a

        v = v0

        # Displacement of this step, rounded to whole pixels.
        stack = round(v * t + 0.5 * a * (t ** 2))

        if not accelerating:
            # Keep moving forward even when the speed has dropped to zero.
            stack = max(stack, 1)

        # Never travel past the target.
        stack = min(stack, target - s)

        s += stack

        v0 = max(v + a * t, 0)

        forward_stacks.append(stack)

    back_stacks = [-1, -1, -2, -3, -2, -3, -2, -2, -3, -1]

    return {'forward_stacks': forward_stacks, 'back_stacks': back_stacks}
153
154
def main():
    """Log in to cnblogs and solve the slider captcha with the global ``driver``.

    Steps: fill in the credentials and submit, open the slider captcha, take a
    snapshot with the full background and one with the gap, derive the sliding
    distance from their difference, build a movement track and drag the slider
    along it. The driver is shut down when the function ends, whether it
    succeeded or not.

    Raises:
        RuntimeError: if the two snapshots do not differ, so no sliding
            distance can be determined.
    """
    try:
        # Configure the implicit wait before the first request so that it is
        # already in effect for every element lookup that follows.
        driver.implicitly_wait(5)
        driver.get('https://passport.cnblogs.com/user/signin')

        # 1. Type user name and password, then click the login button.
        # NOTE(review): the credentials are hard-coded; consider loading them
        # from outside the source file.
        username = driver.find_element_by_id('LoginName')
        password = driver.find_element_by_id('Password')
        login_button = driver.find_element_by_class_name('ladda-label')
        time.sleep(1)
        username.send_keys('_tank_')
        time.sleep(1)
        password.send_keys('k46709394.')

        # Wait until both fields are filled in before clicking the login
        # button; otherwise the captcha dialog does not pop up.
        time.sleep(1)
        login_button.click()
        # time.sleep(3)

        # 2. Click the slider button so the captcha pictures are shown.
        geetest_button = driver.find_element_by_class_name('geetest_slider_button')
        geetest_button.click()

        time.sleep(0.2)

        # 3. Snapshot of the complete picture.
        image1 = get_image1(driver)

        # 4. Snapshot of the picture with the gap.
        image2 = get_image2(driver)

        # 5. Compare both pictures to obtain the sliding distance.
        distance = get_distance(image1, image2)
        if distance is None:
            # Fail with a clear message instead of a TypeError further down.
            raise RuntimeError(
                'could not locate the captcha gap: the two snapshots do not differ')

        # 6. Build a human-like movement track.
        stacks = get_stacks(distance)

        # 7. Drag the slider along the track.
        forward_stacks = stacks['forward_stacks']
        back_stacks = stacks['back_stacks']

        slider_button = driver.find_element_by_class_name('geetest_slider_button')
        time.sleep(0.2)

        ActionChains(driver).click_and_hold(slider_button).perform()

        time.sleep(0.2)
        for forward_stack in forward_stacks:
            ActionChains(driver).move_by_offset(xoffset=forward_stack, yoffset=0).perform()
            time.sleep(0.1)
        for back_stack in back_stacks:
            ActionChains(driver).move_by_offset(xoffset=back_stack, yoffset=0).perform()
            time.sleep(0.1)

        time.sleep(0.2)

        # Small move right and back left before letting go.
        ActionChains(driver).move_by_offset(xoffset=5, yoffset=0).perform()
        ActionChains(driver).move_by_offset(xoffset=-5, yoffset=0).perform()

        ActionChains(driver).release().perform()

        # Keep the browser open so the result can be inspected.
        time.sleep(50)

    finally:
        # quit() ends the whole WebDriver session (browser and driver process);
        # close() would only close the current window.
        driver.quit()
222
223
# Run the captcha login flow only when this file is executed as a script.
if __name__ == '__main__':
    main()
来源:https://www.cnblogs.com/xm123456/p/11047849.html