from pyquery import PyQuery as pq
# NOTE: do not name this file pyquery.py -- it would conflict with the
# pyquery package imported above.

# --- Initialization from a string ---
# Sample markup reused by every section of this script.
html = '''
<div id="page">
<div id="car_test">
<ul class="menu-list">
<li class="icon1"><a href="link1.html">科目一</a></li>
<li class="icon2"><a href="link2.html">科目二</a></li>
<li class="icon3 subject"><a href="link3.html">科目三</a></li>
<li class="icon4"><a href="link4.html">科目四</a></li>
<li class="buy car"><a href="link4.html">买车</a></li>
</ul>
</div>
</div>
'''
doc = pq(html)
print(doc('li'))

# --- Initialization from a URL ---
# With url=..., PyQuery downloads the page itself, so this step needs
# network access to the site below.
doc = pq(url='https://www.jiakaobaodian.com/')
print(doc('title'))

# --- Initialization from a local file ---
# 'demo.html' is a relative path; the file is expected to exist next to
# wherever the script is run from.
doc = pq(filename='demo.html')
print(doc('li'))
26
27
# --- Basic CSS selectors ---
doc = pq(html)
# Descendant selector: every <li> under .menu-list inside #car_test.
# Evaluate it once and reuse the result for both prints.
matched = doc('#car_test .menu-list li')
print(matched)
print(type(matched))
# When a class attribute holds several space-separated names, chain them
# with "." (no space) to require all of them on the same element; the
# comma combines two independent selectors.
print(doc('.menu-list .icon3.subject, .buy.car'))
34
35
# --- Navigating between nodes ---
doc = pq(html)
items = doc('.menu-list')
print(type(items))
print(items)

# find(): search all descendants of the current selection.
link_list = items.find('a')
print(type(link_list))
print(link_list)
print('\n')

# children(): direct children only; an optional selector filters them.
menu_list = items.children()
print(menu_list)
last_li = items.children('.buy.car')
print(last_li)

# parent(): the direct parent node.
car_test = items.parent()
print(type(car_test))
print(car_test)

# parents(): all ancestor nodes; for this markup these are the two <div>
# elements whose id is "page" and "car_test".
parents = items.parents()
print(parents)

# siblings(): the other nodes sharing the same parent.
li = doc('.menu-list .icon3.subject')
print(li.siblings())
61
62
# --- Iterating over a selection ---
doc = pq(html)
# items() yields each matched element wrapped as its own PyQuery object,
# so the loop body can keep using the PyQuery API on every <li>.
menu_list = doc('li').items()
print(type(menu_list))
for li in menu_list:
    print(li, type(li))
69
70
# --- Reading attributes, text and inner HTML ---
doc = pq(html)
a = doc('.icon3.subject a')
b = doc('.icon4')

# Attribute access: attr.<name>. "class" is a Python keyword, so the class
# attribute is read as class_.
print(a.attr.href)
print(b.attr.class_)

# text() returns the text content; html() returns the markup contained
# inside the node.
print(type(a.text()))
print(a.text())
print(b.text())
print(b.html())
# Source: https://www.cnblogs.com/lonelyWMW/p/11378529.html