mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-12 12:25:19 +08:00
更新README
This commit is contained in:
parent
af675c8a8e
commit
1c2185a7b8
103
README.en.md
103
README.en.md
@ -25,12 +25,13 @@ Even better, it is very concise and user-friendly, with little code and friendly
|
||||
|
||||
***
|
||||
|
||||
-Allows seamless switching between selenium and requests, sharing session.
|
||||
-The two modes provide a unified operation method with consistent user experience.
|
||||
-Common methods are encapsulated in units of pages to facilitate PO mode expansion.
|
||||
-Humanized operation method of page elements to reduce the workload of page analysis and coding.
|
||||
-Save configuration information to file for easy recall.
|
||||
-Some common functions (such as click) have been optimized to better meet the actual needs.
|
||||
- Allows seamless switching between selenium and requests, sharing session.
|
||||
- The two modes provide a unified operation method with consistent user experience.
|
||||
- Common methods are encapsulated in units of pages to facilitate PO mode expansion.
|
||||
- Humanized operation method of page elements to reduce the workload of page analysis and coding.
|
||||
- Save configuration information to file for easy recall.
|
||||
- Some common functions (such as click) have been optimized to better meet the actual needs.
|
||||
- Easy configuration method to get rid of the cumbersome browser configuration.
|
||||
|
||||
# Idea
|
||||
|
||||
@ -44,25 +45,16 @@ Even better, it is very concise and user-friendly, with little code and friendly
|
||||
|
||||
The following code implements exactly the same function, comparing the code amounts of the two:
|
||||
|
||||
1. Find all elements whose name is ele_name
|
||||
1. Find the first element whose text contains 'some text'
|
||||
|
||||
```python
|
||||
# selenium:
|
||||
element = WebDriverWait(driver).until(ec.presence_of_all_elements_located((By.XPATH, '//*[@name="ele_name"]')))
|
||||
element = WebDriverWait(driver).until(ec.presence_of_all_elements_located((By.XPATH, '//*[contains(text(), "some text")]')))
|
||||
# DrissionPage:
|
||||
element = page.eles('@name:ele_name')
|
||||
element = page.ele('some text')
|
||||
```
|
||||
|
||||
2. Find the element whose first text contains 'some text'
|
||||
|
||||
```python
|
||||
# selenium:
|
||||
element = WebDriverWait(driver, timeout = 2).until(ec.presence_of_element_located((By.XPATH, '//*[contains(text(), "some text")]')))
|
||||
# DrissionPage:
|
||||
element = page.ele('some text', timeout = 2)
|
||||
```
|
||||
|
||||
3. Jump to the first tab
|
||||
2. Jump to the first tab
|
||||
|
||||
```python
|
||||
# selenium
|
||||
@ -71,7 +63,7 @@ driver.switch_to.window(driver.window_handles[0])
|
||||
page.to_tab(0)
|
||||
```
|
||||
|
||||
4. Drag an element
|
||||
3. Drag an element
|
||||
|
||||
```python
|
||||
# selenium
|
||||
@ -80,7 +72,7 @@ ActionChains(driver).drag_and_drop(ele1, ele2).perform()
|
||||
ele1.drag_to(ele2)
|
||||
```
|
||||
|
||||
5. Scroll the window to the bottom (keep the horizontal scroll bar unchanged)
|
||||
4. Scroll the window to the bottom (keep the horizontal scroll bar unchanged)
|
||||
|
||||
```python
|
||||
# selenium
|
||||
@ -89,6 +81,18 @@ driver.execute_script("window.scrollTo(document.documentElement.scrollLeft,docum
|
||||
page.scroll_to('bottom')
|
||||
```
|
||||
|
||||
5. Set headless mode
|
||||
|
||||
```python
|
||||
# selenium
|
||||
options = webdriver.ChromeOptions()
|
||||
options.add_argument("--headless")
|
||||
# DrissionPage
|
||||
set_headless()
|
||||
```
|
||||
|
||||
|
||||
|
||||
# Background
|
||||
|
||||
***
|
||||
@ -108,8 +112,7 @@ The design concept of this library is to keep everything simple, try to provide
|
||||
Example: Log in to the website with selenium, then switch to requests to read the web page.
|
||||
|
||||
```python
|
||||
drission = Drission() # Create Drive Object
|
||||
page = MixPage(drission) # Create page object, default driver mode
|
||||
page = MixPage() # Create page object, default driver mode
|
||||
page.get('https://gitee.com/profile') # Visit personal center page (redirect to the login page)
|
||||
|
||||
page.ele('@id:user_login').input('your_user_name') # Use selenium to log in
|
||||
@ -184,7 +187,7 @@ If you choose the third method, please run these lines of code before using the
|
||||
|
||||
```python
|
||||
from DrissionPage.easy_set import set_paths
|
||||
driver_path = 'C:\\chrome\\chromedriver.exe' # Your chromedriver.exe path, optional
|
||||
driver_path = 'D:\\chrome\\chromedriver.exe' # Your chromedriver.exe path, optional
|
||||
chrome_path = 'D:\\chrome\\chrome.exe' # Your chrome.exe path, optional
|
||||
set_paths(driver_path, chrome_path)
|
||||
```
|
||||
@ -210,6 +213,8 @@ In addition to the above two paths, this method can also set the following paths
|
||||
debugger_address # Opened browser address, eg. 127.0.0.1:9222
|
||||
download_path # Download path
|
||||
global_tmp_path # Temporary folder path
|
||||
user_data_path # User data path
|
||||
cache_path # Cache path
|
||||
```
|
||||
|
||||
Tips:
|
||||
@ -222,7 +227,8 @@ Tips:
|
||||
|
||||
## Create Drission Object
|
||||
|
||||
Drission objects are used to manage driver and session objects. It can be created by directly reading the configuration information of the ini file, or it can be passed in during initialization.
|
||||
Drission objects are used to manage driver and session objects. When multiple pages work together, a Drission object is used to pass the driver along, so that multiple page classes can control the same browser or Session object.
|
||||
It can be created by directly reading the configuration information of the ini file, or it can be passed in during initialization.
|
||||
|
||||
```python
|
||||
# Created by default ini file
|
||||
@ -241,7 +247,7 @@ from DrissionPage.config import DriverOptions
|
||||
driver_options = DriverOptions() # Create driver configuration object
|
||||
driver_options.binary_location = 'D:\\chrome\\chrome.exe' # chrome.exe path
|
||||
session_options = {'headers': {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6)'}}
|
||||
driver_path = 'C:\\chrome\\chromedriver.exe' # driver_path path
|
||||
driver_path = 'D:\\chrome\\chromedriver.exe' # driver_path path
|
||||
|
||||
drission = Drission(driver_options, session_options, driver_path) # Create object through incoming configuration
|
||||
```
|
||||
@ -250,11 +256,17 @@ drission = Drission(driver_options, session_options, driver_path) # Create obje
|
||||
|
||||
## Use MixPage objects
|
||||
|
||||
The MixPage page object encapsulates commonly used web page operations and implements the switch between driver and session mode.
|
||||
The MixPage page object encapsulates commonly used web page operations and implements the switch between driver and session mode.
|
||||
MixPage must receive a Drission object and use its driver or session. If none is passed in, MixPage will create a Drission object itself (using the configuration from the default ini file).
|
||||
|
||||
Tips: When multiple page objects work together, remember to create a Drission object manually and pass it to each page object. Otherwise, each page object will create its own Drission object, and information cannot be passed between them.
|
||||
|
||||
```python
|
||||
# Automatic creation of Drission objects is recommended only for single page objects
|
||||
page = MixPage()
|
||||
|
||||
page = MixPage(drission) # Default driver mode
|
||||
page = MixPage(drission, mode='s', timeout=10) # Session mode, element waiting time 5 seconds (default 10 seconds)
|
||||
page = MixPage(drission, mode='d', timeout=5) # driver mode, element waiting time 5 seconds (default 10 seconds)
|
||||
|
||||
# Visit URL
|
||||
page.get(url, **kwargs)
|
||||
@ -383,7 +395,9 @@ arguments = [
|
||||
; No sandbox
|
||||
'--no-sandbox',
|
||||
; Google documentation mentions the need to add this attribute to avoid bugs
|
||||
'--disable-gpu'
|
||||
'--disable-gpu',
|
||||
; ignore errors
|
||||
'ignore-certificate-errors'
|
||||
]
|
||||
; Plugin
|
||||
extensions = []
|
||||
@ -395,12 +409,12 @@ experimental_options = {
|
||||
; No pop-up window
|
||||
'profile.default_content_setting_values': {'notifications': 2},
|
||||
; Disable PDF plugin
|
||||
'plugins.plugins_list': [{"enabled": False, "name": "Chrome PDF Viewer"}],
|
||||
'plugins.plugins_list': [{"enabled": False, "name": "Chrome PDF Viewer"}]
|
||||
},
|
||||
; Set to developer mode, to counter anti-crawler detection (useless)
|
||||
'excludeSwitches': ["ignore-certificate-errors", "enable-automation"],
|
||||
'excludeSwitches': ["enable-automation"],
|
||||
'useAutomationExtension': False
|
||||
}
|
||||
}
|
||||
|
||||
[session_options]
|
||||
headers = {
|
||||
@ -458,6 +472,25 @@ save('D:\\settings.ini') # Save to other path
|
||||
```
|
||||
|
||||
|
||||
|
||||
## easy_set methods
|
||||
|
||||
Chrome's configuration is hard to remember, so the commonly used settings are wrapped in simple methods; calling them modifies the ini file.
|
||||
|
||||
```python
|
||||
set_headless(True) # Set headless mode
|
||||
set_no_imgs(True) # Enable no-image mode
|
||||
set_no_js(True) # Disable JavaScript
|
||||
set_mute(True) # Silent mode
|
||||
set_user_agent('Mozilla/5.0 (Macintosh; Int......') # set user agent
|
||||
set_proxy('127.0.0.1:8888') # set proxy
|
||||
set_paths(paths) # See the Initialization section
|
||||
set_argument(arg, on_off) # Set an argument without a value, e.g. 'zh_CN.UTF-8'; if on_off is False, the item is deleted
|
||||
set_value_argument(arg, value) # Set an argument with a value, e.g. '--proxy-server=http://127.0.0.1:8888'; set it to '' to delete the item
|
||||
```
|
||||
|
||||
|
||||
|
||||
# PO mode
|
||||
|
||||
***
|
||||
@ -641,7 +674,7 @@ print(page.ele('@id:su').text) # Output:百度一下
|
||||
|
||||
## MixPage class
|
||||
|
||||
class **MixPage**(drission: Drission, mode='d', timeout: float = 10)
|
||||
class **MixPage**(drission: Drission = None, mode:str = 'd', timeout: float = 10)
|
||||
|
||||
MixPage encapsulates common functions for page operations and can seamlessly switch between driver and session modes. Cookies are automatically synchronized when switching.
|
||||
The function of obtaining information is common to the two modes, and the function of operating page elements is only available in the d mode. Calling a function unique to a certain mode will automatically switch to that mode.
|
||||
@ -649,7 +682,7 @@ It inherits from DriverPage and SessionPage classes. These functions are impleme
|
||||
|
||||
Parameter Description:
|
||||
|
||||
- drission - Drission object
|
||||
- drission - Drission object; if not passed in, one is created automatically
|
||||
- mode - Mode, optional 'd' or 's', default is 'd'
|
||||
- timeout - Timeout for finding elements (can also be set separately for each element search)
|
||||
|
||||
@ -1401,7 +1434,7 @@ Parameter Description:
|
||||
|
||||
|
||||
|
||||
- ## easy_set methods
|
||||
## easy_set methods
|
||||
|
||||
The configuration of chrome is too difficult to remember, so the commonly used configuration is written as a simple method, and the call will modify the relevant content of the ini file.
|
||||
|
||||
|
@ -27,6 +27,7 @@ DrissionPage,即driver和session的合体,是个基于python的Web自动化
|
||||
- 人性化的页面元素操作方法,减轻页面分析工作量和编码量。
|
||||
- 把配置信息保存到文件,方便调用。
|
||||
- 对某些常用功能(如点击)作了优化,更符合实际使用需要。
|
||||
- 简易的配置方法,摆脱繁琐的浏览器配置。
|
||||
|
||||
# 理念
|
||||
|
||||
@ -40,25 +41,16 @@ DrissionPage,即driver和session的合体,是个基于python的Web自动化
|
||||
|
||||
以下代码实现一模一样的功能,对比两者的代码量:
|
||||
|
||||
1、查找所有name为ele_name的元素
|
||||
1. 查找所有文本包含some text的元素
|
||||
|
||||
```python
|
||||
# selenium:
|
||||
element = WebDriverWait(driver).until(ec.presence_of_all_elements_located((By.XPATH, '//*[@name="ele_name"]')))
|
||||
element = WebDriverWait(driver).until(ec.presence_of_all_elements_located((By.XPATH, '//*[contains(text(), "some text")]')))
|
||||
# DrissionPage:
|
||||
element = page.eles('@name:ele_name')
|
||||
element = page.eles('some text')
|
||||
```
|
||||
|
||||
2、查找第一个文本包含some text的元素
|
||||
|
||||
```python
|
||||
# selenium:
|
||||
element = WebDriverWait(driver, timeout = 2).until(ec.presence_of_element_located((By.XPATH, '//*[contains(text(), "some text")]')))
|
||||
# DrissionPage:
|
||||
element = page.ele('some text', timeout = 2)
|
||||
```
|
||||
|
||||
3、跳转到第一个标签页
|
||||
2. 跳转到第一个标签页
|
||||
|
||||
```python
|
||||
# selenium
|
||||
@ -67,7 +59,7 @@ driver.switch_to.window(driver.window_handles[0])
|
||||
page.to_tab(0)
|
||||
```
|
||||
|
||||
4、拖拽一个元素
|
||||
3. 拖拽一个元素
|
||||
|
||||
```python
|
||||
# selenium
|
||||
@ -76,7 +68,7 @@ ActionChains(driver).drag_and_drop(ele1, ele2).perform()
|
||||
ele1.drag_to(ele2)
|
||||
```
|
||||
|
||||
5、滚动窗口到底部(保持水平滚动条不变)
|
||||
4. 滚动窗口到底部(保持水平滚动条不变)
|
||||
|
||||
```python
|
||||
# selenium
|
||||
@ -85,6 +77,18 @@ driver.execute_script("window.scrollTo(document.documentElement.scrollLeft,docum
|
||||
page.scroll_to('bottom')
|
||||
```
|
||||
|
||||
5. 设置headless模式
|
||||
|
||||
```python
|
||||
# selenium
|
||||
options = webdriver.ChromeOptions()
|
||||
options.add_argument("--headless")
|
||||
# DrissionPage
|
||||
set_headless()
|
||||
```
|
||||
|
||||
|
||||
|
||||
# 背景
|
||||
|
||||
***
|
||||
@ -104,8 +108,7 @@ page.scroll_to('bottom')
|
||||
例:用selenium登录网站,然后切换到requests读取网页。
|
||||
|
||||
```python
|
||||
drission = Drission() # 创建驱动器对象
|
||||
page = MixPage(drission) # 创建页面对象,默认driver模式
|
||||
page = MixPage() # 创建页面对象,默认driver模式
|
||||
page.get('https://gitee.com/profile') # 访问个人中心页面(未登录,重定向到登录页面)
|
||||
|
||||
page.ele('@id:user_login').input('your_user_name') # 使用selenium输入账号密码登录
|
||||
@ -165,12 +168,10 @@ from DrissionPage import *
|
||||
|
||||
## 初始化
|
||||
|
||||
使用selenium前,必须配置chrome.exe和chromedriver.exe的路径,并确保它们版本匹配。
|
||||
|
||||
使用selenium前,必须配置chrome.exe和chromedriver.exe的路径,并确保它们版本匹配。
|
||||
如果你只使用session模式,可跳过本节。
|
||||
|
||||
配置路径有三种方法:
|
||||
|
||||
- 将两个路径写入系统变量。
|
||||
- 使用时手动传入路径。
|
||||
- 将路径写入本库的ini文件(推荐)。
|
||||
@ -179,7 +180,7 @@ from DrissionPage import *
|
||||
|
||||
```python
|
||||
from DrissionPage.easy_set import set_paths
|
||||
driver_path = 'C:\\chrome\\chromedriver.exe' # 你的chromedriver.exe路径,可选
|
||||
driver_path = 'D:\\chrome\\chromedriver.exe' # 你的chromedriver.exe路径,可选
|
||||
chrome_path = 'D:\\chrome\\chrome.exe' # 你的chrome.exe路径,可选
|
||||
set_paths(driver_path, chrome_path)
|
||||
```
|
||||
@ -205,6 +206,8 @@ chromedriver下载网址:https://chromedriver.chromium.org/downloads
|
||||
debugger_address # 调试浏览器地址,如:127.0.0.1:9222
|
||||
download_path # 下载文件路径
|
||||
global_tmp_path # 临时文件夹路径
|
||||
user_data_path # 用户数据路径
|
||||
cache_path # 缓存路径
|
||||
```
|
||||
|
||||
Tips:
|
||||
@ -217,7 +220,8 @@ Tips:
|
||||
|
||||
## 创建驱动器对象Drission
|
||||
|
||||
Drission对象用于管理driver和session对象。可直接读取ini文件配置信息创建,也可以在初始化时传入配置信息。
|
||||
Drission对象用于管理driver和session对象。在多个页面协同工作时,Drission对象用于传递驱动器,使多个页面类可控制同一个浏览器或Session对象。
|
||||
可直接读取ini文件配置信息创建,也可以在初始化时传入配置信息。
|
||||
|
||||
```python
|
||||
# 由默认ini文件创建
|
||||
@ -236,7 +240,7 @@ from DrissionPage.config import DriverOptions
|
||||
driver_options = DriverOptions() # 创建driver配置对象
|
||||
driver_options.binary_location = 'D:\\chrome\\chrome.exe' # chrome.exe路径
|
||||
session_options = {'headers': {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6)'}}
|
||||
driver_path = 'C:\\chrome\\chromedriver.exe' # driver_path路径
|
||||
driver_path = 'D:\\chrome\\chromedriver.exe' # driver_path路径
|
||||
|
||||
drission = Drission(driver_options, session_options, driver_path) # 传入配置
|
||||
```
|
||||
@ -245,11 +249,17 @@ drission = Drission(driver_options, session_options, driver_path) # 传入配
|
||||
|
||||
## 使用页面对象MixPage
|
||||
|
||||
MixPage页面对象封装了常用的网页操作,并实现driver和session模式之间的切换。
|
||||
MixPage页面对象封装了常用的网页操作,并实现driver和session模式之间的切换。
|
||||
MixPage须接收一个Drission对象并使用其中的driver或session,如没有传入,MixPage会自己创建一个Drission(使用默认ini文件的配置)。
|
||||
|
||||
Tips: 多页面对象协同工作时,记得手动创建Drission对象并传递给页面对象使用。否则页面对象会各自创建自己的Drission对象,使信息无法传递。
|
||||
|
||||
```python
|
||||
# 自动创建Drission对象,建议只在单页面对象情况下使用
|
||||
page = MixPage()
|
||||
|
||||
page = MixPage(drission) # 默认driver模式
|
||||
page = MixPage(drission, mode='s', timeout=10) # session模式,元素等待时间5秒(默认10秒)
|
||||
page = MixPage(drission, mode='d', timeout=5) # driver模式,元素等待时间5秒(默认10秒)
|
||||
|
||||
# 访问URL
|
||||
page.get(url, **kwargs)
|
||||
@ -378,7 +388,9 @@ arguments = [
|
||||
; 不使用沙盒
|
||||
'--no-sandbox',
|
||||
; 谷歌文档提到需要加上这个属性来规避bug
|
||||
'--disable-gpu'
|
||||
'--disable-gpu',
|
||||
; 忽略警告
|
||||
'ignore-certificate-errors'
|
||||
]
|
||||
; 插件
|
||||
extensions = []
|
||||
@ -390,12 +402,12 @@ experimental_options = {
|
||||
; 无弹窗
|
||||
'profile.default_content_setting_values': {'notifications': 2},
|
||||
; 禁用PDF插件
|
||||
'plugins.plugins_list': [{"enabled": False, "name": "Chrome PDF Viewer"}],
|
||||
; 设置为开发者模式,防反爬虫(无用)
|
||||
'excludeSwitches': ["ignore-certificate-errors", "enable-automation"],
|
||||
'plugins.plugins_list': [{"enabled": False, "name": "Chrome PDF Viewer"}]
|
||||
},
|
||||
; 设置为开发者模式,防反爬虫
|
||||
'excludeSwitches': ["enable-automation"],
|
||||
'useAutomationExtension': False
|
||||
}
|
||||
}
|
||||
|
||||
[session_options]
|
||||
headers = {
|
||||
@ -453,6 +465,23 @@ save('D:\\settings.ini') # 保存到其它路径
|
||||
```
|
||||
|
||||
|
||||
|
||||
## easy_set方法
|
||||
|
||||
chrome的配置太难记,所以把常用的配置写成简单的方法,调用会修改ini文件相关内容。
|
||||
|
||||
```python
|
||||
set_headless(True) # 开启headless模式
|
||||
set_no_imgs(True) # 开启无图模式
|
||||
set_no_js(True) # 禁用JS
|
||||
set_mute(True) # 开启静音模式
|
||||
set_user_agent('Mozilla/5.0 (Macintosh; Int......') # 设置user agent
|
||||
set_proxy('127.0.0.1:8888') # 设置代理
|
||||
set_paths(paths) # 见 [初始化] 一节
|
||||
set_argument(arg, on_off) # 设置不带值的属性,如'zh_CN.UTF-8',on_off为False则删除该项
|
||||
set_value_argument(arg, value) # 设置带值的属性,如'--proxy-server=http://127.0.0.1:8888',设为''则删除该项
|
||||
```
|
||||
|
||||
# PO模式
|
||||
|
||||
***
|
||||
@ -637,7 +666,7 @@ class **Drission**(driver_options: Union[dict, Options] = None, session_options:
|
||||
|
||||
## MixPage类
|
||||
|
||||
class **MixPage**(drission: Drission, mode='d', timeout: float = 10)
|
||||
class **MixPage**(drission: Drission = None, mode:str = 'd', timeout: float = 10)
|
||||
|
||||
MixPage封装了页面操作的常用功能,可在driver和session模式间无缝切换。切换的时候会自动同步cookies。
|
||||
获取信息功能为两种模式共有,操作页面元素功能只有d模式有。调用某种模式独有的功能,会自动切换到该模式。
|
||||
@ -645,7 +674,7 @@ MixPage封装了页面操作的常用功能,可在driver和session模式间无
|
||||
|
||||
参数说明:
|
||||
|
||||
- drission - Drission对象
|
||||
- drission - Drission对象,如没传入则创建一个
|
||||
- mode - 模式,可选'd'或's',默认为'd'
|
||||
- timeout - 查找元素超时时间(每次查找元素时还可单独设置)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user