mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-02 03:57:34 +08:00
待测试
重新调整继承结构 Page类增加json属性 DriverElement类增加wait_ele() 修复sub()方法使用时的小错误
This commit is contained in:
parent
b24b592f30
commit
346b88ba41
291
DrissionPage/base.py
Normal file
291
DrissionPage/base.py
Normal file
@ -0,0 +1,291 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
"""
|
||||
@Author : g1879
|
||||
@Contact : g1879@qq.com
|
||||
@File : base.py
|
||||
"""
|
||||
from abc import abstractmethod
|
||||
from re import sub
|
||||
from typing import Union
|
||||
|
||||
from lxml.html import HtmlElement
|
||||
from selenium.webdriver.remote.webelement import WebElement
|
||||
|
||||
from .common import format_html
|
||||
|
||||
|
||||
class BaseParser(object):
    """Lookup interface shared by the page and element classes in this module.

    Subclasses provide ``ele()``; ``__call__`` and ``eles()`` only forward to it.
    NOTE: the class does not use ``ABCMeta``, so the ``@abstractmethod`` marker
    below documents intent but does not block instantiation.
    """

    def __call__(self,
                 loc_or_str,
                 mode: str = 'single',
                 timeout: float = None):
        """Shorthand for ``self.ele(loc_or_str, mode, timeout)``.

        :param loc_or_str: locator, passed through unchanged to ``ele()``
        :param mode: forwarded to ``ele()``; defaults to 'single'
        :param timeout: forwarded to ``ele()``
        :return: whatever ``ele()`` returns
        """
        return self.ele(loc_or_str, mode, timeout)

    def eles(self, loc_or_str, timeout: float = None):
        """Forward to ``ele()`` with ``mode='all'``.

        ``timeout`` now defaults to ``None`` so that one-argument calls work,
        matching the default already used by ``__call__``.

        :param loc_or_str: locator, passed through unchanged to ``ele()``
        :param timeout: forwarded to ``ele()``
        :return: whatever ``ele()`` returns
        """
        return self.ele(loc_or_str, mode='all', timeout=timeout)

    # ---------------- to be implemented by subclasses ----------------
    @property
    def html(self):
        """Placeholder; returns ``None`` in this base class."""
        return

    @abstractmethod
    def ele(self, loc_or_ele, mode, timeout):
        """Find element(s); the base implementation does nothing."""
        pass
|
||||
|
||||
|
||||
class BaseElement(BaseParser):
    """Base class of SessionElement and DriverElement."""

    def __init__(self, ele: Union[WebElement, HtmlElement], page=None):
        """Store the wrapped element and the page object passed with it.

        :param ele: the underlying element object to wrap
        :param page: page object associated with the element (may be None)
        """
        self._inner_ele = ele
        self.page = page

    @property
    def inner_ele(self) -> Union[WebElement, HtmlElement]:
        """Return the wrapped element object."""
        return self._inner_ele

    @property
    def next(self):
        """Return the result of ``nexts()`` called with its defaults."""
        return self.nexts()

    def eles(self, loc_or_str, timeout: float = None):
        """Forward to the parent ``eles()``.

        ``timeout`` now defaults to ``None`` so that one-argument calls work;
        it is still passed on explicitly.

        :param loc_or_str: locator, passed through unchanged
        :param timeout: forwarded to the parent implementation
        :return: whatever the parent ``eles()`` returns
        """
        return super().eles(loc_or_str, timeout)

    # ---------------- implemented by subclasses ----------------
    @property
    def tag(self):
        """Placeholder; returns ``None`` in this base class."""
        return

    @property
    def html(self):
        """Placeholder; returns ``None`` in this base class."""
        return

    @property
    def parent(self):
        """Placeholder; returns ``None`` in this base class."""
        return

    @property
    def prev(self):
        """Placeholder; returns ``None`` in this base class."""
        return

    @property
    def is_valid(self):
        """Return ``True`` in this base class."""
        return True

    @abstractmethod
    def ele(self, loc_or_str, mode, timeout):
        """Find element(s); the base implementation does nothing."""
        pass

    @abstractmethod
    def nexts(self, num: int = 1):
        """Return a following sibling; the base implementation does nothing."""
        pass
|
||||
|
||||
|
||||
class DrissionElement(BaseElement):
    """Element behaviour built on ``ele()``/``eles()``/``attr()``: relatives, paths and texts."""

    @property
    def parent(self):
        """Return the result of ``parents()`` called with its defaults."""
        return self.parents()

    @property
    def prev(self):
        """Return the result of ``prevs()`` called with its defaults."""
        return self.prevs()

    @property
    def link(self) -> str:
        """Return the ``href`` attribute, or the ``src`` attribute when ``href`` is falsy."""
        href = self.attr('href')
        return href or self.attr('src')

    @property
    def css_path(self) -> str:
        """Return the element path produced by ``_get_ele_path('css')``."""
        return self._get_ele_path('css')

    @property
    def xpath(self) -> str:
        """Return the element path produced by ``_get_ele_path('xpath')``."""
        return self._get_ele_path('xpath')

    @property
    def comments(self) -> list:
        """Return what ``eles()`` finds for the comment nodes below this element."""
        return self.eles('xpath:.//comment()')

    def texts(self, text_node_only: bool = False) -> list:
        """Return the non-blank texts of the element's direct child nodes. \n
        :param text_node_only: when true, query text nodes only; otherwise query
                               text nodes and child elements and use each
                               element's ``text``
        :return: list of texts, each passed through ``format_html``
        """
        if text_node_only:
            candidates = self.eles('xpath:/text()')
        else:
            candidates = []
            for node in self.eles('xpath:./text() | *'):
                # String results are used directly, element results via .text
                candidates.append(node if isinstance(node, str) else node.text)

        cleaned = []
        for txt in candidates:
            # Drop falsy entries and entries made only of newlines, tabs or spaces
            if not txt or sub('[\n\t ]', '', txt) == '':
                continue
            cleaned.append(format_html(txt.strip(' ').rstrip('\n')))

        return cleaned

    def nexts(self, num: int = 1, mode: str = 'ele'):
        """Return the num-th following sibling element or node. \n
        :param num: which following sibling to return
        :param mode: 'ele', 'node' or 'text'
        :return: whatever ``_get_brother`` returns for the 'next' direction
        """
        return self._get_brother(num, mode, 'next')

    def prevs(self, num: int = 1, mode: str = 'ele'):
        """Return the num-th preceding sibling element or node. \n
        :param num: which preceding sibling to return
        :param mode: 'ele', 'node' or 'text'
        :return: whatever ``_get_brother`` returns for the 'prev' direction
        """
        return self._get_brother(num, mode, 'prev')

    def _get_brother(self, num: int = 1, mode: str = 'ele', direction: str = 'next'):
        """Return the num-th sibling in the given direction. \n
        :param num: which sibling to return
        :param mode: 'ele', 'node' or 'text' - selects the xpath node test
        :param direction: 'next' or 'prev' - selects the sibling axis
        :return: the result of ``ele()`` for the first sibling that is not a
                 whitespace-only string
        :raises ValueError: if mode or direction is not one of the listed values
        """
        # xpath node test for each mode
        node_tests = (('ele', '*'), ('node', 'node()'), ('text', 'text()'))
        node_txt = next((test for key, test in node_tests if mode == key), None)
        if node_txt is None:
            raise ValueError(f"Argument mode can only be 'node' ,'ele' or 'text', not '{mode}'.")

        # sibling axis and lookup timeout for each direction
        axes = (('next', 'following', .5), ('prev', 'preceding', 0))
        axis = next((item for item in axes if direction == item[0]), None)
        if axis is None:
            raise ValueError(f"Argument direction can only be 'next' or 'prev', not '{direction}'.")
        direction_txt, timeout = axis[1], axis[2]

        while True:
            found = self.ele(f'xpath:./{direction_txt}-sibling::{node_txt}[{num}]', timeout=timeout)

            # Anything other than a whitespace-only string is the answer
            if not (isinstance(found, str) and sub('[\n\t ]', '', found) == ''):
                return found

            # Whitespace between elements: move on to the next sibling
            num += 1

    # ---------------- implemented by subclasses ----------------
    @property
    def inner_html(self):
        """Placeholder; returns ``None`` in this base class."""
        return None

    @property
    def attrs(self):
        """Placeholder; returns ``None`` in this base class."""
        return None

    @property
    def text(self):
        """Placeholder; returns ``None`` in this base class."""
        return None

    @property
    def raw_text(self):
        """Placeholder; returns ``None`` in this base class."""
        return None

    @abstractmethod
    def parents(self, num: int = 1):
        """Return an ancestor; the base implementation does nothing."""
        return None

    @abstractmethod
    def attr(self, attr: str):
        """Return an attribute value; the base implementation returns ''."""
        return ''

    @abstractmethod
    def ele(self, loc: Union[tuple, str], mode: str = None, timeout=None):
        """Find element(s); the base implementation does nothing."""
        return None

    @abstractmethod
    def eles(self, loc: Union[tuple, str], timeout=None):
        """Find all matching elements; the base implementation does nothing."""
        return None

    def _get_ele_path(self, mode):
        """Return the element path for ``mode``; the base implementation returns ''."""
        return ''
|
||||
|
||||
|
||||
class BasePage(BaseParser):
    """Base class holding the state and interface common to page objects."""

    def __init__(self, timeout: float = 10):
        """Initialise the shared page state.

        :param timeout: seconds to wait when looking up elements
        """
        self._url = None
        # Assigned through the property setter so subclasses overriding the
        # setter see the initial value as well.
        self.timeout = timeout
        self.retry_times = 3
        self.retry_interval = 2
        self._url_available = None

    @property
    def timeout(self) -> float:
        """Return the number of seconds to wait when looking up elements."""
        return self._timeout

    @timeout.setter
    def timeout(self, second: float) -> None:
        """Set the number of seconds to wait when looking up elements."""
        self._timeout = second

    @property
    def cookies(self) -> dict:
        """Return the result of ``get_cookies(True)``."""
        return self.get_cookies(True)

    @property
    def url_available(self) -> bool:
        """Return the stored availability flag of the current url."""
        return self._url_available

    def eles(self, loc_or_str, timeout: float = None):
        """Forward to the parent ``eles()``.

        ``timeout`` now defaults to ``None`` so that one-argument calls work;
        it is still passed on explicitly.

        :param loc_or_str: locator, passed through unchanged
        :param timeout: forwarded to the parent implementation
        :return: whatever the parent ``eles()`` returns
        """
        return super().eles(loc_or_str, timeout)

    # ---------------- implemented by subclasses ----------------
    @property
    def url(self):
        """Placeholder; returns ``None`` in this base class."""
        return

    @property
    def title(self):
        """Placeholder; returns ``None`` in this base class."""
        return

    @property
    def html(self):
        """Placeholder; returns ``None`` in this base class."""
        return

    @property
    def json(self):
        """Placeholder; returns ``None`` in this base class."""
        return

    @abstractmethod
    def get_cookies(self, as_dict: bool = False):
        """Return cookies; the base implementation returns an empty dict."""
        return {}

    @abstractmethod
    def get(self,
            url: str,
            go_anyway: bool = False,
            show_errmsg: bool = False,
            retry: int = None,
            interval: float = None):
        """Visit a url; the base implementation does nothing."""
        pass

    @abstractmethod
    def ele(self, loc_or_ele, mode, timeout):
        """Find element(s); the base implementation does nothing."""
        pass

    @abstractmethod
    def _try_to_connect(self,
                        to_url: str,
                        times: int = 0,
                        interval: float = 1,
                        show_errmsg: bool = False, ):
        """Try to connect to a url; the base implementation does nothing."""
        pass
|
@ -4,7 +4,6 @@
|
||||
@Contact : g1879@qq.com
|
||||
@File : common.py
|
||||
"""
|
||||
from abc import abstractmethod
|
||||
from html import unescape
|
||||
from pathlib import Path
|
||||
from re import split as re_SPLIT
|
||||
@ -12,53 +11,6 @@ from shutil import rmtree
|
||||
from typing import Union
|
||||
from zipfile import ZipFile
|
||||
|
||||
from lxml.html import HtmlElement
|
||||
from selenium.webdriver.remote.webelement import WebElement
|
||||
|
||||
|
||||
class DrissionElement(object):
    """Base class of SessionElement and DriverElement."""

    def __init__(self, ele: Union[WebElement, HtmlElement], page=None):
        """Store the wrapped element and the page object passed with it."""
        self._inner_ele, self.page = ele, page

    @property
    def inner_ele(self) -> Union[WebElement, HtmlElement]:
        """Return the wrapped element object."""
        return self._inner_ele

    @property
    def html(self):
        """Placeholder; returns ``None`` in this base class."""
        return None

    @property
    def tag(self):
        """Placeholder; returns ``None`` in this base class."""
        return None

    @property
    def parent(self):
        """Placeholder; returns ``None`` in this base class."""
        return None

    @property
    def next(self):
        """Placeholder; returns ``None`` in this base class."""
        return None

    @property
    def prev(self):
        """Placeholder; returns ``None`` in this base class."""
        return None

    @property
    def is_valid(self):
        """Return ``True`` in this base class."""
        return True

    @abstractmethod
    def ele(self, loc: Union[tuple, str], mode: str = None):
        """Find element(s); the base implementation does nothing."""
        return None

    @abstractmethod
    def eles(self, loc: Union[tuple, str]):
        """Find all matching elements; the base implementation does nothing."""
        return None
|
||||
|
||||
|
||||
def str_to_loc(loc: str) -> tuple:
|
||||
"""处理元素查找语句 \n
|
||||
|
@ -6,7 +6,7 @@
|
||||
"""
|
||||
from pathlib import Path
|
||||
from re import sub
|
||||
from time import sleep
|
||||
from time import sleep, time
|
||||
from typing import Union, List, Any, Tuple
|
||||
|
||||
from selenium.common.exceptions import TimeoutException, JavascriptException, InvalidElementStateException
|
||||
@ -15,7 +15,8 @@ from selenium.webdriver.remote.webelement import WebElement
|
||||
from selenium.webdriver.support import expected_conditions as ec
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
|
||||
from .common import DrissionElement, str_to_loc, get_available_file_name, translate_loc, format_html
|
||||
from .base import DrissionElement
|
||||
from .common import str_to_loc, get_available_file_name, translate_loc, format_html
|
||||
|
||||
|
||||
class DriverElement(DrissionElement):
|
||||
@ -33,14 +34,14 @@ class DriverElement(DrissionElement):
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
mode: str = 'single',
|
||||
timeout: float = None):
|
||||
"""实现查找元素的简化写法 \n
|
||||
"""在内部查找元素 \n
|
||||
例:ele2 = ele1('@id=ele_id') \n
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 超时时间
|
||||
:return: DriverElement对象
|
||||
"""
|
||||
return self.ele(loc_or_str, mode, timeout)
|
||||
return super().__call__(loc_or_str, mode, timeout)
|
||||
|
||||
# -----------------共有属性和方法-------------------
|
||||
@property
|
||||
@ -58,21 +59,6 @@ class DriverElement(DrissionElement):
|
||||
"""返回元素innerHTML文本"""
|
||||
return self.attr('innerHTML')
|
||||
|
||||
@property
|
||||
def parent(self):
|
||||
"""返回父级元素"""
|
||||
return self.parents()
|
||||
|
||||
@property
|
||||
def next(self):
|
||||
"""返回后一个兄弟元素"""
|
||||
return self.nexts()
|
||||
|
||||
@property
|
||||
def prev(self):
|
||||
"""返回前一个兄弟元素"""
|
||||
return self.prevs()
|
||||
|
||||
@property
|
||||
def attrs(self) -> dict:
|
||||
"""返回元素所有属性及值"""
|
||||
@ -107,38 +93,6 @@ class DriverElement(DrissionElement):
|
||||
"""返回未格式化处理的元素内文本"""
|
||||
return self.inner_ele.get_attribute('innerText')
|
||||
|
||||
@property
|
||||
def link(self) -> str:
|
||||
"""返回href或src绝对url"""
|
||||
return self.attr('href') or self.attr('src')
|
||||
|
||||
@property
|
||||
def css_path(self) -> str:
|
||||
"""返回当前元素的css路径"""
|
||||
return self._get_ele_path('css')
|
||||
|
||||
@property
|
||||
def xpath(self) -> str:
|
||||
"""返回xpath路径"""
|
||||
return self._get_ele_path('xpath')
|
||||
|
||||
@property
|
||||
def comments(self) -> list:
|
||||
"""返回元素注释文本组成的列表"""
|
||||
return self.eles('xpath:.//comment()')
|
||||
|
||||
def texts(self, text_node_only: bool = False) -> list:
|
||||
"""返回元素内所有直接子节点的文本,包括元素和文本节点 \n
|
||||
:param text_node_only: 是否只返回文本节点
|
||||
:return: 文本列表
|
||||
"""
|
||||
if text_node_only:
|
||||
texts = self.eles('xpath:/text()')
|
||||
else:
|
||||
texts = [x if isinstance(x, str) else x.text for x in self.eles('xpath:./text() | *')]
|
||||
|
||||
return [x.strip(' ') for x in texts if x and x.replace('\n', '').replace('\t', '').replace(' ', '') != '']
|
||||
|
||||
def parents(self, num: int = 1):
|
||||
"""返回上面第num级父元素 \n
|
||||
:param num: 第几级父元素
|
||||
@ -147,24 +101,8 @@ class DriverElement(DrissionElement):
|
||||
loc = 'xpath', f'.{"/.." * num}'
|
||||
return self.ele(loc, timeout=0)
|
||||
|
||||
def nexts(self, num: int = 1, mode: str = 'ele'):
|
||||
"""返回后面第num个兄弟元素或节点文本 \n
|
||||
:param num: 后面第几个兄弟元素或节点
|
||||
:param mode: 'ele', 'node' 或 'text',匹配元素、节点、或文本节点
|
||||
:return: DriverElement对象或字符串
|
||||
"""
|
||||
return self._get_brother(num, mode, 'next')
|
||||
|
||||
def prevs(self, num: int = 1, mode: str = 'ele'):
|
||||
"""返回前面第num个兄弟元素或节点文本 \n
|
||||
:param num: 前面第几个兄弟元素或节点
|
||||
:param mode: 'ele', 'node' 或 'text',匹配元素、节点、或文本节点
|
||||
:return: DriverElement对象或字符串
|
||||
"""
|
||||
return self._get_brother(num, mode, 'prev')
|
||||
|
||||
def attr(self, attr: str) -> str:
|
||||
"""获取属性值 \n
|
||||
"""获取attribute属性值 \n
|
||||
:param attr: 属性名
|
||||
:return: 属性值文本
|
||||
"""
|
||||
@ -179,36 +117,6 @@ class DriverElement(DrissionElement):
|
||||
mode: str = None,
|
||||
timeout: float = None):
|
||||
"""返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 \n
|
||||
示例: \n
|
||||
- 用loc元组查找: \n
|
||||
ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n
|
||||
- 用查询字符串查找: \n
|
||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||
ele.ele('.ele_class') - 返回第一个 class 为 ele_class 的子元素 \n
|
||||
ele.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的子元素 \n
|
||||
ele.ele('#ele_id') - 返回第一个 id 为 ele_id 的子元素 \n
|
||||
ele.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的子元素 \n
|
||||
ele.ele('@class:ele_class') - 返回第一个class含有ele_class的子元素 \n
|
||||
ele.ele('@name=ele_name') - 返回第一个name等于ele_name的子元素 \n
|
||||
ele.ele('@placeholder') - 返回第一个带placeholder属性的子元素 \n
|
||||
ele.ele('tag:p') - 返回第一个<p>子元素 \n
|
||||
ele.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div子元素 \n
|
||||
ele.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div子元素 \n
|
||||
ele.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div子元素 \n
|
||||
ele.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div子元素 \n
|
||||
ele.ele('text:some_text') - 返回第一个文本含有some_text的子元素 \n
|
||||
ele.ele('some_text') - 返回第一个文本含有some_text的子元素(等价于上一行) \n
|
||||
ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n
|
||||
ele.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的子元素 \n
|
||||
ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n
|
||||
- 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n
|
||||
ele.ele('x://div[@class="ele_class"]') - 等同于 ele.ele('xpath://div[@class="ele_class"]') \n
|
||||
ele.ele('c:div.ele_class') - 等同于 ele.ele('css:div.ele_class') \n
|
||||
ele.ele('t:div') - 等同于 ele.ele('tag:div') \n
|
||||
ele.ele('t:div@tx()=some_text') - 等同于 ele.ele('tag:div@text()=some_text') \n
|
||||
ele.ele('tx:some_text') - 等同于 ele.ele('text:some_text') \n
|
||||
ele.ele('tx=some_text') - 等同于 ele.ele('text=some_text')
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 查找元素超时时间
|
||||
@ -220,9 +128,7 @@ class DriverElement(DrissionElement):
|
||||
else:
|
||||
if len(loc_or_str) != 2:
|
||||
raise ValueError("Len of loc_or_str must be 2 when it's a tuple.")
|
||||
|
||||
loc_or_str = translate_loc(loc_or_str)
|
||||
|
||||
else:
|
||||
raise ValueError('Argument loc_or_str can only be tuple or str.')
|
||||
|
||||
@ -242,36 +148,6 @@ class DriverElement(DrissionElement):
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
timeout: float = None):
|
||||
"""返回当前元素下级所有符合条件的子元素、属性或节点文本 \n
|
||||
示例: \n
|
||||
- 用loc元组查找: \n
|
||||
ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n
|
||||
- 用查询字符串查找: \n
|
||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||
ele.eles('.ele_class') - 返回所有 class 为 ele_class 的子元素 \n
|
||||
ele.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的子元素 \n
|
||||
ele.eles('#ele_id') - 返回所有 id 为 ele_id 的子元素 \n
|
||||
ele.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的子元素 \n
|
||||
ele.eles('@class:ele_class') - 返回所有class含有ele_class的子元素 \n
|
||||
ele.eles('@name=ele_name') - 返回所有name等于ele_name的子元素 \n
|
||||
ele.eles('@placeholder') - 返回所有带placeholder属性的子元素 \n
|
||||
ele.eles('tag:p') - 返回所有<p>子元素 \n
|
||||
ele.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div子元素 \n
|
||||
ele.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div子元素 \n
|
||||
ele.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div子元素 \n
|
||||
ele.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div子元素 \n
|
||||
ele.eles('text:some_text') - 返回所有文本含有some_text的子元素 \n
|
||||
ele.eles('some_text') - 返回所有文本含有some_text的子元素(等价于上一行) \n
|
||||
ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n
|
||||
ele.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的子元素 \n
|
||||
ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n
|
||||
- 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n
|
||||
ele.eles('x://div[@class="ele_class"]') - 等同于 ele.eles('xpath://div[@class="ele_class"]') \n
|
||||
ele.eles('c:div.ele_class') - 等同于 ele.eles('css:div.ele_class') \n
|
||||
ele.eles('t:div') - 等同于 ele.eles('tag:div') \n
|
||||
ele.eles('t:div@tx()=some_text') - 等同于 ele.eles('tag:div@text()=some_text') \n
|
||||
ele.eles('tx:some_text') - 等同于 ele.eles('text:some_text') \n
|
||||
ele.eles('tx=some_text') - 等同于 ele.eles('text=some_text')
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param timeout: 查找元素超时时间
|
||||
:return: DriverElement对象组成的列表
|
||||
@ -319,43 +195,6 @@ class DriverElement(DrissionElement):
|
||||
'''
|
||||
return self.run_script(js)
|
||||
|
||||
def _get_brother(self, num: int = 1, mode: str = 'ele', direction: str = 'next'):
|
||||
"""返回前面第num个兄弟节点或元素 \n
|
||||
:param num: 前面第几个兄弟节点或元素
|
||||
:param mode: 'ele', 'node' 或 'text',匹配元素、节点、或文本节点
|
||||
:param direction: 'next' 或 'prev',查找的方向
|
||||
:return: DriverElement对象或字符串
|
||||
"""
|
||||
# 查找节点的类型
|
||||
if mode == 'ele':
|
||||
node_txt = '*'
|
||||
elif mode == 'node':
|
||||
node_txt = 'node()'
|
||||
elif mode == 'text':
|
||||
node_txt = 'text()'
|
||||
else:
|
||||
raise ValueError(f"Argument mode can only be 'node' ,'ele' or 'text', not '{mode}'.")
|
||||
|
||||
# 查找节点的方向
|
||||
if direction == 'next':
|
||||
direction_txt = 'following'
|
||||
elif direction == 'prev':
|
||||
direction_txt = 'preceding'
|
||||
else:
|
||||
raise ValueError(f"Argument direction can only be 'next' or 'prev', not '{direction}'.")
|
||||
|
||||
timeout = 0 if direction == 'prev' else .5
|
||||
|
||||
# 获取节点
|
||||
ele_or_node = self.ele(f'xpath:./{direction_txt}-sibling::{node_txt}[{num}]', timeout=timeout)
|
||||
|
||||
# 跳过元素间的换行符
|
||||
while isinstance(ele_or_node, str) and ele_or_node.replace('\n', '').replace('\t', '').replace(' ', '') == '':
|
||||
num += 1
|
||||
ele_or_node = self.ele(f'xpath:./{direction_txt}-sibling::{node_txt}[{num}]', timeout=timeout)
|
||||
|
||||
return ele_or_node
|
||||
|
||||
# -----------------driver独有属性和方法-------------------
|
||||
@property
|
||||
def size(self) -> dict:
|
||||
@ -401,6 +240,18 @@ class DriverElement(DrissionElement):
|
||||
|
||||
return self._select
|
||||
|
||||
def wait_ele(self,
             loc_or_ele: Union[str, tuple, DrissionElement, WebElement],
             mode: str,
             timeout: float = None) -> bool:
    """Wait for a child element to be removed from the dom, displayed or hidden. \n
    Delegates to the module-level ``_wait_ele`` with this element as the first argument.
    :param loc_or_ele: element object, query string or loc tuple
    :param mode: what to wait for: 'del', 'display' or 'hidden'
    :param timeout: how long to wait
    :return: whether the wait succeeded
    """
    succeeded = _wait_ele(self, loc_or_ele, mode, timeout)
    return succeeded
|
||||
|
||||
def get_style_property(self, style: str, pseudo_ele: str = '') -> str:
|
||||
"""返回元素样式属性值
|
||||
:param style: 样式属性名称
|
||||
@ -476,16 +327,16 @@ class DriverElement(DrissionElement):
|
||||
from selenium.webdriver import ActionChains
|
||||
ActionChains(self.page.driver).move_to_element_with_offset(self.inner_ele, x, y).context_click().perform()
|
||||
|
||||
def input(self, value: Union[str, tuple], clear: bool = True) -> None:
|
||||
def input(self, vals: Union[str, tuple], clear: bool = True) -> None:
|
||||
"""输入文本或组合键,可用于所有场合 \n
|
||||
:param value: 文本值或按键组合
|
||||
:param vals: 文本值或按键组合
|
||||
:param clear: 输入前是否清空文本框
|
||||
:return: 是否输入成功
|
||||
"""
|
||||
if clear:
|
||||
self.clear()
|
||||
|
||||
self.inner_ele.send_keys(*value)
|
||||
self.inner_ele.send_keys(*vals)
|
||||
|
||||
def input_txt(self, txt: Union[str, tuple], clear: bool = True) -> None:
|
||||
"""专门用于输入文本框,解决文本框有时输入失效的问题 \n
|
||||
@ -499,10 +350,7 @@ class DriverElement(DrissionElement):
|
||||
from time import perf_counter
|
||||
|
||||
t1 = perf_counter()
|
||||
while True:
|
||||
if self.attr('value') == full_txt or perf_counter() - t1 > self.page.timeout:
|
||||
break
|
||||
|
||||
while self.attr('value') != full_txt and perf_counter() - t1 > self.page.timeout:
|
||||
if clear:
|
||||
self.clear()
|
||||
|
||||
@ -978,3 +826,80 @@ class Select(object):
|
||||
|
||||
for i in self.options:
|
||||
i.click()
|
||||
|
||||
|
||||
def _wait_ele(page_or_ele,
              loc_or_ele: Union[str, tuple, DriverElement, WebElement],
              mode: str,
              timeout: float = None) -> bool:
    """Wait for an element to be removed from the dom, displayed or hidden. \n
    :param page_or_ele: page or element under which the target is waited for
    :param loc_or_ele: element object, query string or loc tuple
    :param mode: what to wait for: 'del', 'display' or 'hidden' (case-insensitive)
    :param timeout: how long to wait; a falsy value falls back to the page's timeout
    :return: True if the condition was met in time, otherwise False
    :raises ValueError: if mode is not one of the accepted values
    :raises TypeError: if loc_or_ele is not a str, tuple, DriverElement or WebElement
    """
    # Normalise once. Validation was already case-insensitive, but the branches
    # below compared the raw value, so e.g. 'DEL' passed validation, matched no
    # branch, and the locator path reported success without waiting.
    mode = mode.lower()
    if mode not in ('del', 'display', 'hidden'):
        raise ValueError('Argument mode can only be "del", "display", "hidden"')

    # Work out the owning page and the object WebDriverWait should search under
    if isinstance(page_or_ele, DrissionElement):
        page = page_or_ele.page
        ele_or_driver = page_or_ele.inner_ele
    else:
        page = page_or_ele
        ele_or_driver = page_or_ele.driver

    timeout = timeout or page.timeout
    is_ele = False

    if isinstance(loc_or_ele, DriverElement):
        loc_or_ele = loc_or_ele.inner_ele
        is_ele = True

    elif isinstance(loc_or_ele, WebElement):
        is_ele = True

    elif isinstance(loc_or_ele, str):
        loc_or_ele = str_to_loc(loc_or_ele)

    elif isinstance(loc_or_ele, tuple):
        pass

    else:
        raise TypeError('The type of loc_or_ele can only be str, tuple, DriverElement, WebElement')

    # An element object was passed in: poll it until the deadline
    if is_ele:
        end_time = time() + timeout

        while time() < end_time:
            if mode == 'del':
                try:
                    loc_or_ele.is_enabled()
                except Exception:
                    # Any failure to query the element is treated as "removed";
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    return True

            elif mode == 'display' and loc_or_ele.is_displayed():
                return True

            elif mode == 'hidden' and not loc_or_ele.is_displayed():
                return True

        return False

    # A query string or loc tuple was passed in: let WebDriverWait do the polling
    else:
        try:
            if mode == 'del':
                WebDriverWait(ele_or_driver, timeout).until_not(ec.presence_of_element_located(loc_or_ele))

            elif mode == 'display':
                WebDriverWait(ele_or_driver, timeout).until(ec.visibility_of_element_located(loc_or_ele))

            elif mode == 'hidden':
                WebDriverWait(ele_or_driver, timeout).until_not(ec.visibility_of_element_located(loc_or_ele))

            return True

        except Exception:
            # Best effort: a timeout or any lookup error counts as a failed wait
            return False
|
||||
|
@ -6,7 +6,7 @@
|
||||
"""
|
||||
from glob import glob
|
||||
from pathlib import Path
|
||||
from time import time, sleep
|
||||
from time import sleep
|
||||
from typing import Union, List, Any, Tuple
|
||||
from urllib.parse import quote
|
||||
|
||||
@ -15,29 +15,32 @@ from selenium.webdriver.chrome.webdriver import WebDriver
|
||||
from selenium.webdriver.remote.webelement import WebElement
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
|
||||
from .base import BasePage
|
||||
from .common import str_to_loc, get_available_file_name, translate_loc, format_html
|
||||
from .driver_element import DriverElement, execute_driver_find
|
||||
from .driver_element import DriverElement, execute_driver_find, _wait_ele
|
||||
|
||||
|
||||
class DriverPage(object):
|
||||
class DriverPage(BasePage):
|
||||
"""DriverPage封装了页面操作的常用功能,使用selenium来获取、解析、操作网页"""
|
||||
|
||||
def __init__(self, driver: WebDriver, timeout: float = 10):
|
||||
"""初始化函数,接收一个WebDriver对象,用来操作网页"""
|
||||
super().__init__(timeout)
|
||||
self._driver = driver
|
||||
self._timeout = timeout
|
||||
self._url = None
|
||||
self._url_available = None
|
||||
self._wait = None
|
||||
|
||||
self.retry_times = 3
|
||||
self.retry_interval = 2
|
||||
self._wait_object = None
|
||||
|
||||
def __call__(self,
|
||||
loc_or_str: Union[Tuple[str, str], str, DriverElement, WebElement],
|
||||
mode: str = 'single',
|
||||
timeout: float = None):
|
||||
return self.ele(loc_or_str, mode, timeout)
|
||||
timeout: float = None) -> Union[DriverElement, List[DriverElement]]:
|
||||
"""在内部查找元素 \n
|
||||
例:ele = page('@id=ele_id') \n
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 超时时间
|
||||
:return: DriverElement对象
|
||||
"""
|
||||
return super().__call__(loc_or_str, mode, timeout)
|
||||
|
||||
# -----------------共有属性和方法-------------------
|
||||
@property
|
||||
@ -59,14 +62,10 @@ class DriverPage(object):
|
||||
return format_html(self.driver.find_element_by_xpath("//*").get_attribute("outerHTML"))
|
||||
|
||||
@property
|
||||
def cookies(self) -> list:
|
||||
"""返回当前网站cookies"""
|
||||
return self.get_cookies(True)
|
||||
|
||||
@property
|
||||
def url_available(self) -> bool:
|
||||
"""url有效性"""
|
||||
return self._url_available
|
||||
def json(self) -> dict:
|
||||
"""当返回内容是json格式时,返回对应的字典"""
|
||||
from json import loads
|
||||
return loads(self('t:pre').text)
|
||||
|
||||
def get(self,
|
||||
url: str,
|
||||
@ -104,31 +103,6 @@ class DriverPage(object):
|
||||
mode: str = None,
|
||||
timeout: float = None) -> Union[DriverElement, List[DriverElement], str, None]:
|
||||
"""返回页面中符合条件的元素,默认返回第一个 \n
|
||||
示例: \n
|
||||
- 接收到元素对象时: \n
|
||||
返回DriverElement对象 \n
|
||||
- 用loc元组查找: \n
|
||||
ele.ele((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n
|
||||
- 用查询字符串查找: \n
|
||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||
page.ele('.ele_class') - 返回第一个 class 为 ele_class 的元素 \n
|
||||
page.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的元素 \n
|
||||
page.ele('#ele_id') - 返回第一个 id 为 ele_id 的元素 \n
|
||||
page.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的元素 \n
|
||||
page.ele('@class:ele_class') - 返回第一个class含有ele_class的元素 \n
|
||||
page.ele('@name=ele_name') - 返回第一个name等于ele_name的元素 \n
|
||||
page.ele('@placeholder') - 返回第一个带placeholder属性的元素 \n
|
||||
page.ele('tag:p') - 返回第一个<p>元素 \n
|
||||
page.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div元素 \n
|
||||
page.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div元素 \n
|
||||
page.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div元素 \n
|
||||
page.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div元素 \n
|
||||
page.ele('text:some_text') - 返回第一个文本含有some_text的元素 \n
|
||||
page.ele('some_text') - 返回第一个文本含有some_text的元素(等价于上一行) \n
|
||||
page.ele('text=some_text') - 返回第一个文本等于some_text的元素 \n
|
||||
page.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的元素 \n
|
||||
page.ele('css:div.ele_class') - 返回第一个符合css selector的元素 \n
|
||||
:param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all‘,对应查找一个或全部
|
||||
:param timeout: 查找元素超时时间
|
||||
@ -143,9 +117,6 @@ class DriverPage(object):
|
||||
raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
|
||||
loc_or_ele = translate_loc(loc_or_ele)
|
||||
|
||||
# if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
|
||||
# loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
|
||||
|
||||
# 接收到DriverElement对象直接返回
|
||||
elif isinstance(loc_or_ele, DriverElement):
|
||||
return loc_or_ele
|
||||
@ -164,29 +135,6 @@ class DriverPage(object):
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
timeout: float = None) -> List[DriverElement]:
|
||||
"""返回页面中所有符合条件的元素 \n
|
||||
示例: \n
|
||||
- 用loc元组查找: \n
|
||||
page.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的元素 \n
|
||||
- 用查询字符串查找: \n
|
||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||
page.eles('.ele_class') - 返回所有 class 为 ele_class 的元素 \n
|
||||
page.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的元素 \n
|
||||
page.eles('#ele_id') - 返回所有 id 为 ele_id 的元素 \n
|
||||
page.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的元素 \n
|
||||
page.eles('@class:ele_class') - 返回所有class含有ele_class的元素 \n
|
||||
page.eles('@name=ele_name') - 返回所有name等于ele_name的元素 \n
|
||||
page.eles('@placeholder') - 返回所有带placeholder属性的元素 \n
|
||||
page.eles('tag:p') - 返回所有<p>元素 \n
|
||||
page.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div元素 \n
|
||||
page.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div元素 \n
|
||||
page.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div元素 \n
|
||||
page.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div元素 \n
|
||||
page.eles('text:some_text') - 返回所有文本含有some_text的元素 \n
|
||||
page.eles('some_text') - 返回所有文本含有some_text的元素(等价于上一行) \n
|
||||
page.eles('text=some_text') - 返回所有文本等于some_text的元素 \n
|
||||
page.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的元素 \n
|
||||
page.eles('css:div.ele_class') - 返回所有符合css selector的元素 \n
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param timeout: 查找元素超时时间
|
||||
:return: DriverElement对象组成的列表
|
||||
@ -194,7 +142,7 @@ class DriverPage(object):
|
||||
if not isinstance(loc_or_str, (tuple, str)):
|
||||
raise TypeError('Type of loc_or_str can only be tuple or str.')
|
||||
|
||||
return self.ele(loc_or_str, mode='all', timeout=timeout)
|
||||
return super().eles(loc_or_str, timeout)
|
||||
|
||||
def get_cookies(self, as_dict: bool = False) -> Union[list, dict]:
|
||||
"""返回当前网站cookies"""
|
||||
@ -203,6 +151,17 @@ class DriverPage(object):
|
||||
else:
|
||||
return self.driver.get_cookies()
|
||||
|
||||
@property
|
||||
def timeout(self) -> float:
|
||||
"""返回查找元素时等待的秒数"""
|
||||
return self._timeout
|
||||
|
||||
@timeout.setter
|
||||
def timeout(self, second: float) -> None:
|
||||
"""设置查找元素时等待的秒数"""
|
||||
self._timeout = second
|
||||
self._wait_object = None
|
||||
|
||||
def _try_to_connect(self,
|
||||
to_url: str,
|
||||
times: int = 0,
|
||||
@ -245,24 +204,13 @@ class DriverPage(object):
|
||||
def driver(self) -> WebDriver:
|
||||
return self._driver
|
||||
|
||||
@property
|
||||
def timeout(self) -> float:
|
||||
"""返回查找元素时等待的秒数"""
|
||||
return self._timeout
|
||||
|
||||
@timeout.setter
|
||||
def timeout(self, second: float) -> None:
|
||||
"""设置查找元素时等待的秒数"""
|
||||
self._timeout = second
|
||||
self._wait = None
|
||||
|
||||
@property
|
||||
def wait_object(self) -> WebDriverWait:
|
||||
"""返回WebDriverWait对象,重用避免每次新建对象"""
|
||||
if self._wait is None:
|
||||
self._wait = WebDriverWait(self.driver, timeout=self.timeout)
|
||||
if self._wait_object is None:
|
||||
self._wait_object = WebDriverWait(self.driver, timeout=self.timeout)
|
||||
|
||||
return self._wait
|
||||
return self._wait_object
|
||||
|
||||
@property
|
||||
def tabs_count(self) -> int:
|
||||
@ -297,66 +245,7 @@ class DriverPage(object):
|
||||
:param timeout: 等待超时时间
|
||||
:return: 等待是否成功
|
||||
"""
|
||||
if mode.lower() not in ('del', 'display', 'hidden'):
|
||||
raise ValueError('Argument mode can only be "del", "display", "hidden"')
|
||||
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as ec
|
||||
|
||||
timeout = timeout or self.timeout
|
||||
is_ele = False
|
||||
|
||||
if isinstance(loc_or_ele, DriverElement):
|
||||
loc_or_ele = loc_or_ele.inner_ele
|
||||
is_ele = True
|
||||
|
||||
elif isinstance(loc_or_ele, WebElement):
|
||||
is_ele = True
|
||||
|
||||
elif isinstance(loc_or_ele, str):
|
||||
loc_or_ele = str_to_loc(loc_or_ele)
|
||||
|
||||
elif isinstance(loc_or_ele, tuple):
|
||||
pass
|
||||
|
||||
else:
|
||||
raise TypeError('The type of loc_or_ele can only be str, tuple, DriverElement, WebElement')
|
||||
|
||||
# 当传入参数是元素对象时
|
||||
if is_ele:
|
||||
end_time = time() + timeout
|
||||
|
||||
while time() < end_time:
|
||||
if mode == 'del':
|
||||
try:
|
||||
loc_or_ele.is_enabled()
|
||||
except:
|
||||
return True
|
||||
|
||||
elif mode == 'display' and loc_or_ele.is_displayed():
|
||||
return True
|
||||
|
||||
elif mode == 'hidden' and not loc_or_ele.is_displayed():
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
# 当传入参数是控制字符串或元组时
|
||||
else:
|
||||
try:
|
||||
if mode == 'del':
|
||||
WebDriverWait(self.driver, timeout).until_not(ec.presence_of_element_located(loc_or_ele))
|
||||
|
||||
elif mode == 'display':
|
||||
WebDriverWait(self.driver, timeout).until(ec.visibility_of_element_located(loc_or_ele))
|
||||
|
||||
elif mode == 'hidden':
|
||||
WebDriverWait(self.driver, timeout).until_not(ec.visibility_of_element_located(loc_or_ele))
|
||||
|
||||
return True
|
||||
|
||||
except:
|
||||
return False
|
||||
return _wait_ele(self, loc_or_ele, mode, timeout)
|
||||
|
||||
def check_page(self) -> Union[bool, None]:
|
||||
"""检查页面是否符合预期 \n
|
||||
@ -536,8 +425,8 @@ class DriverPage(object):
|
||||
self.driver.execute_script(f"window.scrollBy({pixel},0);")
|
||||
|
||||
else:
|
||||
raise ValueError(
|
||||
"Argument mode can only be 'top', 'bottom', 'half', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right'.")
|
||||
raise ValueError("Argument mode can only be "
|
||||
"'top', 'bottom', 'half', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right'.")
|
||||
|
||||
def refresh(self) -> None:
|
||||
"""刷新当前页面"""
|
||||
|
@ -5,17 +5,17 @@
|
||||
@File : driver_page.py
|
||||
"""
|
||||
from os import popen
|
||||
|
||||
from pathlib import Path
|
||||
from pprint import pprint
|
||||
from re import search as RE_SEARCH
|
||||
from selenium import webdriver
|
||||
from re import search as RE_SEARCH, sub
|
||||
from typing import Union
|
||||
|
||||
from DrissionPage.config import OptionsManager, DriverOptions
|
||||
from DrissionPage.drission import Drission
|
||||
from DrissionPage.session_page import SessionPage
|
||||
from selenium import webdriver
|
||||
|
||||
from .common import unzip
|
||||
from .config import OptionsManager, DriverOptions
|
||||
from .drission import Drission
|
||||
from .session_page import SessionPage
|
||||
|
||||
|
||||
def show_settings(ini_path: str = None) -> None:
|
||||
@ -349,7 +349,7 @@ def _download_driver(version: str, save_path: str = None, show_msg: bool = True)
|
||||
remote_main = i.text.split('.')[0]
|
||||
|
||||
try:
|
||||
remote_num = int(i.text.replace('.', '').replace('/', ''))
|
||||
remote_num = int(sub(r'[./]', '', i.text))
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
|
@ -11,6 +11,7 @@ from requests.cookies import RequestsCookieJar
|
||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||
from selenium.webdriver.remote.webelement import WebElement
|
||||
|
||||
from .base import BasePage
|
||||
from .config import DriverOptions, SessionOptions
|
||||
from .drission import Drission
|
||||
from .driver_element import DriverElement
|
||||
@ -19,14 +20,7 @@ from .session_element import SessionElement
|
||||
from .session_page import SessionPage
|
||||
|
||||
|
||||
class Null(object):
    """Placeholder base class with no behaviour of its own.

    It exists only to stop IDEs warning that a superclass ``__init__``
    was not called.
    """

    def __init__(self):
        pass
|
||||
|
||||
|
||||
class MixPage(Null, SessionPage, DriverPage):
|
||||
class MixPage(SessionPage, DriverPage, BasePage):
|
||||
"""MixPage整合了DriverPage和SessionPage,封装了对页面的操作,
|
||||
可在selenium(d模式)和requests(s模式)间无缝切换。
|
||||
切换的时候会自动同步cookies。
|
||||
@ -46,35 +40,37 @@ class MixPage(Null, SessionPage, DriverPage):
|
||||
:param driver_options: 浏览器设置,没有传入drission参数时会用这个设置新建Drission对象
|
||||
:param session_options: requests设置,没有传入drission参数时会用这个设置新建Drission对象
|
||||
"""
|
||||
super().__init__()
|
||||
if drission in ('s', 'd', 'S', 'D'):
|
||||
mode = drission.lower()
|
||||
super().__init__(timeout) # BasePage的__init__()
|
||||
if isinstance(drission, str):
|
||||
self._mode = drission.lower()
|
||||
drission = None
|
||||
|
||||
self._drission = drission or Drission(driver_options, session_options)
|
||||
self._url = None
|
||||
self._response = None
|
||||
self.timeout = timeout
|
||||
self._url_available = None
|
||||
else:
|
||||
self._mode = mode
|
||||
|
||||
self.retry_times = 3
|
||||
self.retry_interval = 2
|
||||
|
||||
if mode == 's':
|
||||
if self._mode == 's':
|
||||
self._driver = None
|
||||
self._session = True
|
||||
elif mode == 'd':
|
||||
elif self._mode == 'd':
|
||||
self._driver = True
|
||||
self._session = None
|
||||
else:
|
||||
raise ValueError("Argument mode can only be 'd' or 's'.")
|
||||
|
||||
self._drission = drission or Drission(driver_options, session_options)
|
||||
|
||||
def __call__(self,
|
||||
loc_or_str: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement],
|
||||
mode: str = 'single',
|
||||
timeout: float = None):
|
||||
return self.ele(loc_or_str, mode, timeout)
|
||||
timeout: float = None) \
|
||||
-> Union[DriverElement, SessionElement, str, List[DriverElement], List[SessionElement]]:
|
||||
"""在内部查找元素 \n
|
||||
例:ele = page('@id=ele_id') \n
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 超时时间
|
||||
:return: DriverElement对象
|
||||
"""
|
||||
return super().__call__(loc_or_str, mode, timeout)
|
||||
|
||||
# -----------------共有属性和方法-------------------
|
||||
@property
|
||||
@ -102,12 +98,12 @@ class MixPage(Null, SessionPage, DriverPage):
|
||||
return super(SessionPage, self).html
|
||||
|
||||
@property
|
||||
def cookies(self) -> Union[dict, list]:
|
||||
"""返回cookies"""
|
||||
def json(self) -> dict:
|
||||
"""当返回内容是json格式时,返回对应的字典"""
|
||||
if self._mode == 's':
|
||||
return super().cookies
|
||||
return super().json
|
||||
elif self._mode == 'd':
|
||||
return super(SessionPage, self).cookies
|
||||
return super(SessionPage, self).json
|
||||
|
||||
def get(self,
|
||||
url: str,
|
||||
@ -137,38 +133,6 @@ class MixPage(Null, SessionPage, DriverPage):
|
||||
timeout: float = None) \
|
||||
-> Union[DriverElement, SessionElement, str, List[SessionElement], List[DriverElement]]:
|
||||
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n
|
||||
示例: \n
|
||||
- 接收到元素对象时: \n
|
||||
返回元素对象 \n
|
||||
- 用loc元组查找: \n
|
||||
ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n
|
||||
- 用查询字符串查找: \n
|
||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||
page.ele('.ele_class') - 返回第一个 class 为 ele_class 的元素 \n
|
||||
page.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的元素 \n
|
||||
page.ele('#ele_id') - 返回第一个 id 为 ele_id 的元素 \n
|
||||
page.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的元素 \n
|
||||
page.ele('@class:ele_class') - 返回第一个class含有ele_class的元素 \n
|
||||
page.ele('@name=ele_name') - 返回第一个name等于ele_name的元素 \n
|
||||
page.ele('@placeholder') - 返回第一个带placeholder属性的元素 \n
|
||||
page.ele('tag:p') - 返回第一个<p>元素 \n
|
||||
page.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div元素 \n
|
||||
page.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div元素 \n
|
||||
page.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div元素 \n
|
||||
page.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div元素 \n
|
||||
page.ele('text:some_text') - 返回第一个文本含有some_text的元素 \n
|
||||
page.ele('some_text') - 返回第一个文本含有some_text的元素(等价于上一行) \n
|
||||
page.ele('text=some_text') - 返回第一个文本等于some_text的元素 \n
|
||||
page.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的元素 \n
|
||||
page.ele('css:div.ele_class') - 返回第一个符合css selector的元素 \n
|
||||
- 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n
|
||||
page.ele('x://div[@class="ele_class"]') - 等同于 page.ele('xpath://div[@class="ele_class"]') \n
|
||||
page.ele('c:div.ele_class') - 等同于 page.ele('css:div.ele_class') \n
|
||||
page.ele('t:div') - 等同于 page.ele('tag:div') \n
|
||||
page.ele('t:div@tx()=some_text') - 等同于 page.ele('tag:div@text()=some_text') \n
|
||||
page.ele('tx:some_text') - 等同于 page.ele('text:some_text') \n
|
||||
page.ele('tx=some_text') - 等同于 page.ele('text=some_text')
|
||||
:param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 查找元素超时时间,d模式专用
|
||||
@ -183,43 +147,10 @@ class MixPage(Null, SessionPage, DriverPage):
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
timeout: float = None) -> Union[List[DriverElement], List[SessionElement]]:
|
||||
"""返回页面中所有符合条件的元素、属性或节点文本 \n
|
||||
示例: \n
|
||||
- 用loc元组查找: \n
|
||||
page.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的元素 \n
|
||||
- 用查询字符串查找: \n
|
||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||
page.eles('.ele_class') - 返回所有 class 为 ele_class 的元素 \n
|
||||
page.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的元素 \n
|
||||
page.eles('#ele_id') - 返回所有 id 为 ele_id 的元素 \n
|
||||
page.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的元素 \n
|
||||
page.eles('@class:ele_class') - 返回所有class含有ele_class的元素 \n
|
||||
page.eles('@name=ele_name') - 返回所有name等于ele_name的元素 \n
|
||||
page.eles('@placeholder') - 返回所有带placeholder属性的元素 \n
|
||||
page.eles('tag:p') - 返回所有<p>元素 \n
|
||||
page.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div元素 \n
|
||||
page.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div元素 \n
|
||||
page.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div元素 \n
|
||||
page.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div元素 \n
|
||||
page.eles('text:some_text') - 返回所有文本含有some_text的元素 \n
|
||||
page.eles('some_text') - 返回所有文本含有some_text的元素(等价于上一行) \n
|
||||
page.eles('text=some_text') - 返回所有文本等于some_text的元素 \n
|
||||
page.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的元素 \n
|
||||
page.eles('css:div.ele_class') - 返回所有符合css selector的元素 \n
|
||||
- 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n
|
||||
page.eles('x://div[@class="ele_class"]') - 等同于 page.eles('xpath://div[@class="ele_class"]') \n
|
||||
page.eles('c:div.ele_class') - 等同于 page.eles('css:div.ele_class') \n
|
||||
page.eles('t:div') - 等同于 page.eles('tag:div') \n
|
||||
page.eles('t:div@tx()=some_text') - 等同于 page.eles('tag:div@text()=some_text') \n
|
||||
page.eles('tx:some_text') - 等同于 page.eles('text:some_text') \n
|
||||
page.eles('tx=some_text') - 等同于 page.eles('text=some_text')
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param timeout: 查找元素超时时间,d模式专用
|
||||
:return: 元素对象或属性、文本节点文本组成的列表
|
||||
"""
|
||||
if self._mode == 's':
|
||||
return super().eles(loc_or_str)
|
||||
elif self._mode == 'd':
|
||||
return super(SessionPage, self).eles(loc_or_str, timeout=timeout)
|
||||
|
||||
def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]:
|
||||
@ -444,4 +375,5 @@ class MixPage(Null, SessionPage, DriverPage):
|
||||
raise
|
||||
except:
|
||||
raise IOError('Download path not found.')
|
||||
|
||||
return super().chrome_downloading(path)
|
||||
|
@ -11,7 +11,8 @@ from urllib.parse import urlparse, urljoin, urlunparse
|
||||
from lxml.etree import tostring
|
||||
from lxml.html import HtmlElement, fromstring
|
||||
|
||||
from .common import DrissionElement, str_to_loc, translate_loc, format_html
|
||||
from .base import DrissionElement
|
||||
from .common import str_to_loc, translate_loc, format_html
|
||||
|
||||
|
||||
class SessionElement(DrissionElement):
|
||||
@ -24,14 +25,15 @@ class SessionElement(DrissionElement):
|
||||
attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs]
|
||||
return f'<SessionElement {self.tag} {" ".join(attrs)}>'
|
||||
|
||||
def __call__(self, loc_or_str: Union[Tuple[str, str], str], mode: str = 'single'):
|
||||
"""实现查找元素的简化写法 \n
|
||||
def __call__(self, loc_or_str: Union[Tuple[str, str], str], mode: str = 'single', timeout: float = None):
|
||||
"""在内部查找元素 \n
|
||||
例:ele2 = ele1('@id=ele_id') \n
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 不起实际作用,用于和父类对应
|
||||
:return: SessionElement对象
|
||||
"""
|
||||
return self.ele(loc_or_str, mode)
|
||||
return super().__call__(loc_or_str, mode, timeout)
|
||||
|
||||
@property
|
||||
def tag(self) -> str:
|
||||
@ -50,21 +52,6 @@ class SessionElement(DrissionElement):
|
||||
r = match(r'<.*?>(.*)</.*?>', self.html, flags=DOTALL)
|
||||
return '' if not r else r.group(1)
|
||||
|
||||
@property
def parent(self):
    """Return the parent element, i.e. ``self.parents()`` called with its default argument."""
    return self.parents()
|
||||
|
||||
@property
def next(self):
    """Return the next sibling element, i.e. ``self.nexts()`` called with its default arguments."""
    return self.nexts()
|
||||
|
||||
@property
def prev(self):
    """Return the previous sibling element, i.e. ``self.prevs()`` called with its default arguments."""
    return self.prevs()
|
||||
|
||||
@property
|
||||
def attrs(self) -> dict:
|
||||
"""返回元素所有属性及值"""
|
||||
@ -86,7 +73,7 @@ class SessionElement(DrissionElement):
|
||||
str_list.append('\n')
|
||||
|
||||
if isinstance(el, str):
|
||||
if el.replace(' ', '').replace('\n', '') != '':
|
||||
if sub('[ \n]', '', el) != '':
|
||||
if pre:
|
||||
str_list.append(el)
|
||||
else:
|
||||
@ -112,39 +99,6 @@ class SessionElement(DrissionElement):
|
||||
"""返回未格式化处理的元素内文本"""
|
||||
return str(self._inner_ele.text_content())
|
||||
|
||||
@property
def link(self) -> str:
    """Return the ``href`` attribute, falling back to ``src`` when ``href`` is falsy.

    NOTE(review): the original comment says the URL is absolute; that
    depends on what ``attr()`` returns - confirm there.
    """
    return self.attr('href') or self.attr('src')
|
||||
|
||||
@property
def css_path(self) -> str:
    """Return the element's CSS path, as built by ``_get_ele_path('css')``."""
    return self._get_ele_path('css')
|
||||
|
||||
@property
def xpath(self) -> str:
    """Return the element's XPath, as built by ``_get_ele_path('xpath')``."""
    return self._get_ele_path('xpath')
|
||||
|
||||
@property
def comments(self) -> list:
    """Return the comment nodes found under this element.

    The lookup is ``self.eles('xpath:.//comment()')``; the original comment
    describes the result as a list of comment texts.
    """
    return self.eles('xpath:.//comment()')
|
||||
|
||||
def texts(self, text_node_only: bool = False) -> list:
    """Return the texts of this element's direct child nodes.

    :param text_node_only: when true, query text nodes only; otherwise
        query text nodes and child elements, using each element's ``text``
    :return: list of texts, each passed through ``format_html``
    """
    if text_node_only:
        raw_items = self.eles('xpath:/text()')
    else:
        raw_items = []
        for node in self.eles('xpath:./text() | *'):
            # Text nodes come back as plain strings; elements expose .text.
            raw_items.append(node if isinstance(node, str) else node.text)

    cleaned = []
    for item in raw_items:
        if not item:
            # None or empty string: nothing to report.
            continue
        if item.replace('\n', '').replace('\t', '').replace(' ', '') == '':
            # Made up only of newlines, tabs and spaces.
            continue
        cleaned.append(format_html(item.strip(' ')))

    return cleaned
|
||||
|
||||
def parents(self, num: int = 1):
|
||||
"""返回上面第num级父元素 \n
|
||||
:param num: 第几级父元素
|
||||
@ -152,24 +106,8 @@ class SessionElement(DrissionElement):
|
||||
"""
|
||||
return self.ele(f'xpath:..{"/.." * (num - 1)}')
|
||||
|
||||
def nexts(self, num: int = 1, mode: str = 'ele'):
    """Return the num-th following sibling element or node.

    :param num: which following sibling to return, counting from 1
    :param mode: 'ele', 'node' or 'text' - match elements, any node, or text nodes
    :return: whatever ``_get_brother`` returns for the 'next' direction
    """
    return self._get_brother(num, mode, 'next')
|
||||
|
||||
def prevs(self, num: int = 1, mode: str = 'ele'):
    """Return the num-th preceding sibling element or node.

    :param num: which preceding sibling to return, counting from 1
    :param mode: 'ele', 'node' or 'text' - match elements, any node, or text nodes
    :return: whatever ``_get_brother`` returns for the 'prev' direction
    """
    return self._get_brother(num, mode, 'prev')
|
||||
|
||||
def attr(self, attr: str) -> Union[str, None]:
|
||||
"""返回属性值 \n
|
||||
"""返回attribute属性值 \n
|
||||
:param attr: 属性名
|
||||
:return: 属性值文本,没有该属性返回None
|
||||
"""
|
||||
@ -200,40 +138,11 @@ class SessionElement(DrissionElement):
|
||||
else:
|
||||
return self.inner_ele.get(attr)
|
||||
|
||||
def ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None):
|
||||
def ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None, timeout=None):
|
||||
"""返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 \n
|
||||
示例: \n
|
||||
- 用loc元组查找: \n
|
||||
ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n
|
||||
- 用查询字符串查找: \n
|
||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||
ele.ele('.ele_class') - 返回第一个 class 为 ele_class 的子元素 \n
|
||||
ele.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的子元素 \n
|
||||
ele.ele('#ele_id') - 返回第一个 id 为 ele_id 的子元素 \n
|
||||
ele.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的子元素 \n
|
||||
ele.ele('@class:ele_class') - 返回第一个class含有ele_class的子元素 \n
|
||||
ele.ele('@name=ele_name') - 返回第一个name等于ele_name的子元素 \n
|
||||
ele.ele('@placeholder') - 返回第一个带placeholder属性的子元素 \n
|
||||
ele.ele('tag:p') - 返回第一个<p>子元素 \n
|
||||
ele.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div子元素 \n
|
||||
ele.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div子元素 \n
|
||||
ele.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div子元素 \n
|
||||
ele.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div子元素 \n
|
||||
ele.ele('text:some_text') - 返回第一个文本含有some_text的子元素 \n
|
||||
ele.ele('some_text') - 返回第一个文本含有some_text的子元素(等价于上一行) \n
|
||||
ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n
|
||||
ele.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的子元素 \n
|
||||
ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n
|
||||
- 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n
|
||||
ele.ele('x://div[@class="ele_class"]') - 等同于 ele.ele('xpath://div[@class="ele_class"]') \n
|
||||
ele.ele('c:div.ele_class') - 等同于 ele.ele('css:div.ele_class') \n
|
||||
ele.ele('t:div') - 等同于 ele.ele('tag:div') \n
|
||||
ele.ele('t:div@tx()=some_text') - 等同于 ele.ele('tag:div@text()=some_text') \n
|
||||
ele.ele('tx:some_text') - 等同于 ele.ele('text:some_text') \n
|
||||
ele.ele('tx=some_text') - 等同于 ele.ele('text=some_text')
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 不起实际作用,用于和父类对应
|
||||
:return: SessionElement对象
|
||||
"""
|
||||
if isinstance(loc_or_str, (str, tuple)):
|
||||
@ -261,39 +170,10 @@ class SessionElement(DrissionElement):
|
||||
|
||||
return execute_session_find(element, loc_or_str, mode)
|
||||
|
||||
def eles(self, loc_or_str: Union[Tuple[str, str], str]):
|
||||
def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout=None):
|
||||
"""返回当前元素下级所有符合条件的子元素、属性或节点文本 \n
|
||||
示例: \n
|
||||
- 用loc元组查找: \n
|
||||
ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n
|
||||
- 用查询字符串查找: \n
|
||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||
ele.eles('.ele_class') - 返回所有 class 为 ele_class 的子元素 \n
|
||||
ele.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的子元素 \n
|
||||
ele.eles('#ele_id') - 返回所有 id 为 ele_id 的子元素 \n
|
||||
ele.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的子元素 \n
|
||||
ele.eles('@class:ele_class') - 返回所有class含有ele_class的子元素 \n
|
||||
ele.eles('@name=ele_name') - 返回所有name等于ele_name的子元素 \n
|
||||
ele.eles('@placeholder') - 返回所有带placeholder属性的子元素 \n
|
||||
ele.eles('tag:p') - 返回所有<p>子元素 \n
|
||||
ele.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div子元素 \n
|
||||
ele.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div子元素 \n
|
||||
ele.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div子元素 \n
|
||||
ele.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div子元素 \n
|
||||
ele.eles('text:some_text') - 返回所有文本含有some_text的子元素 \n
|
||||
ele.eles('some_text') - 返回所有文本含有some_text的子元素(等价于上一行) \n
|
||||
ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n
|
||||
ele.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的子元素 \n
|
||||
ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n
|
||||
- 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n
|
||||
ele.eles('x://div[@class="ele_class"]') - 等同于 ele.eles('xpath://div[@class="ele_class"]') \n
|
||||
ele.eles('c:div.ele_class') - 等同于 ele.eles('css:div.ele_class') \n
|
||||
ele.eles('t:div') - 等同于 ele.eles('tag:div') \n
|
||||
ele.eles('t:div@tx()=some_text') - 等同于 ele.eles('tag:div@text()=some_text') \n
|
||||
ele.eles('tx:some_text') - 等同于 ele.eles('text:some_text') \n
|
||||
ele.eles('tx=some_text') - 等同于 ele.eles('text=some_text')
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param timeout: 不起实际作用,用于和父类对应
|
||||
:return: SessionElement对象组成的列表
|
||||
"""
|
||||
return self.ele(loc_or_str, mode='all')
|
||||
@ -318,41 +198,6 @@ class SessionElement(DrissionElement):
|
||||
|
||||
return path_str[1:] if mode == 'css' else path_str
|
||||
|
||||
def _get_brother(self, num: int = 1, mode: str = 'ele', direction: str = 'next'):
    """Return the num-th sibling element or node before or after this element.

    :param num: which sibling to return, counting from 1
    :param mode: 'ele', 'node' or 'text' - match elements, any node, or text nodes
    :param direction: 'next' or 'prev' - which way to search
    :return: the result of ``self.ele()`` for the generated sibling xpath
    :raises ValueError: if mode or direction is not one of the accepted values
    """
    # Map the mode onto an XPath node test (mode is validated first).
    for mode_name, candidate in (('ele', '*'), ('node', 'node()'), ('text', 'text()')):
        if mode == mode_name:
            node_test = candidate
            break
    else:
        raise ValueError(f"Argument mode can only be 'node' ,'ele' or 'text', not '{mode}'.")

    # Map the direction onto an XPath sibling axis.
    for direction_name, candidate in (('next', 'following'), ('prev', 'preceding')):
        if direction == direction_name:
            axis = candidate
            break
    else:
        raise ValueError(f"Argument direction can only be 'next' or 'prev', not '{direction}'.")

    while True:
        found = self.ele(f'xpath:./{axis}-sibling::{node_test}[{num}]')

        # A bare newline between elements is skipped: try the next index.
        if found == '\n':
            num += 1
            continue

        return found
|
||||
|
||||
# ----------------session独有方法-----------------------
|
||||
def _make_absolute(self, link) -> str:
|
||||
"""获取绝对url
|
||||
|
@ -7,7 +7,7 @@
|
||||
from os import path as os_PATH
|
||||
from pathlib import Path
|
||||
from random import randint
|
||||
from re import search as re_SEARCH, sub as re_SUB
|
||||
from re import search as re_SEARCH, sub
|
||||
from time import time, sleep
|
||||
from typing import Union, List, Tuple
|
||||
from urllib.parse import urlparse, quote, unquote
|
||||
@ -15,30 +15,33 @@ from urllib.parse import urlparse, quote, unquote
|
||||
from requests import Session, Response
|
||||
from tldextract import extract
|
||||
|
||||
from .base import BasePage
|
||||
from .common import str_to_loc, translate_loc, get_available_file_name, format_html
|
||||
from .config import _cookie_to_dict
|
||||
from .session_element import SessionElement, execute_session_find
|
||||
|
||||
|
||||
class SessionPage(object):
|
||||
class SessionPage(BasePage):
|
||||
"""SessionPage封装了页面操作的常用功能,使用requests来获取、解析网页"""
|
||||
|
||||
def __init__(self, session: Session, timeout: float = 10):
|
||||
"""初始化函数"""
|
||||
super().__init__(timeout)
|
||||
self._session = session
|
||||
self.timeout = timeout
|
||||
self._url = None
|
||||
self._url_available = None
|
||||
self._response = None
|
||||
|
||||
self.retry_times = 3
|
||||
self.retry_interval = 2
|
||||
|
||||
def __call__(self,
|
||||
loc_or_str: Union[Tuple[str, str], str, SessionElement],
|
||||
mode: str = 'single',
|
||||
timeout: float = None):
|
||||
return self.ele(loc_or_str, mode)
|
||||
timeout: float = None) -> Union[SessionElement, List[SessionElement]]:
|
||||
"""在内部查找元素 \n
|
||||
例:ele2 = ele1('@id=ele_id') \n
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 不起实际作用,用于和父类对应
|
||||
:return: SessionElement对象
|
||||
"""
|
||||
return super().__call__(loc_or_str, mode, timeout)
|
||||
|
||||
# -----------------共有属性和方法-------------------
|
||||
@property
|
||||
@ -57,14 +60,9 @@ class SessionPage(object):
|
||||
return format_html(self.response.text) if self.response else ''
|
||||
|
||||
@property
|
||||
def cookies(self) -> dict:
|
||||
"""返回session的cookies"""
|
||||
return self.get_cookies(True)
|
||||
|
||||
@property
|
||||
def url_available(self) -> bool:
|
||||
"""返回当前访问的url有效性"""
|
||||
return self._url_available
|
||||
def json(self) -> dict:
|
||||
"""当返回内容是json格式时,返回对应的字典"""
|
||||
return self.response.json()
|
||||
|
||||
def get(self,
|
||||
url: str,
|
||||
@ -109,35 +107,11 @@ class SessionPage(object):
|
||||
|
||||
def ele(self,
|
||||
loc_or_ele: Union[Tuple[str, str], str, SessionElement],
|
||||
mode: str = None) -> Union[SessionElement, List[SessionElement], str, None]:
|
||||
mode: str = None, timeout=None) -> Union[SessionElement, List[SessionElement], str, None]:
|
||||
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n
|
||||
示例: \n
|
||||
- 接收到元素对象时: \n
|
||||
返回SessionElement对象 \n
|
||||
- 用loc元组查找: \n
|
||||
ele.ele((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n
|
||||
- 用查询字符串查找: \n
|
||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||
page.ele('.ele_class') - 返回第一个 class 为 ele_class 的元素 \n
|
||||
page.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的元素 \n
|
||||
page.ele('#ele_id') - 返回第一个 id 为 ele_id 的元素 \n
|
||||
page.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的元素 \n
|
||||
page.ele('@class:ele_class') - 返回第一个class含有ele_class的元素 \n
|
||||
page.ele('@name=ele_name') - 返回第一个name等于ele_name的元素 \n
|
||||
page.ele('@placeholder') - 返回第一个带placeholder属性的元素 \n
|
||||
page.ele('tag:p') - 返回第一个<p>元素 \n
|
||||
page.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div元素 \n
|
||||
page.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div元素 \n
|
||||
page.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div元素 \n
|
||||
page.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div元素 \n
|
||||
page.ele('text:some_text') - 返回第一个文本含有some_text的元素 \n
|
||||
page.ele('some_text') - 返回第一个文本含有some_text的元素(等价于上一行) \n
|
||||
page.ele('text=some_text') - 返回第一个文本等于some_text的元素 \n
|
||||
page.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的元素 \n
|
||||
page.ele('css:div.ele_class') - 返回第一个符合css selector的元素 \n
|
||||
:param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 不起实际作用,用于和父类对应
|
||||
:return: SessionElement对象
|
||||
"""
|
||||
if isinstance(loc_or_ele, (str, tuple)):
|
||||
@ -146,12 +120,8 @@ class SessionPage(object):
|
||||
else:
|
||||
if len(loc_or_ele) != 2:
|
||||
raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
|
||||
|
||||
loc_or_ele = translate_loc(loc_or_ele)
|
||||
|
||||
# if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
|
||||
# loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
|
||||
|
||||
elif isinstance(loc_or_ele, SessionElement):
|
||||
return loc_or_ele
|
||||
|
||||
@ -161,38 +131,16 @@ class SessionPage(object):
|
||||
return execute_session_find(self, loc_or_ele, mode)
|
||||
|
||||
def eles(self,
|
||||
loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]:
|
||||
loc_or_str: Union[Tuple[str, str], str], timeout=None) -> List[SessionElement]:
|
||||
"""返回页面中所有符合条件的元素、属性或节点文本 \n
|
||||
示例: \n
|
||||
- 用loc元组查找: \n
|
||||
page.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的元素 \n
|
||||
- 用查询字符串查找: \n
|
||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||
page.eles('.ele_class') - 返回所有 class 为 ele_class 的元素 \n
|
||||
page.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的元素 \n
|
||||
page.eles('#ele_id') - 返回所有 id 为 ele_id 的元素 \n
|
||||
page.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的元素 \n
|
||||
page.eles('@class:ele_class') - 返回所有class含有ele_class的元素 \n
|
||||
page.eles('@name=ele_name') - 返回所有name等于ele_name的元素 \n
|
||||
page.eles('@placeholder') - 返回所有带placeholder属性的元素 \n
|
||||
page.eles('tag:p') - 返回所有<p>元素 \n
|
||||
page.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div元素 \n
|
||||
page.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div元素 \n
|
||||
page.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div元素 \n
|
||||
page.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div元素 \n
|
||||
page.eles('text:some_text') - 返回所有文本含有some_text的元素 \n
|
||||
page.eles('some_text') - 返回所有文本含有some_text的元素(等价于上一行) \n
|
||||
page.eles('text=some_text') - 返回所有文本等于some_text的元素 \n
|
||||
page.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的元素 \n
|
||||
page.eles('css:div.ele_class') - 返回所有符合css selector的元素 \n
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param timeout: 不起实际作用,用于和父类对应
|
||||
:return: SessionElement对象组成的列表
|
||||
"""
|
||||
if not isinstance(loc_or_str, (tuple, str)):
|
||||
raise TypeError('Type of loc_or_str can only be tuple or str.')
|
||||
|
||||
return self.ele(loc_or_str, mode='all')
|
||||
return super().eles(loc_or_str, timeout)
|
||||
|
||||
def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]:
|
||||
"""返回cookies \n
|
||||
@ -391,12 +339,12 @@ class SessionPage(object):
|
||||
file_name = f'untitled_{time()}_{randint(0, 100)}'
|
||||
|
||||
# 去除非法字符
|
||||
file_name = re_SUB(r'[\\/*:|<>?"]', '', file_name).strip()
|
||||
file_name = sub(r'[\\/*:|<>?"]', '', file_name).strip()
|
||||
file_name = unquote(file_name)
|
||||
|
||||
# -------------------重命名,不改变扩展名-------------------
|
||||
if new_name:
|
||||
new_name = re_SUB(r'[\\/*:|<>?"]', '', new_name).strip()
|
||||
new_name = sub(r'[\\/*:|<>?"]', '', new_name).strip()
|
||||
ext_name = file_name.split('.')[-1]
|
||||
|
||||
if '.' in new_name or ext_name == file_name:
|
||||
@ -412,9 +360,9 @@ class SessionPage(object):
|
||||
goal = ''
|
||||
skip = False
|
||||
|
||||
for key, i in enumerate(goal_Path.parts): # 去除路径中的非法字符
|
||||
goal += goal_Path.drive if key == 0 and goal_Path.drive else re_SUB(r'[*:|<>?"]', '', i).strip()
|
||||
goal += '\\' if i != '\\' and key < len(goal_Path.parts) - 1 else ''
|
||||
for key, p in enumerate(goal_Path.parts): # 去除路径中的非法字符
|
||||
goal += goal_Path.drive if key == 0 and goal_Path.drive else sub(r'[*:|<>?"]', '', p).strip()
|
||||
goal += '\\' if p != '\\' and key < len(goal_Path.parts) - 1 else ''
|
||||
|
||||
goal_Path = Path(goal).absolute()
|
||||
goal_Path.mkdir(parents=True, exist_ok=True)
|
||||
|
@ -1,26 +1,32 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
"""
|
||||
@Author : g1879
|
||||
@Contact : g1879@qq.com
|
||||
@File : shadow_root_element.py
|
||||
"""
|
||||
from re import split as re_SPLIT
|
||||
from typing import Union, Any, Tuple
|
||||
from typing import Union, Any, Tuple, List
|
||||
|
||||
from selenium.webdriver.remote.webelement import WebElement
|
||||
|
||||
from .common import format_html, DrissionElement
|
||||
from .base import BaseElement
|
||||
from .common import format_html
|
||||
from .driver_element import execute_driver_find, DriverElement
|
||||
|
||||
|
||||
class ShadowRootElement(DrissionElement):
|
||||
class ShadowRootElement(BaseElement):
|
||||
def __init__(self, inner_ele: WebElement, parent_ele: DriverElement):
    """Initialise the shadow-root wrapper.

    :param inner_ele: WebElement passed to the base-class initialiser
    :param parent_ele: DriverElement this shadow root belongs to; its
        ``page`` is also passed to the base-class initialiser
    """
    super().__init__(inner_ele, parent_ele.page)
    self.parent_ele = parent_ele
|
||||
|
||||
def __repr__(self):
|
||||
def __repr__(self) -> str:
|
||||
return f'<ShadowRootElement in {self.parent_ele} >'
|
||||
|
||||
def __call__(self,
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
mode: str = 'single',
|
||||
timeout: float = None):
|
||||
"""实现查找元素的简化写法 \n
|
||||
timeout: float = None) -> Union[DriverElement, List[DriverElement]]:
|
||||
"""在内部查找元素 \n
|
||||
例:ele2 = ele1('@id=ele_id') \n
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
@ -30,7 +36,7 @@ class ShadowRootElement(DrissionElement):
|
||||
return self.ele(loc_or_str, mode, timeout)
|
||||
|
||||
@property
|
||||
def tag(self):
|
||||
def tag(self) -> str:
|
||||
"""元素标签名"""
|
||||
return 'shadow-root'
|
||||
|
||||
@ -40,16 +46,11 @@ class ShadowRootElement(DrissionElement):
|
||||
return format_html(self.inner_ele.get_attribute('innerHTML'))
|
||||
|
||||
@property
|
||||
def parent(self):
|
||||
def parent(self) -> DriverElement:
|
||||
"""shadow-root所依赖的父元素"""
|
||||
return self.parent_ele
|
||||
|
||||
@property
|
||||
def next(self):
|
||||
"""返回后一个兄弟元素"""
|
||||
return self.nexts()
|
||||
|
||||
def parents(self, num: int = 1):
|
||||
def parents(self, num: int = 1) -> DriverElement:
|
||||
"""返回上面第num级父元素 \n
|
||||
:param num: 第几级父元素
|
||||
:return: DriverElement对象
|
||||
@ -57,7 +58,7 @@ class ShadowRootElement(DrissionElement):
|
||||
loc = 'xpath', f'.{"/.." * (num - 1)}'
|
||||
return self.parent_ele.ele(loc, timeout=0.1)
|
||||
|
||||
def nexts(self, num: int = 1):
|
||||
def nexts(self, num: int = 1) -> DriverElement:
|
||||
"""返回后面第num个兄弟元素 \n
|
||||
:param num: 后面第几个兄弟元素
|
||||
:return: DriverElement对象
|
||||
@ -68,36 +69,8 @@ class ShadowRootElement(DrissionElement):
|
||||
def ele(self,
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
mode: str = 'single',
|
||||
timeout: float = None):
|
||||
timeout: float = None) -> Union[DriverElement, List[DriverElement]]:
|
||||
"""返回当前元素下级符合条件的子元素,默认返回第一个 \n
|
||||
示例: \n
|
||||
- 用loc元组查找: \n
|
||||
ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n
|
||||
- 用查询字符串查找: \n
|
||||
查找方式:属性、tag name和属性、文本、css selector \n
|
||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||
ele.ele('.ele_class') - 返回第一个 class 为 ele_class 的子元素 \n
|
||||
ele.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的子元素 \n
|
||||
ele.ele('#ele_id') - 返回第一个 id 为 ele_id 的子元素 \n
|
||||
ele.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的子元素 \n
|
||||
ele.ele('@class:ele_class') - 返回第一个class含有ele_class的子元素 \n
|
||||
ele.ele('@name=ele_name') - 返回第一个name等于ele_name的子元素 \n
|
||||
ele.ele('@placeholder') - 返回第一个带placeholder属性的子元素 \n
|
||||
ele.ele('tag:p') - 返回第一个<p>子元素 \n
|
||||
ele.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div子元素 \n
|
||||
ele.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div子元素 \n
|
||||
ele.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div子元素 \n
|
||||
ele.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div子元素 \n
|
||||
ele.ele('text:some_text') - 返回第一个文本含有some_text的子元素 \n
|
||||
ele.ele('some_text') - 返回第一个文本含有some_text的子元素(等价于上一行) \n
|
||||
ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n
|
||||
ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n
|
||||
- 查询字符串还有最精简模式,用c代替css、t代替tag、tx代替text: \n
|
||||
ele.ele('c:div.ele_class') - 等同于 ele.ele('css:div.ele_class') \n
|
||||
ele.ele('t:div') - 等同于 ele.ele('tag:div') \n
|
||||
ele.ele('t:div@tx()=some_text') - 等同于 ele.ele('tag:div@text()=some_text') \n
|
||||
ele.ele('tx:some_text') - 等同于 ele.ele('text:some_text') \n
|
||||
ele.ele('tx=some_text') - 等同于 ele.ele('text=some_text')
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 查找元素超时时间
|
||||
@ -118,41 +91,13 @@ class ShadowRootElement(DrissionElement):
|
||||
|
||||
def eles(self,
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
timeout: float = None):
|
||||
timeout: float = None) -> List[DriverElement]:
|
||||
"""返回当前元素下级所有符合条件的子元素 \n
|
||||
示例: \n
|
||||
- 用loc元组查找: \n
|
||||
ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n
|
||||
- 用查询字符串查找: \n
|
||||
查找方式:属性、tag name和属性、文本、css selector \n
|
||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||
ele.eles('.ele_class') - 返回所有 class 为 ele_class 的子元素 \n
|
||||
ele.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的子元素 \n
|
||||
ele.eles('#ele_id') - 返回所有 id 为 ele_id 的子元素 \n
|
||||
ele.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的子元素 \n
|
||||
ele.eles('@class:ele_class') - 返回所有class含有ele_class的子元素 \n
|
||||
ele.eles('@name=ele_name') - 返回所有name等于ele_name的子元素 \n
|
||||
ele.eles('@placeholder') - 返回所有带placeholder属性的子元素 \n
|
||||
ele.eles('tag:p') - 返回所有<p>子元素 \n
|
||||
ele.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div子元素 \n
|
||||
ele.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div子元素 \n
|
||||
ele.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div子元素 \n
|
||||
ele.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div子元素 \n
|
||||
ele.eles('text:some_text') - 返回所有文本含有some_text的子元素 \n
|
||||
ele.eles('some_text') - 返回所有文本含有some_text的子元素(等价于上一行) \n
|
||||
ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n
|
||||
ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n
|
||||
- 查询字符串还有最精简模式,用c代替css、t代替tag、tx代替text: \n
|
||||
ele.eles('c:div.ele_class') - 等同于 ele.eles('css:div.ele_class') \n
|
||||
ele.eles('t:div') - 等同于 ele.eles('tag:div') \n
|
||||
ele.eles('t:div@tx()=some_text') - 等同于 ele.eles('tag:div@text()=some_text') \n
|
||||
ele.eles('tx:some_text') - 等同于 ele.eles('text:some_text') \n
|
||||
ele.eles('tx=some_text') - 等同于 ele.eles('text=some_text')
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param timeout: 查找元素超时时间
|
||||
:return: DriverElement对象组成的列表
|
||||
"""
|
||||
return self.ele(loc_or_str, mode='all', timeout=timeout)
|
||||
return super().eles(loc_or_str, timeout)
|
||||
|
||||
def run_script(self, script: str, *args) -> Any:
|
||||
"""执行js代码,传入自己为第一个参数 \n
|
||||
@ -175,7 +120,11 @@ class ShadowRootElement(DrissionElement):
|
||||
return False
|
||||
|
||||
# ----------------ShadowRootElement独有方法-----------------------
|
||||
def _find_eles_by_text(self, text: str, tag: str = '', match: str = 'exact', mode: str = 'single'):
|
||||
def _find_eles_by_text(self,
|
||||
text: str,
|
||||
tag: str = '',
|
||||
match: str = 'exact',
|
||||
mode: str = 'single') -> Union[DriverElement, List[DriverElement]]:
|
||||
"""根据文本获取页面元素 \n
|
||||
:param text: 文本字符串
|
||||
:param tag: tag name
|
||||
|
@ -2332,7 +2332,8 @@ Drag the current element, the target is another element or coordinate tuple, and
|
||||
|
||||
Parameter Description:
|
||||
|
||||
- ele_or_loc[tuple, WebElement, DrissionElement] - Another element or relative current position, the coordinates are the coordinates of the element's midpoint.
|
||||
- ele_or_loc[tuple, WebElement, BaseElement] - Another element or relative current position, the coordinates are the
|
||||
coordinates of the element's midpoint.
|
||||
- speed: int - drag speed
|
||||
- shake: bool - whether to shake randomly
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user