增加了search()和search_all()

This commit is contained in:
g1879 2020-04-27 15:52:36 +08:00
parent cc272ed14a
commit 315cf6085a
9 changed files with 304 additions and 273 deletions

View File

@ -10,17 +10,17 @@ Path(global_tmp_path).mkdir(parents=True, exist_ok=True)
global_driver_options = { global_driver_options = {
# ---------------已打开的浏览器--------------- # ---------------已打开的浏览器---------------
# 'debuggerAddress': '127.0.0.1:9222', 'debuggerAddress': '127.0.0.1:9222',
# ---------------chromedriver路径--------------- # ---------------chromedriver路径---------------
'chromedriver_path': r'D:\python\Google Chrome\Chrome\chromedriver.exe', 'chromedriver_path': r'D:\python\Google Chrome\Chrome\chromedriver.exe',
# ---------------手动指定使用的浏览器位置--------------- # ---------------手动指定使用的浏览器位置---------------
# 'binary_location': r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe', # 'binary_location': r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe',
# ---------------启动参数--------------- # ---------------启动参数---------------
'arguments': [ 'arguments': [
'--headless', # 隐藏浏览器窗口 # '--headless', # 隐藏浏览器窗口
'--mute-audio', # 静音 '--mute-audio', # 静音
'--no-sandbox', '--no-sandbox',
'--blink-settings=imagesEnabled=false', # 不加载图片 # '--blink-settings=imagesEnabled=false', # 不加载图片
# r'--user-data-dir="E:\tmp\chrome_tmp"', # 指定用户文件夹路径 # r'--user-data-dir="E:\tmp\chrome_tmp"', # 指定用户文件夹路径
# '-disk-cache-dir=""', # 指定缓存路径 # '-disk-cache-dir=""', # 指定缓存路径
'zh_CN.UTF-8', # 编码格式 'zh_CN.UTF-8', # 编码格式

View File

@ -12,7 +12,7 @@ from selenium import webdriver
from selenium.common.exceptions import WebDriverException from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from DrissionPage.config import global_driver_options, global_session_options from .config import global_driver_options, global_session_options
def _get_chrome_options(options: dict) -> Options: def _get_chrome_options(options: dict) -> Options:

View File

@ -95,7 +95,6 @@ class DriverPage(object):
elif mode == 'visible': elif mode == 'visible':
msg = '元素不可见或不存在' msg = '元素不可见或不存在'
ele = wait.until(EC.visibility_of_element_located(loc)) ele = wait.until(EC.visibility_of_element_located(loc))
# print(loc,ele)
return ele return ele
except: except:
if show_errmsg: if show_errmsg:
@ -105,6 +104,25 @@ class DriverPage(object):
"""查找符合条件的所有元素""" """查找符合条件的所有元素"""
return self.find(loc, mode='all', timeout=timeout, show_errmsg=show_errmsg) return self.find(loc, mode='all', timeout=timeout, show_errmsg=show_errmsg)
def search(self, value: str, mode: str = None, timeout: float = 10):
    """Find element(s) whose text contains ``value``.

    Builds the XPath ``//*[contains(text(),"<value>")]`` and waits for it
    with a ``WebDriverWait`` bound to ``self.driver``.

    :param value: text fragment to look for. It is interpolated unescaped
        into a double-quoted XPath literal, so a value that itself contains
        a double quote produces a malformed expression.
    :param mode: ``'single'`` (used when ``mode`` is falsy) or ``'all'``.
    :param timeout: number of seconds handed to ``WebDriverWait``.
    :return: whatever the wait returns on success; an empty list when the
        lookup fails for any reason (this is also the failure value in
        ``'single'`` mode).
    :raises ValueError: if ``mode`` is neither ``'single'`` nor ``'all'``.
    """
    mode = mode or 'single'
    if mode not in ('single', 'all'):
        raise ValueError("mode须在'single', 'all'中选择")
    try:
        loc = 'xpath', f'//*[contains(text(),"{value}")]'
        wait = WebDriverWait(self.driver, timeout=timeout)
        if mode == 'single':
            return wait.until(EC.presence_of_element_located(loc))
        return wait.until(EC.presence_of_all_elements_located(loc))
    except Exception:
        # Best-effort lookup: ordinary failures mean "nothing found".
        # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still propagate.
        return []
def search_all(self, value: str, timeout: float = 10):
    """Return every element whose text contains ``value``.

    Thin convenience wrapper: delegates to ``self.search`` with
    ``mode='all'`` and forwards ``timeout`` unchanged.
    """
    matches = self.search(value, mode='all', timeout=timeout)
    return matches
def get_attr(self, loc_or_ele: Union[WebElement, tuple], attr: str) -> str: def get_attr(self, loc_or_ele: Union[WebElement, tuple], attr: str) -> str:
"""获取元素属性""" """获取元素属性"""
ele = self._get_ele(loc_or_ele) ele = self._get_ele(loc_or_ele)

224
DrissionPage/mix_element.py Normal file
View File

@ -0,0 +1,224 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import re
from html import unescape
from time import sleep
from typing import Union
from requests_html import Element
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from .config import global_tmp_path
from .session_page import _translate_loc
class MixElement(object):
    """Wrap a requests_html ``Element`` or a selenium ``WebElement`` behind one API.

    Members that test ``isinstance(self._ele, Element)`` work for both kinds
    of element. The members below the "d-mode only" marker call WebElement
    APIs directly and are meant for selenium elements only.
    """

    def __init__(self, ele: "Union[WebElement, Element]"):
        """Store the element to wrap."""
        self._ele = ele

    @property
    def ele(self):
        """The wrapped element object, exactly as passed to the constructor."""
        return self._ele

    @property
    def text(self):
        """Element text with HTML entities unescaped and NBSP turned into a space.

        Reads ``.text`` from an ``Element`` and the ``innerText`` attribute
        from anything else.
        """
        if isinstance(self._ele, Element):
            raw = self._ele.text
        else:
            raw = self.attr('innerText')
        return unescape(raw).replace('\xa0', ' ')

    @property
    def html(self):
        """Inner HTML with entities unescaped and NBSP turned into a space.

        For an ``Element`` the outermost opening and closing tags are
        stripped from ``.html``; an element without a closing tag has
        nothing between its tags, so ``''`` is returned instead of failing.
        """
        if not isinstance(self._ele, Element):
            return unescape(self.attr('innerHTML')).replace('\xa0', ' ')
        outer = unescape(self._ele.html).replace('\xa0', ' ')
        matched = re.match(r'<.*?>(.*)</.*?>', outer, flags=re.DOTALL)
        return matched.group(1) if matched else ''

    @property
    def tag_name(self):
        """Tag name of the wrapped element.

        For an ``Element`` the name is parsed from the start of ``.html``.
        The pattern stops at whitespace, ``/`` or ``>`` so that tags without
        attributes (``<p>some text</p>``) are handled too.
        """
        if not isinstance(self._ele, Element):
            return self._ele.tag_name
        outer = unescape(self._ele.html).replace('\xa0', ' ')
        matched = re.match(r'<([^\s/>]+)', outer)
        return matched.group(1) if matched else ''

    def attr(self, attr):
        """Return the value of attribute ``attr``.

        For an ``Element``: ``'href'`` yields the first entry of
        ``absolute_links`` (``None`` when there is none), ``'class'`` yields
        the class names joined by single spaces, anything else is read from
        ``attrs``; any failure yields ``''``. For other elements the call is
        forwarded to ``get_attribute``.
        """
        if not isinstance(self._ele, Element):
            return self._ele.get_attribute(attr)
        try:
            if attr == 'href':
                # Reading attrs['href'] directly would only give the relative address.
                return next(iter(self._ele.absolute_links), None)
            if attr == 'class':
                return ' '.join(self._ele.attrs['class'])
            return self._ele.attrs[attr]
        except Exception:
            return ''

    def find(self, loc: tuple, mode: str = None, show_errmsg: bool = True):
        """Look up element(s) by locator ``loc`` and wrap the result.

        :param loc: locator tuple; translated with ``_translate_loc`` for an
            ``Element``, passed to selenium expected conditions otherwise.
        :param mode: ``'single'`` (default) or ``'all'``; selenium elements
            additionally accept ``'visible'``.
        :param show_errmsg: print a message when the lookup fails.
        :return: a ``MixElement`` wrapping the lookup result.
        :raises ValueError: if ``mode`` is not one of the accepted values.
        """
        mode = mode or 'single'
        # NOTE(review): in 'all' mode the whole result list is wrapped in ONE
        # MixElement, unlike search(), which returns a list of MixElement.
        # Kept as-is to preserve the return shape - confirm the intent.
        if isinstance(self._ele, Element):
            if mode not in ('single', 'all'):
                raise ValueError("mode须在'single', 'all'中选择")
            loc_by, loc_str = _translate_loc(loc)
            msg = first = None
            try:
                if mode == 'single':
                    msg, first = '未找到元素', True
                elif mode == 'all':
                    msg, first = '未找到元素s', False
                if loc_by == 'xpath':
                    found = self.ele.xpath(loc_str, first=first, _encoding='utf-8')
                else:
                    found = self.ele.find(loc_str, first=first, _encoding='utf-8')
                return MixElement(found)
            except Exception:
                # NOTE(review): assumes the re-raise belongs to the
                # show_errmsg branch - confirm.
                if show_errmsg:
                    print(msg, loc)
                    raise
            return None

        # d mode (selenium element)
        if mode not in ('single', 'all', 'visible'):
            raise ValueError("mode须在'single', 'all', 'visible'中选择")
        msg = found = None
        try:
            # NOTE(review): the wait is bound to the driver (self.ele.parent),
            # so the lookup presumably covers the whole page rather than only
            # this element's subtree - confirm this is intended.
            wait = WebDriverWait(self.ele.parent, timeout=10)
            if mode == 'single':
                msg = '未找到元素'
                found = wait.until(EC.presence_of_element_located(loc))
            elif mode == 'all':
                msg = '未找到元素s'
                found = wait.until(EC.presence_of_all_elements_located(loc))
            elif mode == 'visible':
                msg = '元素不可见或不存在'
                found = wait.until(EC.visibility_of_element_located(loc))
            return MixElement(found)
        except Exception:
            if show_errmsg:
                print(msg, loc)
        return None

    def find_all(self, loc: tuple, show_errmsg: bool = True):
        """Shortcut for ``find(loc, mode='all', ...)``."""
        return self.find(loc, mode='all', show_errmsg=show_errmsg)

    def search(self, value: str, mode: str = None):
        """Find element(s) whose text contains ``value``.

        ``value`` is interpolated unescaped into a double-quoted XPath
        literal, so a value containing a double quote produces a malformed
        expression.

        :param mode: ``'single'`` (default) or ``'all'``.
        :return: a ``MixElement`` in single mode, a list of ``MixElement``
            in all mode, ``None`` when the lookup fails.
        :raises ValueError: if ``mode`` is not one of the accepted values.
        """
        mode = mode or 'single'
        if mode not in ('single', 'all'):
            raise ValueError("mode须在'single', 'all'中选择")
        xpath = f'.//*[contains(text(),"{value}")]'
        try:
            if isinstance(self._ele, Element):
                if mode == 'single':
                    return MixElement(self.ele.xpath(xpath, first=True))
                return [MixElement(item) for item in self.ele.xpath(xpath)]
            # d mode (selenium element)
            loc = 'xpath', xpath
            wait = WebDriverWait(self.ele.parent, timeout=10)
            if mode == 'single':
                return MixElement(wait.until(EC.presence_of_element_located(loc)))
            found = wait.until(EC.presence_of_all_elements_located(loc))
            return [MixElement(item) for item in found]
        except Exception:
            return None

    def search_all(self, value: str):
        """Shortcut for ``search(value, mode='all')``."""
        return self.search(value, mode='all')

    # ----------------- d-mode only below -------------------
    def click(self):
        """Click the element.

        Tries a native click up to 10 times (printing the error and pausing
        0.2 s after each failure), then falls back to a JavaScript click.

        :return: ``True`` once a click went through; an exception from the
            JavaScript fallback propagates to the caller.
        """
        for _ in range(10):
            try:
                self.ele.click()
                return True
            except Exception as e:
                print(e)
                sleep(0.2)
        # Native click kept failing: click through JavaScript instead.
        print('用js点击')
        self.run_script('arguments[0].click()')
        return True

    def input(self, value, clear: bool = True):
        """Send ``value`` to the element, emptying its value first when ``clear`` is true.

        :return: ``True``; errors propagate to the caller.
        """
        if clear:
            self.run_script("arguments[0].value=''")
        self.ele.send_keys(value)
        return True

    def run_script(self, script: str):
        """Execute ``script`` with this element as ``arguments[0]``.

        :return: the value returned by ``execute_script`` (callers such as
            ``screenshot`` depend on it).
        """
        return self.ele.parent.execute_script(script, self.ele)

    def submit(self):
        """Forward to the element's ``submit()``."""
        self.ele.submit()

    def clear(self):
        """Forward to the element's ``clear()``."""
        self.ele.clear()

    def is_selected(self):
        """Forward to the element's ``is_selected()``."""
        return self.ele.is_selected()

    def is_enabled(self):
        """Forward to the element's ``is_enabled()``."""
        return self.ele.is_enabled()

    def is_displayed(self):
        """Forward to the element's ``is_displayed()``."""
        return self.ele.is_displayed()

    @property
    def size(self):
        """The element's ``size`` attribute."""
        return self.ele.size

    @property
    def location(self):
        """The element's ``location`` attribute."""
        return self.ele.location

    def screenshot(self, path: str = None, filename: str = None):
        """Save a PNG screenshot of the element and return the file path.

        :param path: target directory; ``global_tmp_path`` when falsy.
        :param filename: file name without extension; the tag name when falsy.
        :return: path of the written image as a string.
        """
        from pathlib import Path

        path = path if path else global_tmp_path
        tag = self.tag_name
        name = filename if filename else tag
        if tag == 'img':
            # Wait until the image has finished loading.
            js = 'return arguments[0].complete && typeof arguments[0].naturalWidth ' \
                 '!= "undefined" && arguments[0].naturalWidth > 0'
            while not self.run_script(js):
                sleep(0.05)  # poll instead of spinning at full CPU
        # Join with the platform separator rather than a hard-coded backslash.
        img_path = str(Path(path) / f'{name}.png')
        self.ele.screenshot(img_path)
        return img_path

    def select(self, value: str):
        """Placeholder; currently does nothing."""
        pass

    def set_attr(self, attr, value):
        """Set a property on the element through JavaScript.

        ``attr`` and ``value`` are interpolated into the script text, and
        ``value`` is wrapped in single quotes.

        :return: ``True``; errors propagate to the caller.
        """
        self.run_script(f"arguments[0].{attr} = '{value}';")
        return True

View File

@ -12,9 +12,9 @@ from requests_html import Element, HTMLSession
from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.remote.webelement import WebElement
from DrissionPage.drission import Drission from .drission import Drission
from DrissionPage.driver_page import DriverPage from .driver_page import DriverPage
from DrissionPage.session_page import SessionPage from .session_page import SessionPage
class Null(object): class Null(object):
@ -183,6 +183,18 @@ class MixPage(Null, SessionPage, DriverPage):
elif self._mode == 'd': elif self._mode == 'd':
return super(SessionPage, self).find_all(loc, timeout=timeout, show_errmsg=show_errmsg) return super(SessionPage, self).find_all(loc, timeout=timeout, show_errmsg=show_errmsg)
def search(self, value: str, mode: str = None, timeout: float = 10):
    """Search by text content using the page implementation for the current mode.

    In ``'s'`` mode the call goes to the next ``search`` in the MRO
    (without ``timeout``); in ``'d'`` mode it goes to the ``search`` found
    after ``SessionPage`` in the MRO, with ``timeout``. Any other mode
    yields ``None``.
    """
    current = self._mode
    if current == 's':
        return super().search(value, mode=mode)
    if current == 'd':
        return super(SessionPage, self).search(value, mode=mode, timeout=timeout)
    return None
def search_all(self, value: str, timeout: float = 10):
    """Search for all text matches using the page implementation for the current mode.

    In ``'s'`` mode the call goes to the next ``search_all`` in the MRO
    (without ``timeout``); in ``'d'`` mode it goes to the ``search_all``
    found after ``SessionPage`` in the MRO, with ``timeout``. Any other
    mode yields ``None``.
    """
    current = self._mode
    if current == 's':
        return super().search_all(value)
    if current == 'd':
        return super(SessionPage, self).search_all(value, timeout=timeout)
    return None
def get_attr(self, loc_or_ele: Union[WebElement, Element, tuple], attr: str) -> str: def get_attr(self, loc_or_ele: Union[WebElement, Element, tuple], attr: str) -> str:
"""获取元素属性值""" """获取元素属性值"""
if self._mode == 's': if self._mode == 's':

View File

@ -1,232 +0,0 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
旧版MixPage已弃用
在MixPage类中使用DriverPage和SessionPage对象使用时根据模式调用相应对象的函数
问题是须要在MixPage类中为这两个类中的函数写一一对应的调用函数
新版中直接继承这两个类只须要为这两个类共有的函数写调用函数即可
"""
from abc import abstractmethod
from typing import Union
from urllib import parse
from requests_html import Element
from selenium.webdriver.remote.webelement import WebElement
from DrissionPage.drission import Drission
from DrissionPage.driver_page import DriverPage
from DrissionPage.session_page import SessionPage
class MixPage:
# Legacy page object: holds one DriverPage and one SessionPage and forwards
# each call to one of them depending on the current mode ('d' or 's').
# NOTE(review): the class does not use ABCMeta, so the @abstractmethod below
# is not enforced at instantiation time.

# Stores the Drission object, the optional locs and the starting mode, creates
# the page object for that mode and calls the _open_self_url() hook.
def __init__(self, drission: Drission, locs=None, mode='d'):
self._drission = drission
self._session = None
self._driver = None
self._session_page = None
self._driver_page = None
self._url = None
self._session_url = None
self._locs = locs
self._mode = mode
if mode == 's':
self._session_page = self.s_page
else:
self._driver_page = self.d_page
self._open_self_url()
# Hook with an empty default body.
@abstractmethod
def _open_self_url(self):
pass
# Current mode string as stored in self._mode.
@property
def mode(self):
return self._mode
@mode.setter
def mode(self, value):
self._mode = value
# Returns immediately when `mode` equals the current mode; otherwise flips
# the mode between 'd' and 's'.
def change_mode(self, mode=None):
if mode == self.mode:
return
self.mode = 's' if self.mode == 'd' else 'd'
# The Drission object passed to the constructor.
@property
def drission(self):
return self._drission
# The `response` attribute of the session page.
@property
def response(self):
return self.s_page.response
# Session taken from the Drission object on first access and cached.
@property
def session(self):
if self._session is None:
self._session = self._drission.session
return self._session
# Driver taken from the Drission object on first access and cached.
@property
def driver(self):
if self._driver is None:
self._driver = self._drission.driver
return self._driver
# DriverPage created on first access; _init_page() is involved when a url is
# already set.
@property
def d_page(self):
if self._driver_page is None:
self._driver_page = DriverPage(self.driver)
if self._url:
self._init_page()
return self._driver_page
# SessionPage created on first access; _init_page() is involved when a url is
# already set, and refresh_url() is called before returning.
@property
def s_page(self):
if self._session_page is None:
self._session_page = SessionPage(self.session)
if self._url:
self._init_page()
self.refresh_url() # keep the url consistent with the driver page each time the session page is accessed
return self._session_page
# In 'd' mode the url reported by the driver page, otherwise the stored url.
@property
def url(self):
if self.mode == 'd':
return self.d_page.url
else:
return self._url
# Copies cookies to the other side and opens self._url there: to the driver
# when a session page exists, else to the session when a driver page exists.
def _init_page(self):
if self._session_page:
self.cookies_to_driver(self._url)
self.d_page.get(self._url)
elif self._driver_page:
self.cookies_to_session()
self.s_page.get(self._url)
def goto(self, url: str, url_data: dict = None):
"""Navigate to a url"""
# url_data, when given, is url-encoded and appended as the query string.
to_url = f'{url}?{parse.urlencode(url_data)}' if url_data else url
# Nothing to do when the target equals the stored url.
if self._url == to_url:
return
now_url = self._url
self._url = to_url
if self._driver_page:
if self._session_page:
self.cookies_to_driver(now_url)
# NOTE(review): to_url already contains the encoded url_data, and url_data
# is passed again here - confirm DriverPage.get does not append it twice.
self._driver_page.get(to_url, url_data)
if not self._session_page:
return self.check_driver_url()
if self._session_page:
self._session_url = to_url
if self._session_page:
self.cookies_to_session()
return self.s_page.goto(to_url, url_data)
def check_driver_url(self) -> bool:
"""Implemented by subclasses according to the specific page"""
return True
def refresh_url(self):
"""Keep the session's url consistent with the driver's current url and copy cookies to the session"""
# Only acts when a driver exists and either stored url differs from
# the driver's current url.
if self._driver and (self._url != self._driver.current_url or self._session_url != self._driver.current_url):
self._url = self._driver.current_url
self._session_url = self._driver.current_url
self.cookies_to_session()
self._session_page.get(self._url)
# Delegates to Drission.cookies_to_session().
def cookies_to_session(self):
self._drission.cookies_to_session()
# Delegates to Drission.cookies_to_driver() using `url`, or the stored url
# when `url` is falsy.
def cookies_to_driver(self, url=None):
u = url if url else self._url
self._drission.cookies_to_driver(u)
# ---------------- shared functions below -----------------------
# Each of these forwards to the session page in 's' mode and to the driver
# page in 'd' mode; timeout is only passed on in 'd' mode.
def find(self, loc, timeout=10, show_errmsg=True) -> Union[WebElement, Element]:
if self._mode == 's':
return self.s_page.find(loc, show_errmsg)
elif self._mode == 'd':
return self.d_page.find(loc, timeout, show_errmsg)
def find_all(self, loc, timeout=10, show_errmsg=True) -> list:
if self._mode == 's':
return self.s_page.find_all(loc, show_errmsg)
elif self._mode == 'd':
return self.d_page.find_all(loc, timeout, show_errmsg)
def get_attr(self, loc_or_ele, attr) -> str:
if self._mode == 's':
return self.s_page.get_attr(loc_or_ele, attr)
elif self._mode == 'd':
return self.d_page.get_attr(loc_or_ele, attr)
def get_html(self, loc_or_ele) -> str:
if self._mode == 's':
return self.s_page.get_html(loc_or_ele)
elif self._mode == 'd':
return self.d_page.get_html(loc_or_ele)
def get_text(self, loc_or_ele) -> str:
if self._mode == 's':
return self.s_page.get_text(loc_or_ele)
elif self._mode == 'd':
return self.d_page.get_text(loc_or_ele)
# Calls get_html() with no argument on the page for the current mode.
def get_source(self):
if self._mode == 's':
return self.s_page.get_html()
elif self._mode == 'd':
return self.d_page.get_html()
# `cookies` attribute of the page for the current mode.
def get_cookies(self):
if self._mode == 's':
return self.s_page.cookies
elif self._mode == 'd':
return self.d_page.cookies
# ---------------- driver-page-only functions below -----------------
# Each of these forwards its arguments to the same-named DriverPage method.
def input(self, loc_or_ele, value: str, clear=True) -> bool:
return self.d_page.input(loc_or_ele, value, clear)
def click(self, loc_or_ele) -> bool:
return self.d_page.click(loc_or_ele)
def set_attr(self, loc_or_ele, attribute: str, value: str) -> bool:
return self.d_page.set_attr(loc_or_ele, attribute, value)
def run_script(self, loc_or_ele, script: str):
return self.d_page.run_script(loc_or_ele, script)
def get_tabs_sum(self) -> int:
return self.d_page.get_tabs_sum()
def get_tab_num(self) -> int:
return self.d_page.get_tab_num()
def to_tab(self, index: int = 0):
return self.d_page.to_tab(index)
def close_current_tab(self):
return self.d_page.close_current_tab()
def close_other_tabs(self, tab_index: int = None):
return self.d_page.close_other_tabs(tab_index)
def to_iframe(self, loc_or_ele):
return self.d_page.to_iframe(loc_or_ele)
def get_screen(self, loc_or_ele, path: str, file_name: str = None) -> str:
return self.d_page.get_screen(loc_or_ele, path, file_name)
def choose_select_list(self, loc_or_ele, text):
return self.d_page.choose_select_list(loc_or_ele, text)
def refresh(self):
return self.d_page.refresh()
def back(self):
return self.d_page.back()
def set_window_size(self, x: int = None, y: int = None):
return self.d_page.set_window_size(x, y)

View File

@ -11,7 +11,7 @@ from urllib import parse
from requests_html import Element, HTMLSession, HTMLResponse from requests_html import Element, HTMLSession, HTMLResponse
from DrissionPage.config import global_session_options from .config import global_session_options
def _translate_loc(loc): def _translate_loc(loc):
@ -115,6 +115,23 @@ class SessionPage(object):
"""查找符合条件的所有元素""" """查找符合条件的所有元素"""
return self.find(loc, mode='all', show_errmsg=True) return self.find(loc, mode='all', show_errmsg=True)
def search(self, value: str, mode: str = None):
    """Find element(s) in the current response whose text contains ``value``.

    Runs the XPath ``.//*[contains(text(),"<value>")]`` against
    ``self.response.html``.

    :param value: text fragment to look for. It is interpolated unescaped
        into a double-quoted XPath literal, so a value that itself contains
        a double quote produces a malformed expression.
    :param mode: ``'single'`` (used when ``mode`` is falsy) for the first
        match, ``'all'`` for every match.
    :return: the result of the ``xpath`` call, or ``None`` when the lookup
        fails for any reason.
    :raises ValueError: if ``mode`` is neither ``'single'`` nor ``'all'``.
    """
    mode = mode or 'single'
    if mode not in ('single', 'all'):
        raise ValueError("mode须在'single', 'all'中选择")
    try:
        xpath = f'.//*[contains(text(),"{value}")]'
        return self.response.html.xpath(xpath, first=(mode == 'single'))
    except Exception:
        # Best-effort lookup: ordinary failures mean "nothing found".
        # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still propagate.
        return None
def search_all(self, value: str):
    """Return every element whose text contains ``value``.

    Thin convenience wrapper: delegates to ``self.search`` with
    ``mode='all'``.
    """
    matches = self.search(value, mode='all')
    return matches
def _get_ele(self, loc_or_ele: Union[Element, tuple]) -> Element: def _get_ele(self, loc_or_ele: Union[Element, tuple]) -> Element:
"""获取loc或元素实例返回元素实例""" """获取loc或元素实例返回元素实例"""
# ====================================== # ======================================

View File

@ -2,36 +2,6 @@
#### 介绍 #### 介绍
一个整合了selenium和requests_html的模块封装了常用页面操作可实现两种模式的无缝切换。兼顾selenium的易用性和requests的高性能也可直接用于PO模式。 一个整合了selenium和requests_html的模块封装了常用页面操作可实现两种模式的无缝切换。兼顾selenium的易用性和requests的高性能也可直接用于PO模式。
适用于网页自动化,有效减少代码量。
#### 软件架构
软件架构说明
#### 安装教程
1. xxxx
2. xxxx
3. xxxx
#### 使用说明
1. xxxx
2. xxxx
3. xxxx
#### 参与贡献
1. Fork 本仓库
2. 新建 Feat_xxx 分支
3. 提交代码
4. 新建 Pull Request
#### 码云特技
1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md
2. 码云官方博客 [blog.gitee.com](https://blog.gitee.com)
3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解码云上的优秀开源项目
4. [GVP](https://gitee.com/gvp) 全称是码云最有价值开源项目,是码云综合评定出的优秀开源项目
5. 码云官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help)
6. 码云封面人物是一档用来展示码云会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)

22
setup.py Normal file
View File

@ -0,0 +1,22 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from setuptools import setup, find_packages
# Distribution metadata for the DrissionPage package.
setup(
    name="DrissionPage",
    # "1.0.0 Alpha" is not a valid PEP 440 version; "1.0.0a0" is the
    # normalized spelling of a 1.0.0 alpha release.
    version="1.0.0a0",
    author="g1879",
    author_email="g1879@qq.com",
    description="page contains selenium and requests.",
    license="BSD",
    keywords="DrissionPage",
    url="https://gitee.com/g1879/DrissionPage",
    packages=find_packages(),
    long_description="Long description is actually short...",
    # The package modules import selenium and requests_html.
    install_requires=[
        "selenium",
        "requests-html",
    ],
    classifiers=[
        # "1 - Alpha" is not a registered trove classifier; alpha is level 3.
        "Development Status :: 3 - Alpha",
        "Topic :: Utilities",
        "License :: OSI Approved :: BSD License",
    ],
)