From 315cf6085a4c3913d55561c2cd34f0d63c04b0e6 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 27 Apr 2020 15:52:36 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86search()=E5=92=8Csea?= =?UTF-8?q?rch=5Fall()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 6 +- DrissionPage/drission.py | 2 +- DrissionPage/driver_page.py | 20 ++- DrissionPage/mix_element.py | 224 +++++++++++++++++++++++++++++++++ DrissionPage/mix_page.py | 18 ++- DrissionPage/mix_page_old.py | 232 ----------------------------------- DrissionPage/session_page.py | 19 ++- README.md | 34 +---- setup.py | 22 ++++ 9 files changed, 304 insertions(+), 273 deletions(-) create mode 100644 DrissionPage/mix_element.py delete mode 100644 DrissionPage/mix_page_old.py create mode 100644 setup.py diff --git a/DrissionPage/config.py b/DrissionPage/config.py index 9eec4d1..aa1fce2 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -10,17 +10,17 @@ Path(global_tmp_path).mkdir(parents=True, exist_ok=True) global_driver_options = { # ---------------已打开的浏览器--------------- - # 'debuggerAddress': '127.0.0.1:9222', + 'debuggerAddress': '127.0.0.1:9222', # ---------------chromedriver路径--------------- 'chromedriver_path': r'D:\python\Google Chrome\Chrome\chromedriver.exe', # ---------------手动指定使用的浏览器位置--------------- # 'binary_location': r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe', # ---------------启动参数--------------- 'arguments': [ - '--headless', # 隐藏浏览器窗口 + # '--headless', # 隐藏浏览器窗口 '--mute-audio', # 静音 '--no-sandbox', - '--blink-settings=imagesEnabled=false', # 不加载图片 + # '--blink-settings=imagesEnabled=false', # 不加载图片 # r'--user-data-dir="E:\tmp\chrome_tmp"', # 指定用户文件夹路径 # '-–disk-cache-dir=""', # 指定缓存路径 'zh_CN.UTF-8', # 编码格式 diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 69a67ba..25d8321 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -12,7 +12,7 @@ from selenium import webdriver from selenium.common.exceptions import WebDriverException from selenium.webdriver.chrome.options import Options -from DrissionPage.config import global_driver_options, global_session_options +from .config import global_driver_options, global_session_options def _get_chrome_options(options: dict) -> Options: diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index bb6cc6d..f849e89 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -95,7 +95,6 @@ class DriverPage(object): elif mode == 'visible': msg = '元素不可见或不存在' ele = wait.until(EC.visibility_of_element_located(loc)) - # print(loc,ele) return ele except: if show_errmsg: @@ -105,6 +104,25 @@ class DriverPage(object): """查找符合条件的所有元素""" return self.find(loc, mode='all', timeout=timeout, show_errmsg=show_errmsg) + def search(self, value: str, mode: str = None, timeout: float = 10): + mode = mode if mode else 'single' + if mode not in ['single', 'all']: + raise ValueError("mode须在'single', 'all'中选择") + ele = [] + try: + loc = 'xpath', f'//*[contains(text(),"{value}")]' + wait = WebDriverWait(self.driver, timeout=timeout) + if mode == 'single': + ele = wait.until(EC.presence_of_element_located(loc)) + elif mode == 'all': + ele = wait.until(EC.presence_of_all_elements_located(loc)) + return ele + except: + return ele + + def search_all(self, value: str, timeout: float = 10): + return self.search(value, mode='all', timeout=timeout) + def get_attr(self, loc_or_ele: Union[WebElement, tuple], attr: str) -> str: """获取元素属性""" ele = self._get_ele(loc_or_ele) diff --git a/DrissionPage/mix_element.py b/DrissionPage/mix_element.py new file mode 100644 index 0000000..29f8f4f --- /dev/null +++ b/DrissionPage/mix_element.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +import re +from html import unescape +from time import sleep +from typing import Union + +from requests_html import Element +from selenium.webdriver.remote.webelement import WebElement +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.wait import WebDriverWait + +from .config import global_tmp_path +from .session_page import _translate_loc + + +class MixElement(object): + def __init__(self, ele: Union[WebElement, Element]): + self._ele = ele + + @property + def ele(self): + return self._ele + + @property + def text(self): + if isinstance(self._ele, Element): + return unescape(self._ele.text).replace('\xa0', ' ') + else: + return unescape(self.attr('innerText')).replace('\xa0', ' ') + + @property + def html(self): + if isinstance(self._ele, Element): + html = unescape(self._ele.html).replace('\xa0', ' ') + r = re.match(r'<.*?>(.*)', html, flags=re.DOTALL) + return r.group(1) + else: + return unescape(self.attr('innerHTML')).replace('\xa0', ' ') + + @property + def tag_name(self): + if isinstance(self._ele, Element): + html = unescape(self._ele.html).replace('\xa0', ' ') + r = re.match(r'^<(.*?)\s+', html, flags=re.DOTALL) + return r.group(1) + else: + return self._ele.tag_name + + def attr(self, attr): + if isinstance(self._ele, Element): + try: + if attr == 'href': + # 如直接获取attr只能获取相对地址 + for link in self._ele.absolute_links: + return link + elif attr == 'class': + class_str = '' + for key, i in enumerate(self._ele.attrs['class']): + class_str += ' ' if key > 0 else '' + class_str += i + return class_str + else: + return self._ele.attrs[attr] + except: + return '' + else: + return self._ele.get_attribute(attr) + + def find(self, loc: tuple, mode: str = None, show_errmsg: bool = True): + """根据loc获取元素""" + if isinstance(self._ele, Element): + mode = mode if mode else 'single' + if mode not in ['single', 'all']: + raise ValueError("mode须在'single', 'all'中选择") + loc_by, loc_str = _translate_loc(loc) + msg = first = None + try: + if mode == 'single': + msg = '未找到元素' + first = True + elif mode == 'all': + msg = '未找到元素s' + first = False + if loc_by == 'xpath': + ele = self.ele.xpath(loc_str, first=first, _encoding='utf-8') + else: + ele = self.ele.find(loc_str, first=first, _encoding='utf-8') + return MixElement(ele) + except: + if show_errmsg: + print(msg, loc) + raise + else: # d模式 + mode = mode if mode else 'single' + if mode not in ['single', 'all', 'visible']: + raise ValueError("mode须在'single', 'all', 'visible'中选择") + msg = ele = None + try: + wait = WebDriverWait(self.ele.parent, timeout=10) + if mode == 'single': + msg = '未找到元素' + ele = wait.until(EC.presence_of_element_located(loc)) + elif mode == 'all': + msg = '未找到元素s' + ele = wait.until(EC.presence_of_all_elements_located(loc)) + elif mode == 'visible': + msg = '元素不可见或不存在' + ele = wait.until(EC.visibility_of_element_located(loc)) + return MixElement(ele) + except: + if show_errmsg: + print(msg, loc) + + def find_all(self, loc: tuple, show_errmsg: bool = True): + return self.find(loc, mode='all', show_errmsg=show_errmsg) + + def search(self, value: str, mode: str = None): + """根据内容获取元素""" + mode = mode if mode else 'single' + if mode not in ['single', 'all']: + raise ValueError("mode须在'single', 'all'中选择") + if isinstance(self._ele, Element): + try: + if mode == 'single': + ele = self.ele.xpath(f'.//*[contains(text(),"{value}")]', first=True) + return MixElement(ele) + elif mode == 'all': + eles = self.ele.xpath(f'.//*[contains(text(),"{value}")]') + return [MixElement(ele) for ele in eles] + except: + return None + else: # d模式 + try: + loc = 'xpath', f'.//*[contains(text(),"{value}")]' + wait = WebDriverWait(self.ele.parent, timeout=10) + if mode == 'single': + ele = wait.until(EC.presence_of_element_located(loc)) + return MixElement(ele) + elif mode == 'all': + eles = wait.until(EC.presence_of_all_elements_located(loc)) + return [MixElement(ele) for ele in eles] + except: + return None + + def search_all(self, value: str): + return self.search(value, mode='all') + + # -----------------以下为d模式独占------------------- + def click(self): + """点击""" + for _ in range(10): + try: + self.ele.click() + return True + except Exception as e: + print(e) + sleep(0.2) + # 若点击失败,用js方式点击 + print('用js点击') + try: + self.run_script('arguments[0].click()') + return True + except: + raise + + def input(self, value, clear: bool = True): + try: + if clear: + self.run_script("arguments[0].value=''") + self.ele.send_keys(value) + return True + except: + raise + + def run_script(self, script: str): + self.ele.parent.execute_script(script, self.ele) + + def submit(self): + self.ele.submit() + + def clear(self): + self.ele.clear() + + def is_selected(self): + return self.ele.is_selected() + + def is_enabled(self): + return self.ele.is_enabled() + + def is_displayed(self): + return self.ele.is_displayed() + + @property + def size(self): + return self.ele.size + + @property + def location(self): + return self.ele.location + + def screenshot(self, path: str = None, filename: str = None): + path = path if path else global_tmp_path + name = filename if filename else self.tag_name + # 等待元素加载完成 + if self.tag_name == 'img': + js = 'return arguments[0].complete && typeof arguments[0].naturalWidth ' \ + '!= "undefined" && arguments[0].naturalWidth > 0' + while not self.run_script(js): + pass + img_path = f'{path}\\{name}.png' + self.ele.screenshot(img_path) + return img_path + + def select(self, value: str): + pass + + def set_attr(self, attr, value): + """设置元素属性""" + try: + self.run_script(f"arguments[0].{attr} = '{value}';") + return True + except: + raise diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index 9e0c106..bc11e8a 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -12,9 +12,9 @@ from requests_html import Element, HTMLSession from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.remote.webelement import WebElement -from DrissionPage.drission import Drission -from DrissionPage.driver_page import DriverPage -from DrissionPage.session_page import SessionPage +from .drission import Drission +from .driver_page import DriverPage +from .session_page import SessionPage class Null(object): @@ -183,6 +183,18 @@ class MixPage(Null, SessionPage, DriverPage): elif self._mode == 'd': return super(SessionPage, self).find_all(loc, timeout=timeout, show_errmsg=show_errmsg) + def search(self, value: str, mode: str = None, timeout: float = 10): + if self._mode == 's': + return super().search(value, mode=mode) + elif self._mode == 'd': + return super(SessionPage, self).search(value, mode=mode, timeout=timeout) + + def search_all(self, value: str, timeout: float = 10): + if self._mode == 's': + return super().search_all(value) + elif self._mode == 'd': + return super(SessionPage, self).search_all(value, timeout=timeout) + def get_attr(self, loc_or_ele: Union[WebElement, Element, tuple], attr: str) -> str: """获取元素属性值""" if self._mode == 's': diff --git a/DrissionPage/mix_page_old.py b/DrissionPage/mix_page_old.py deleted file mode 100644 index 1b4b28f..0000000 --- a/DrissionPage/mix_page_old.py +++ /dev/null @@ -1,232 +0,0 @@ -#!/usr/bin/env python -# -*- coding:utf-8 -*- -""" -旧版MixPage,已弃用 -在MixPage类中使用DriverPage和SessionPage对象,使用时根据模式调用相应对象的函数 -问题是须要在MixPage类中为这两个类中的函数写一一对应的调用函数 -新版中直接继承这两个类,只须要为这两个类共有的函数写调用函数即可 -""" -from abc import abstractmethod -from typing import Union -from urllib import parse - -from requests_html import Element -from selenium.webdriver.remote.webelement import WebElement - -from DrissionPage.drission import Drission -from DrissionPage.driver_page import DriverPage -from DrissionPage.session_page import SessionPage - - -class MixPage: - def __init__(self, drission: Drission, locs=None, mode='d'): - self._drission = drission - self._session = None - self._driver = None - self._session_page = None - self._driver_page = None - self._url = None - self._session_url = None - self._locs = locs - self._mode = mode - if mode == 's': - self._session_page = self.s_page - else: - self._driver_page = self.d_page - self._open_self_url() - - @abstractmethod - def _open_self_url(self): - pass - - @property - def mode(self): - return self._mode - - @mode.setter - def mode(self, value): - self._mode = value - - def change_mode(self, mode=None): - if mode == self.mode: - return - self.mode = 's' if self.mode == 'd' else 'd' - - @property - def drission(self): - return self._drission - - @property - def response(self): - return self.s_page.response - - @property - def session(self): - if self._session is None: - self._session = self._drission.session - return self._session - - @property - def driver(self): - if self._driver is None: - self._driver = self._drission.driver - return self._driver - - @property - def d_page(self): - if self._driver_page is None: - self._driver_page = DriverPage(self.driver) - if self._url: - self._init_page() - return self._driver_page - - @property - def s_page(self): - if self._session_page is None: - self._session_page = SessionPage(self.session) - if self._url: - self._init_page() - self.refresh_url() # 每次调用session页面时,使url和driver页面保持一致 - return self._session_page - - @property - def url(self): - if self.mode == 'd': - return self.d_page.url - else: - return self._url - - def _init_page(self): - if self._session_page: - self.cookies_to_driver(self._url) - self.d_page.get(self._url) - elif self._driver_page: - self.cookies_to_session() - self.s_page.get(self._url) - - def goto(self, url: str, url_data: dict = None): - """跳转到一个url""" - to_url = f'{url}?{parse.urlencode(url_data)}' if url_data else url - if self._url == to_url: - return - now_url = self._url - self._url = to_url - if self._driver_page: - if self._session_page: - self.cookies_to_driver(now_url) - self._driver_page.get(to_url, url_data) - if not self._session_page: - return self.check_driver_url() - if self._session_page: - self._session_url = to_url - if self._session_page: - self.cookies_to_session() - return self.s_page.goto(to_url, url_data) - - def check_driver_url(self) -> bool: - """由子类依据不同的页面自行实现""" - return True - - def refresh_url(self): - """使session的url与driver当前保持一致,并复制cookies到session""" - if self._driver and (self._url != self._driver.current_url or self._session_url != self._driver.current_url): - self._url = self._driver.current_url - self._session_url = self._driver.current_url - self.cookies_to_session() - self._session_page.get(self._url) - - def cookies_to_session(self): - self._drission.cookies_to_session() - - def cookies_to_driver(self, url=None): - u = url if url else self._url - self._drission.cookies_to_driver(u) - - # ----------------以下为共用函数----------------------- - def find(self, loc, timeout=10, show_errmsg=True) -> Union[WebElement, Element]: - if self._mode == 's': - return self.s_page.find(loc, show_errmsg) - elif self._mode == 'd': - return self.d_page.find(loc, timeout, show_errmsg) - - def find_all(self, loc, timeout=10, show_errmsg=True) -> list: - if self._mode == 's': - return self.s_page.find_all(loc, show_errmsg) - elif self._mode == 'd': - return self.d_page.find_all(loc, timeout, show_errmsg) - - def get_attr(self, loc_or_ele, attr) -> str: - if self._mode == 's': - return self.s_page.get_attr(loc_or_ele, attr) - elif self._mode == 'd': - return self.d_page.get_attr(loc_or_ele, attr) - - def get_html(self, loc_or_ele) -> str: - if self._mode == 's': - return self.s_page.get_html(loc_or_ele) - elif self._mode == 'd': - return self.d_page.get_html(loc_or_ele) - - def get_text(self, loc_or_ele) -> str: - if self._mode == 's': - return self.s_page.get_text(loc_or_ele) - elif self._mode == 'd': - return self.d_page.get_text(loc_or_ele) - - def get_source(self): - if self._mode == 's': - return self.s_page.get_html() - elif self._mode == 'd': - return self.d_page.get_html() - - def get_cookies(self): - if self._mode == 's': - return self.s_page.cookies - elif self._mode == 'd': - return self.d_page.cookies - - # ----------------以下为driver page专用函数----------------- - def input(self, loc_or_ele, value: str, clear=True) -> bool: - return self.d_page.input(loc_or_ele, value, clear) - - def click(self, loc_or_ele) -> bool: - return self.d_page.click(loc_or_ele) - - def set_attr(self, loc_or_ele, attribute: str, value: str) -> bool: - return self.d_page.set_attr(loc_or_ele, attribute, value) - - def run_script(self, loc_or_ele, script: str): - return self.d_page.run_script(loc_or_ele, script) - - def get_tabs_sum(self) -> int: - return self.d_page.get_tabs_sum() - - def get_tab_num(self) -> int: - return self.d_page.get_tab_num() - - def to_tab(self, index: int = 0): - return self.d_page.to_tab(index) - - def close_current_tab(self): - return self.d_page.close_current_tab() - - def close_other_tabs(self, tab_index: int = None): - return self.d_page.close_other_tabs(tab_index) - - def to_iframe(self, loc_or_ele): - return self.d_page.to_iframe(loc_or_ele) - - def get_screen(self, loc_or_ele, path: str, file_name: str = None) -> str: - return self.d_page.get_screen(loc_or_ele, path, file_name) - - def choose_select_list(self, loc_or_ele, text): - return self.d_page.choose_select_list(loc_or_ele, text) - - def refresh(self): - return self.d_page.refresh() - - def back(self): - return self.d_page.back() - - def set_window_size(self, x: int = None, y: int = None): - return self.d_page.set_window_size(x, y) diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index c85b89e..99eedbf 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -11,7 +11,7 @@ from urllib import parse from requests_html import Element, HTMLSession, HTMLResponse -from DrissionPage.config import global_session_options +from .config import global_session_options def _translate_loc(loc): @@ -115,6 +115,23 @@ class SessionPage(object): """查找符合条件的所有元素""" return self.find(loc, mode='all', show_errmsg=True) + def search(self, value: str, mode: str = None): + mode = mode if mode else 'single' + if mode not in ['single', 'all']: + raise ValueError("mode须在'single', 'all'中选择") + try: + if mode == 'single': + ele = self.response.html.xpath(f'.//*[contains(text(),"{value}")]', first=True) + return ele + elif mode == 'all': + eles = self.response.html.xpath(f'.//*[contains(text(),"{value}")]') + return eles + except: + return None + + def search_all(self, value: str): + return self.search(value, mode='all') + def _get_ele(self, loc_or_ele: Union[Element, tuple]) -> Element: """获取loc或元素实例,返回元素实例""" # ====================================== diff --git a/README.md b/README.md index 5de3461..d71d298 100644 --- a/README.md +++ b/README.md @@ -1,37 +1,7 @@ # DrissionPage #### 介绍 -一个整合了selenium和requests_html的模块,封装了常用页面操作,可实现两种模式的无缝切换。兼顾selenium的易用性和requests的高性能,也可直接用于PO模式。 - -#### 软件架构 -软件架构说明 +一个整合了selenium和requests_html的模块,封装了常用页面操作,可实现两种模式的无缝切换。兼顾selenium的易用性和requests的高性能,也可直接用于PO模式。 +适用于网页自动化,有效减少代码量。 -#### 安装教程 - -1. xxxx -2. xxxx -3. xxxx - -#### 使用说明 - -1. xxxx -2. xxxx -3. xxxx - -#### 参与贡献 - -1. Fork 本仓库 -2. 新建 Feat_xxx 分支 -3. 提交代码 -4. 新建 Pull Request - - -#### 码云特技 - -1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md -2. 码云官方博客 [blog.gitee.com](https://blog.gitee.com) -3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解码云上的优秀开源项目 -4. [GVP](https://gitee.com/gvp) 全称是码云最有价值开源项目,是码云综合评定出的优秀开源项目 -5. 码云官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help) -6. 码云封面人物是一档用来展示码云会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..2ee2f9c --- /dev/null +++ b/setup.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- + +from setuptools import setup, find_packages + +setup( + name="DrissionPage", + version="1.0.0 Alpha", + author="g1879", + author_email="g1879@qq.com", + description="page contains selenium and requests.", + license="BSD", + keywords="DrissionPage", + url="https://gitee.com/g1879/DrissionPage", + packages=find_packages(), + long_description="Long descrition is actually short...", + classifiers=[ + "Development Status :: 1 - Alpha", + "Topic :: Utilities", + "License :: OSI Approved :: BSD License", + ], +) \ No newline at end of file