mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-11-29 18:47:34 +08:00
4.0.0b1对连接逻辑进行完全重构,'none'模式不主动停止网页且无视timeout;wait.new_tab()成功时返回id
This commit is contained in:
parent
341591edc6
commit
368665df57
@ -367,7 +367,7 @@ class BasePage(BaseParser):
|
||||
self.retry_times = 3
|
||||
self.retry_interval = 2
|
||||
self._DownloadKit = None
|
||||
self._download_path = str(Path('../..').absolute())
|
||||
self._download_path = str(Path('.').absolute())
|
||||
|
||||
@property
|
||||
def title(self):
|
||||
|
@ -6,7 +6,7 @@
|
||||
from json import dumps, loads
|
||||
from queue import Queue, Empty
|
||||
from threading import Thread, Event
|
||||
from time import perf_counter, sleep
|
||||
from time import perf_counter
|
||||
|
||||
from requests import get
|
||||
from websocket import WebSocketTimeoutException, WebSocketException, WebSocketConnectionClosedException, \
|
||||
@ -56,8 +56,8 @@ class ChromiumDriver(object):
|
||||
message_json = dumps(message)
|
||||
|
||||
if self._debug:
|
||||
if self._debug is True or (
|
||||
isinstance(self._debug, str) and message.get('method', '').startswith(self._debug)):
|
||||
if self._debug is True or (isinstance(self._debug, str) and
|
||||
message.get('method', '').startswith(self._debug)):
|
||||
print(f'发> {message_json}')
|
||||
elif isinstance(self._debug, (list, tuple, set)):
|
||||
for m in self._debug:
|
||||
@ -74,17 +74,16 @@ class ChromiumDriver(object):
|
||||
|
||||
while not self._stopped.is_set():
|
||||
try:
|
||||
return self.method_results[message['id']].get_nowait()
|
||||
return self.method_results[message['id']].get(.2)
|
||||
|
||||
except Empty:
|
||||
if self.alert_flag:
|
||||
self.alert_flag = False
|
||||
return {'result': []}
|
||||
return {'error': {'message': 'alert exists.'}}
|
||||
|
||||
if timeout is not None and perf_counter() > timeout:
|
||||
return {'error': {'message': 'timeout'}}
|
||||
|
||||
sleep(.02)
|
||||
continue
|
||||
|
||||
except Exception:
|
||||
@ -138,7 +137,11 @@ class ChromiumDriver(object):
|
||||
|
||||
function = self.event_handlers.get(event['method'])
|
||||
if function:
|
||||
if self._debug:
|
||||
print(f'开始执行 {function.__name__}')
|
||||
function(**event['params'])
|
||||
if self._debug:
|
||||
print(f'执行 {function.__name__}完毕')
|
||||
|
||||
self.event_queue.task_done()
|
||||
|
||||
|
@ -376,7 +376,7 @@ class ChromiumElement(DrissionElement):
|
||||
def run_js(self, script, *args, as_expr=False):
|
||||
"""对本元素执行javascript代码
|
||||
:param script: js文本
|
||||
:param args: 参数,按顺序在js文本中对应argument[0]、argument[1]...
|
||||
:param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]...
|
||||
:param as_expr: 是否作为表达式运行,为True时args无效
|
||||
:return: 运行的结果
|
||||
"""
|
||||
@ -385,7 +385,7 @@ class ChromiumElement(DrissionElement):
|
||||
def run_async_js(self, script, *args, as_expr=False):
|
||||
"""以异步方式对本元素执行javascript代码
|
||||
:param script: js文本
|
||||
:param args: 参数,按顺序在js文本中对应argument[0]、argument[1]...
|
||||
:param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]...
|
||||
:param as_expr: 是否作为表达式运行,为True时args无效
|
||||
:return: None
|
||||
"""
|
||||
@ -841,7 +841,7 @@ class ChromiumShadowRoot(BaseElement):
|
||||
def run_js(self, script, *args, as_expr=False):
|
||||
"""运行javascript代码
|
||||
:param script: js文本
|
||||
:param args: 参数,按顺序在js文本中对应argument[0]、argument[1]...
|
||||
:param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]...
|
||||
:param as_expr: 是否作为表达式运行,为True时args无效
|
||||
:return: 运行的结果
|
||||
"""
|
||||
@ -850,7 +850,7 @@ class ChromiumShadowRoot(BaseElement):
|
||||
def run_async_js(self, script, *args, as_expr=False):
|
||||
"""以异步方式执行js代码
|
||||
:param script: js文本
|
||||
:param args: 参数,按顺序在js文本中对应argument[0]、argument[1]...
|
||||
:param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]...
|
||||
:param as_expr: 是否作为表达式运行,为True时args无效
|
||||
:return: None
|
||||
"""
|
||||
@ -1042,9 +1042,9 @@ class ChromiumShadowRoot(BaseElement):
|
||||
loc = loc[0], loc[1][5:]
|
||||
|
||||
timeout = timeout if timeout is not None else self.page.timeout
|
||||
t1 = perf_counter()
|
||||
end_time = perf_counter() + timeout
|
||||
eles = make_session_ele(self.html).eles(loc)
|
||||
while not eles and perf_counter() - t1 <= timeout:
|
||||
while not eles and perf_counter() <= end_time:
|
||||
eles = make_session_ele(self.html).eles(loc)
|
||||
|
||||
if not eles:
|
||||
@ -1299,7 +1299,7 @@ def run_js(page_or_ele, script, as_expr=False, timeout=None, args=None):
|
||||
:param script: js文本
|
||||
:param as_expr: 是否作为表达式运行,为True时args无效
|
||||
:param timeout: 超时时间
|
||||
:param args: 参数,按顺序在js文本中对应argument[0]、argument[1]...
|
||||
:param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]...
|
||||
:return: js执行结果
|
||||
"""
|
||||
if isinstance(page_or_ele, (ChromiumElement, ChromiumShadowRoot)):
|
||||
@ -1701,7 +1701,8 @@ class ChromiumScroll(object):
|
||||
x = r['layoutViewport']['pageX']
|
||||
y = r['layoutViewport']['pageY']
|
||||
|
||||
while True:
|
||||
end_time = perf_counter() + self._driver.page.timeout
|
||||
while perf_counter() < end_time:
|
||||
sleep(.1)
|
||||
r = page.run_cdp('Page.getLayoutMetrics')
|
||||
x1 = r['layoutViewport']['pageX']
|
||||
|
@ -24,8 +24,8 @@ from .._units.network_listener import NetworkListener
|
||||
from .._units.screencast import Screencast
|
||||
from .._units.setter import ChromiumBaseSetter
|
||||
from .._units.waiter import ChromiumBaseWaiter
|
||||
from ..errors import ContextLossError, ElementLossError, AlertExistsError, CDPError, TabClosedError, \
|
||||
NoRectError, BrowserConnectError, GetDocumentError
|
||||
from ..errors import (ContextLossError, ElementLossError, CDPError, TabClosedError, NoRectError, BrowserConnectError,
|
||||
AlertExistsError)
|
||||
|
||||
|
||||
class ChromiumBase(BasePage):
|
||||
@ -41,14 +41,14 @@ class ChromiumBase(BasePage):
|
||||
self._is_loading = None
|
||||
self._root_id = None # object id
|
||||
self._debug = False
|
||||
self._debug_recorder = None
|
||||
self._set = None
|
||||
self._screencast = None
|
||||
self._actions = None
|
||||
self._listener = None
|
||||
self._has_alert = False
|
||||
self._ready_state = None
|
||||
|
||||
self._download_path = str(Path('../..').absolute())
|
||||
self._download_path = str(Path('.').absolute())
|
||||
|
||||
if isinstance(address, int) or (isinstance(address, str) and address.isdigit()):
|
||||
address = f'127.0.0.1:{address}'
|
||||
@ -76,7 +76,6 @@ class ChromiumBase(BasePage):
|
||||
:param tab_id: 要控制的标签页id,不指定默认为激活的
|
||||
:return: None
|
||||
"""
|
||||
self._first_run = True
|
||||
self._is_reading = False
|
||||
self._upload_list = None
|
||||
self._wait = None
|
||||
@ -89,9 +88,15 @@ class ChromiumBase(BasePage):
|
||||
if not tab_id:
|
||||
raise BrowserConnectError('浏览器连接失败,可能是浏览器版本原因。')
|
||||
tab_id = tab_id[0]
|
||||
|
||||
self._driver_init(tab_id)
|
||||
self._get_document()
|
||||
self._first_run = False
|
||||
if self.ready_state == 'complete' and self._ready_state is None:
|
||||
self._get_document()
|
||||
self._ready_state = 'complete'
|
||||
|
||||
r = self.run_cdp('Page.getFrameTree')
|
||||
for i in findall(r"'id': '(.*?)'", str(r)):
|
||||
self.browser._frames[i] = self.tab_id
|
||||
|
||||
def _driver_init(self, tab_id):
|
||||
"""新建页面、页面刷新、切换标签页后要进行的cdp参数初始化
|
||||
@ -99,6 +104,7 @@ class ChromiumBase(BasePage):
|
||||
:return: None
|
||||
"""
|
||||
self._is_loading = True
|
||||
self._frame_id = tab_id
|
||||
self._driver = ChromiumDriver(tab_id=tab_id, tab_type='page', address=self.address)
|
||||
self._alert = Alert()
|
||||
self._driver.set_listener('Page.javascriptDialogOpening', self._on_alert_open)
|
||||
@ -108,59 +114,20 @@ class ChromiumBase(BasePage):
|
||||
self._driver.call_method('Page.enable')
|
||||
self._driver.call_method('Emulation.setFocusEmulationEnabled', enabled=True)
|
||||
|
||||
self._driver.set_listener('Page.frameStoppedLoading', self._onFrameStoppedLoading)
|
||||
self._driver.set_listener('Page.frameStartedLoading', self._onFrameStartedLoading)
|
||||
self._driver.set_listener('DOM.documentUpdated', self._onDocumentUpdated)
|
||||
self._driver.set_listener('Page.loadEventFired', self._onLoadEventFired)
|
||||
self._driver.set_listener('Page.frameNavigated', self._onFrameNavigated)
|
||||
self._driver.set_listener('Page.domContentEventFired', self._onDomContentEventFired)
|
||||
self._driver.set_listener('Page.loadEventFired', self._onLoadEventFired)
|
||||
self._driver.set_listener('Page.frameStoppedLoading', self._onFrameStoppedLoading)
|
||||
self._driver.set_listener('Page.frameAttached', self._onFrameAttached)
|
||||
self._driver.set_listener('Page.frameDetached', self._onFrameDetached)
|
||||
|
||||
def _get_document(self):
|
||||
"""刷新cdp使用的document数据"""
|
||||
if self._is_reading:
|
||||
return
|
||||
|
||||
self._is_reading = True
|
||||
|
||||
if self._debug:
|
||||
print('获取document')
|
||||
if self._debug_recorder:
|
||||
self._debug_recorder.add_data((perf_counter(), '获取document', '开始'))
|
||||
|
||||
try: # 遇到过网站在标签页关闭时触发读取文档导致错误,屏蔽掉
|
||||
self._wait_loaded()
|
||||
except TabClosedError:
|
||||
return
|
||||
|
||||
end_time = perf_counter() + 10
|
||||
while perf_counter() < end_time:
|
||||
try:
|
||||
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
|
||||
self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId']
|
||||
if self._debug_recorder:
|
||||
self._debug_recorder.add_data((perf_counter(), '信息', f'root_id:{self._root_id}'))
|
||||
break
|
||||
|
||||
except CDPError as e:
|
||||
err = e
|
||||
if self._debug:
|
||||
print('重试获取document')
|
||||
if self._debug_recorder:
|
||||
self._debug_recorder.add_data((perf_counter(), 'err', '读取root_id出错'))
|
||||
|
||||
sleep(.1)
|
||||
|
||||
else:
|
||||
txt = f'请检查是否创建了过多页面对象同时操作浏览器。\n如无法解决,请把以下信息报告作者。\n{err._info}\n' \
|
||||
f'报告网址:https://gitee.com/g1879/DrissionPage/issues'
|
||||
raise GetDocumentError(txt)
|
||||
|
||||
if self._debug:
|
||||
print('获取document结束')
|
||||
if self._debug_recorder:
|
||||
self._debug_recorder.add_data((perf_counter(), '获取document', '结束'))
|
||||
|
||||
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
|
||||
self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId']
|
||||
r = self.run_cdp('Page.getFrameTree')
|
||||
for i in findall(r"'id': '(.*?)'", str(r)):
|
||||
self.browser._frames[i] = self.tab_id
|
||||
@ -173,25 +140,18 @@ class ChromiumBase(BasePage):
|
||||
:param timeout: 超时时间
|
||||
:return: 是否成功,超时返回False
|
||||
"""
|
||||
timeout = timeout if timeout is not None else self.timeouts.page_load
|
||||
if self.page_load_strategy == 'none':
|
||||
return True
|
||||
|
||||
timeout = timeout if timeout is not None else self.timeouts.page_load
|
||||
end_time = perf_counter() + timeout
|
||||
while perf_counter() < end_time:
|
||||
state = self.ready_state
|
||||
if state is None: # 存在alert的情况
|
||||
return None
|
||||
|
||||
if self._debug_recorder:
|
||||
self._debug_recorder.add_data((perf_counter(), 'waiting', state))
|
||||
|
||||
if state == 'complete':
|
||||
if self._ready_state == 'complete':
|
||||
return True
|
||||
elif self.page_load_strategy == 'eager' and state in ('interactive', 'complete'):
|
||||
self.stop_loading()
|
||||
return True
|
||||
elif self.page_load_strategy == 'none':
|
||||
elif self.page_load_strategy == 'eager' and self._ready_state in ('interactive', 'complete'):
|
||||
self.stop_loading()
|
||||
return True
|
||||
|
||||
sleep(.1)
|
||||
|
||||
self.stop_loading()
|
||||
@ -209,50 +169,44 @@ class ChromiumBase(BasePage):
|
||||
def _onFrameStartedLoading(self, **kwargs):
|
||||
"""页面开始加载时执行"""
|
||||
self.browser._frames[kwargs['frameId']] = self.tab_id
|
||||
if kwargs['frameId'] == self._target_id:
|
||||
if kwargs['frameId'] == self._frame_id:
|
||||
self._ready_state = 'loading'
|
||||
self._is_loading = True
|
||||
|
||||
if self._debug:
|
||||
print('页面开始加载 FrameStartedLoading')
|
||||
if self._debug_recorder:
|
||||
self._debug_recorder.add_data((perf_counter(), '加载流程', 'FrameStartedLoading'))
|
||||
print(f'frameStartedLoading {kwargs}')
|
||||
|
||||
def _onFrameNavigated(self, **kwargs):
|
||||
"""页面跳转时执行"""
|
||||
if kwargs['frame']['id'] == self._frame_id:
|
||||
self._ready_state = 'loading'
|
||||
self._is_loading = True
|
||||
if self._debug:
|
||||
print(f'FrameNavigated {kwargs}')
|
||||
|
||||
def _onDomContentEventFired(self, **kwargs):
|
||||
"""在页面刷新、变化后重新读取页面内容"""
|
||||
self._ready_state = 'interactive'
|
||||
if self.page_load_strategy == 'eager':
|
||||
self.run_cdp('Page.stopLoading')
|
||||
if self._debug:
|
||||
print(f'DomContentEventFired {kwargs}')
|
||||
|
||||
def _onLoadEventFired(self, **kwargs):
|
||||
"""在页面刷新、变化后重新读取页面内容"""
|
||||
self._ready_state = 'complete'
|
||||
if self._debug:
|
||||
print(f'LoadEventFired {kwargs}')
|
||||
# self._get_document()
|
||||
|
||||
def _onFrameStoppedLoading(self, **kwargs):
|
||||
"""页面加载完成后执行"""
|
||||
self.browser._frames[kwargs['frameId']] = self.tab_id
|
||||
if kwargs['frameId'] == self._target_id and self._first_run is False and self._is_loading:
|
||||
if kwargs['frameId'] == self._frame_id:
|
||||
self._ready_state = 'complete'
|
||||
if self._debug:
|
||||
print('页面停止加载 FrameStoppedLoading')
|
||||
if self._debug_recorder:
|
||||
self._debug_recorder.add_data((perf_counter(), '加载流程', 'FrameStoppedLoading'))
|
||||
|
||||
print(f'FrameStoppedLoading {kwargs}')
|
||||
self._get_document()
|
||||
|
||||
def _onLoadEventFired(self, **kwargs):
|
||||
"""在页面刷新、变化后重新读取页面内容"""
|
||||
if self._debug:
|
||||
print('loadEventFired')
|
||||
if self._debug_recorder:
|
||||
self._debug_recorder.add_data((perf_counter(), '加载流程', 'loadEventFired'))
|
||||
|
||||
self._get_document()
|
||||
|
||||
def _onDocumentUpdated(self, **kwargs):
|
||||
"""页面跳转时执行"""
|
||||
if self._debug:
|
||||
print('documentUpdated')
|
||||
if self._debug_recorder:
|
||||
self._debug_recorder.add_data((perf_counter(), '加载流程', 'documentUpdated'))
|
||||
|
||||
def _onFrameNavigated(self, **kwargs):
|
||||
"""页面跳转时执行"""
|
||||
if kwargs['frame'].get('parentId', None) == self._target_id and self._first_run is False and self._is_loading:
|
||||
self._is_loading = True
|
||||
if self._debug:
|
||||
print('navigated')
|
||||
if self._debug_recorder:
|
||||
self._debug_recorder.add_data((perf_counter(), '加载流程', 'navigated'))
|
||||
|
||||
def _onFileChooserOpened(self, **kwargs):
|
||||
"""文件选择框打开时执行"""
|
||||
if self._upload_list:
|
||||
@ -344,14 +298,13 @@ class ChromiumBase(BasePage):
|
||||
|
||||
@property
|
||||
def ready_state(self):
|
||||
"""返回当前页面加载状态,'loading' 'interactive' 'complete',有弹出框时返回None"""
|
||||
while True:
|
||||
try:
|
||||
return self.run_cdp('Runtime.evaluate', expression='document.readyState;')['result']['value']
|
||||
except (AlertExistsError, TypeError):
|
||||
return None
|
||||
except ContextLossError:
|
||||
continue
|
||||
"""返回当前页面加载状态,'loading' 'interactive' 'complete','timeout' 表示可能有弹出框"""
|
||||
try:
|
||||
return self.run_cdp('Runtime.evaluate', expression='document.readyState;', _timeout=3)['result']['value']
|
||||
except ContextLossError:
|
||||
return None
|
||||
except TimeoutError:
|
||||
return 'timeout'
|
||||
|
||||
@property
|
||||
def size(self):
|
||||
@ -439,9 +392,6 @@ class ChromiumBase(BasePage):
|
||||
:param cmd_args: 参数
|
||||
:return: 执行的结果
|
||||
"""
|
||||
# if self.driver.has_alert and cmd != HANDLE_ALERT_METHOD:
|
||||
# raise AlertExistsError
|
||||
|
||||
r = self.driver.call_method(cmd, **cmd_args)
|
||||
if ERROR not in r:
|
||||
return r
|
||||
@ -455,8 +405,10 @@ class ChromiumBase(BasePage):
|
||||
raise ElementLossError
|
||||
elif error == 'tab closed':
|
||||
raise TabClosedError
|
||||
elif error == 'alert exists':
|
||||
pass
|
||||
elif error == 'timeout':
|
||||
raise TimeoutError
|
||||
elif error == 'alert exists.':
|
||||
raise AlertExistsError
|
||||
elif error in ('Node does not have a layout object', 'Could not compute box model.'):
|
||||
raise NoRectError
|
||||
elif r['type'] == 'call_method_error':
|
||||
@ -476,7 +428,7 @@ class ChromiumBase(BasePage):
|
||||
def run_js(self, script, *args, as_expr=False):
|
||||
"""运行javascript代码
|
||||
:param script: js文本
|
||||
:param args: 参数,按顺序在js文本中对应argument[0]、argument[1]...
|
||||
:param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]...
|
||||
:param as_expr: 是否作为表达式运行,为True时args无效
|
||||
:return: 运行的结果
|
||||
"""
|
||||
@ -485,7 +437,7 @@ class ChromiumBase(BasePage):
|
||||
def run_js_loaded(self, script, *args, as_expr=False):
|
||||
"""运行javascript代码,执行前等待页面加载完毕
|
||||
:param script: js文本
|
||||
:param args: 参数,按顺序在js文本中对应argument[0]、argument[1]...
|
||||
:param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]...
|
||||
:param as_expr: 是否作为表达式运行,为True时args无效
|
||||
:return: 运行的结果
|
||||
"""
|
||||
@ -495,7 +447,7 @@ class ChromiumBase(BasePage):
|
||||
def run_async_js(self, script, *args, as_expr=False):
|
||||
"""以异步方式执行js代码
|
||||
:param script: js文本
|
||||
:param args: 参数,按顺序在js文本中对应argument[0]、argument[1]...
|
||||
:param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]...
|
||||
:param as_expr: 是否作为表达式运行,为True时args无效
|
||||
:return: None
|
||||
"""
|
||||
@ -691,11 +643,12 @@ class ChromiumBase(BasePage):
|
||||
"""页面停止加载"""
|
||||
if self._debug:
|
||||
print('停止页面加载')
|
||||
if self._debug_recorder:
|
||||
self._debug_recorder.add_data((perf_counter(), '操作', '停止页面加载'))
|
||||
|
||||
self.run_cdp('Page.stopLoading')
|
||||
while self.ready_state not in ('complete', None):
|
||||
try:
|
||||
self.run_cdp('Page.stopLoading')
|
||||
except TabClosedError:
|
||||
pass
|
||||
end_time = perf_counter() + self.timeouts.page_load
|
||||
while self._ready_state != 'complete' and perf_counter() < end_time:
|
||||
sleep(.1)
|
||||
|
||||
def remove_ele(self, loc_or_ele):
|
||||
@ -881,32 +834,39 @@ class ChromiumBase(BasePage):
|
||||
"""
|
||||
err = None
|
||||
timeout = timeout if timeout is not None else self.timeouts.page_load
|
||||
|
||||
for t in range(times + 1):
|
||||
err = None
|
||||
result = self.run_cdp('Page.navigate', url=to_url)
|
||||
|
||||
is_timeout = self._wait_loaded(timeout)
|
||||
if is_timeout is None:
|
||||
return None
|
||||
is_timeout = not is_timeout
|
||||
self.wait.load_complete()
|
||||
|
||||
if is_timeout:
|
||||
end_time = perf_counter() + timeout
|
||||
result = self.run_cdp('Page.navigate', url=to_url, _timeout=timeout)
|
||||
if result.get('error') == 'timeout':
|
||||
err = TimeoutError('页面连接超时。')
|
||||
if 'errorText' in result:
|
||||
|
||||
elif 'errorText' in result:
|
||||
err = ConnectionError(result['errorText'])
|
||||
|
||||
if err:
|
||||
sleep(interval)
|
||||
if self._debug or show_errmsg:
|
||||
print(f'重试{t + 1} {to_url}')
|
||||
self.stop_loading()
|
||||
continue
|
||||
|
||||
if self.page_load_strategy == 'none':
|
||||
return True
|
||||
|
||||
yu = end_time - perf_counter()
|
||||
ok = self._wait_loaded(1 if yu <= 0 else yu)
|
||||
if not ok:
|
||||
err = TimeoutError('页面连接超时。')
|
||||
sleep(interval)
|
||||
if self._debug or show_errmsg:
|
||||
print(f'重试{t + 1} {to_url}')
|
||||
self.stop_loading()
|
||||
continue
|
||||
|
||||
if not err:
|
||||
break
|
||||
|
||||
if t < times:
|
||||
sleep(interval)
|
||||
while self.ready_state not in ('complete', None):
|
||||
sleep(.1)
|
||||
if self._debug or show_errmsg:
|
||||
print(f'重试{t + 1} {to_url}')
|
||||
|
||||
if err:
|
||||
if show_errmsg:
|
||||
raise err if err is not None else ConnectionError('连接异常。')
|
||||
|
@ -4,9 +4,7 @@
|
||||
@Contact : g1879@qq.com
|
||||
"""
|
||||
from pathlib import Path
|
||||
from typing import Union, Tuple, List, Any
|
||||
|
||||
from DataRecorder import Recorder
|
||||
from typing import Union, Tuple, List, Any, Optional
|
||||
|
||||
from .._base.base import BasePage
|
||||
from .._base.browser import Browser
|
||||
@ -32,6 +30,7 @@ class ChromiumBase(BasePage):
|
||||
self._page: ChromiumPage = ...
|
||||
self.address: str = ...
|
||||
self._driver: ChromiumDriver = ...
|
||||
self._frame_id: str = ...
|
||||
self._is_reading: bool = ...
|
||||
self._timeouts: Timeout = ...
|
||||
self._first_run: bool = ...
|
||||
@ -41,7 +40,6 @@ class ChromiumBase(BasePage):
|
||||
self._url: str = ...
|
||||
self._root_id: str = ...
|
||||
self._debug: bool = ...
|
||||
self._debug_recorder: Recorder = ...
|
||||
self._upload_list: list = ...
|
||||
self._wait: ChromiumBaseWaiter = ...
|
||||
self._set: ChromiumBaseSetter = ...
|
||||
@ -50,6 +48,7 @@ class ChromiumBase(BasePage):
|
||||
self._listener: NetworkListener = ...
|
||||
self._alert: Alert = ...
|
||||
self._has_alert: bool = ...
|
||||
self._ready_state: Optional[str] = ...
|
||||
|
||||
def _connect_browser(self, tab_id: str = None) -> None: ...
|
||||
|
||||
@ -65,13 +64,13 @@ class ChromiumBase(BasePage):
|
||||
|
||||
def _onFrameStartedLoading(self, **kwargs): ...
|
||||
|
||||
def _onFrameStoppedLoading(self, **kwargs): ...
|
||||
def _onFrameNavigated(self, **kwargs): ...
|
||||
|
||||
def _onDomContentEventFired(self, **kwargs): ...
|
||||
|
||||
def _onLoadEventFired(self, **kwargs): ...
|
||||
|
||||
def _onDocumentUpdated(self, **kwargs): ...
|
||||
|
||||
def _onFrameNavigated(self, **kwargs): ...
|
||||
def _onFrameStoppedLoading(self, **kwargs): ...
|
||||
|
||||
def _onFileChooserOpened(self, **kwargs): ...
|
||||
|
||||
|
@ -4,7 +4,7 @@
|
||||
@Contact : g1879@qq.com
|
||||
"""
|
||||
from copy import copy
|
||||
from re import search
|
||||
from re import search, findall
|
||||
from threading import Thread
|
||||
from time import sleep, perf_counter
|
||||
|
||||
@ -14,7 +14,7 @@ from .._elements.chromium_element import ChromiumElement
|
||||
from .._pages.chromium_base import ChromiumBase, ChromiumPageScroll
|
||||
from .._units.setter import ChromiumFrameSetter
|
||||
from .._units.waiter import FrameWaiter
|
||||
from ..errors import ContextLossError
|
||||
from ..errors import ContextLossError, ElementLossError, GetDocumentError
|
||||
|
||||
|
||||
class ChromiumFrame(ChromiumBase):
|
||||
@ -40,6 +40,7 @@ class ChromiumFrame(ChromiumBase):
|
||||
self._backend_id = ele.ids.backend_id
|
||||
self._frame_ele = ele
|
||||
self._states = None
|
||||
self._ids = ChromiumFrameIds(self)
|
||||
|
||||
if self._is_inner_frame():
|
||||
self._is_diff_domain = False
|
||||
@ -50,9 +51,8 @@ class ChromiumFrame(ChromiumBase):
|
||||
super().__init__(page.address, self.frame_id, page.timeout)
|
||||
obj_id = super().run_js('document;', as_expr=True)['objectId']
|
||||
self.doc_ele = ChromiumElement(self, obj_id=obj_id)
|
||||
self._ids = ChromiumFrameIds(self)
|
||||
|
||||
end_time = perf_counter() + 2
|
||||
end_time = perf_counter() + 5
|
||||
while perf_counter() < end_time and self.url == 'about:blank':
|
||||
sleep(.1)
|
||||
|
||||
@ -92,28 +92,46 @@ class ChromiumFrame(ChromiumBase):
|
||||
except:
|
||||
get(f'http://{self.address}/json', headers={'Connection': 'close'})
|
||||
super()._driver_init(tab_id)
|
||||
self.driver.set_listener('Inspector.detached', self._onInspectorDetached)
|
||||
|
||||
def _reload(self):
|
||||
"""重新获取document"""
|
||||
debug = self._debug
|
||||
d_debug = self.driver._debug
|
||||
if debug:
|
||||
print('重新获取document')
|
||||
|
||||
self._frame_ele = ChromiumElement(self._target_page, backend_id=self._backend_id)
|
||||
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._frame_ele.ids.backend_id)['node']
|
||||
|
||||
if self._is_inner_frame():
|
||||
self._is_diff_domain = False
|
||||
self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId'])
|
||||
super().__init__(self.address, self._target_page.tab_id, self._target_page.timeout)
|
||||
self._debug = debug
|
||||
end_time = perf_counter() + self.timeout
|
||||
while perf_counter() < end_time:
|
||||
try:
|
||||
if self._is_inner_frame():
|
||||
self._is_diff_domain = False
|
||||
self.doc_ele = ChromiumElement(self._target_page,
|
||||
backend_id=node['contentDocument']['backendNodeId'])
|
||||
super().__init__(self.address, self._target_page.tab_id, self._target_page.timeout)
|
||||
self._debug = debug
|
||||
self.driver._debug = d_debug
|
||||
else:
|
||||
self._is_diff_domain = True
|
||||
self._driver.stop()
|
||||
super().__init__(self.address, self.frame_id, self._target_page.timeout)
|
||||
obj_id = super().run_js('document;', as_expr=True)['objectId']
|
||||
self.doc_ele = ChromiumElement(self, obj_id=obj_id)
|
||||
self._debug = debug
|
||||
self.driver._debug = d_debug
|
||||
break
|
||||
except:
|
||||
pass
|
||||
|
||||
sleep(.1)
|
||||
|
||||
else:
|
||||
self._is_diff_domain = True
|
||||
self._driver.stop()
|
||||
super().__init__(self.address, self.frame_id, self._target_page.timeout)
|
||||
obj_id = super().run_js('document;', as_expr=True)['objectId']
|
||||
self.doc_ele = ChromiumElement(self, obj_id=obj_id)
|
||||
self._debug = debug
|
||||
raise GetDocumentError
|
||||
|
||||
self.wait.load_complete()
|
||||
|
||||
def _check_ok(self):
|
||||
"""用于应付同域异域之间跳转导致元素丢失问题"""
|
||||
@ -122,7 +140,7 @@ class ChromiumFrame(ChromiumBase):
|
||||
|
||||
try:
|
||||
self._target_page.run_cdp('DOM.describeNode', nodeId=self.ids.node_id)
|
||||
except Exception:
|
||||
except ElementLossError:
|
||||
self._reload()
|
||||
# sleep(2)
|
||||
|
||||
@ -130,72 +148,42 @@ class ChromiumFrame(ChromiumBase):
|
||||
"""刷新cdp使用的document数据"""
|
||||
if self._is_reading:
|
||||
return
|
||||
|
||||
self._is_reading = True
|
||||
if self._is_diff_domain is False:
|
||||
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self.ids.backend_id)['node']
|
||||
self.doc_ele = ChromiumElement(self._target_page,
|
||||
backend_id=node['contentDocument']['backendNodeId'])
|
||||
|
||||
if self._debug:
|
||||
print('---获取document')
|
||||
else:
|
||||
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
|
||||
self.doc_ele = ChromiumElement(self, backend_id=b_id)
|
||||
|
||||
end_time = perf_counter() + 3
|
||||
while self.is_alive and perf_counter() < end_time:
|
||||
try:
|
||||
if self._is_diff_domain is False:
|
||||
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self.ids.backend_id)['node']
|
||||
self.doc_ele = ChromiumElement(self._target_page,
|
||||
backend_id=node['contentDocument']['backendNodeId'])
|
||||
|
||||
else:
|
||||
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
|
||||
self.doc_ele = ChromiumElement(self, backend_id=b_id)
|
||||
|
||||
break
|
||||
|
||||
except Exception:
|
||||
sleep(.1)
|
||||
|
||||
# else:
|
||||
# raise RuntimeError('获取document失败。')
|
||||
|
||||
if self._debug:
|
||||
print('---获取document结束')
|
||||
r = self.run_cdp('Page.getFrameTree')
|
||||
for i in findall(r"'id': '(.*?)'", str(r)):
|
||||
self.browser._frames[i] = self.tab_id
|
||||
|
||||
self._is_loading = False
|
||||
self._is_reading = False
|
||||
|
||||
def _onFrameNavigated(self, **kwargs):
|
||||
"""页面跳转时触发"""
|
||||
if kwargs['frame']['id'] == self.frame_id and self._first_run is False and self._is_loading:
|
||||
self._is_loading = True
|
||||
|
||||
if self._debug:
|
||||
print('navigated')
|
||||
if self._debug_recorder:
|
||||
self._debug_recorder.add_data((perf_counter(), '加载流程', 'navigated'))
|
||||
|
||||
def _onLoadEventFired(self, **kwargs):
|
||||
"""在页面刷新、变化后重新读取页面内容"""
|
||||
# 用于覆盖父类方法,不能删
|
||||
self._get_new_document()
|
||||
|
||||
if self._debug:
|
||||
print('loadEventFired')
|
||||
if self._debug_recorder:
|
||||
self._debug_recorder.add_data((perf_counter(), '加载流程', 'loadEventFired'))
|
||||
|
||||
def _onFrameStartedLoading(self, **kwargs):
|
||||
"""页面开始加载时触发"""
|
||||
if kwargs['frameId'] == self.frame_id:
|
||||
self._is_loading = True
|
||||
if self._debug:
|
||||
print('页面开始加载 FrameStartedLoading')
|
||||
|
||||
def _onFrameStoppedLoading(self, **kwargs):
|
||||
"""页面加载完成后触发"""
|
||||
if kwargs['frameId'] == self.frame_id and self._first_run is False and self._is_loading:
|
||||
self.browser._frames[kwargs['frameId']] = self.tab_id
|
||||
if kwargs['frameId'] == self.frame_id:
|
||||
self._ready_state = 'complete'
|
||||
if self._debug:
|
||||
print('页面停止加载 FrameStoppedLoading')
|
||||
print(f'FrameStoppedLoading {kwargs}')
|
||||
self._get_new_document()
|
||||
|
||||
def _onInspectorDetached(self, **kwargs):
|
||||
self._is_loading = True
|
||||
# print('reload')
|
||||
self._reload()
|
||||
|
||||
# def _onFrameDetached(self, **kwargs):
|
||||
# if kwargs['frameId'] == self.frame_id:
|
||||
# self._is_loading = True
|
||||
# self._reload()
|
||||
|
||||
@property
|
||||
def page(self):
|
||||
return self._page
|
||||
@ -387,7 +375,7 @@ class ChromiumFrame(ChromiumBase):
|
||||
def run_js(self, script, *args, as_expr=False):
|
||||
"""运行javascript代码
|
||||
:param script: js文本
|
||||
:param args: 参数,按顺序在js文本中对应argument[0]、argument[1]...
|
||||
:param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]...
|
||||
:param as_expr: 是否作为表达式运行,为True时args无效
|
||||
:return: 运行的结果
|
||||
"""
|
||||
@ -614,34 +602,43 @@ class ChromiumFrame(ChromiumBase):
|
||||
|
||||
for t in range(times + 1):
|
||||
err = None
|
||||
result = self.driver.call_method('Page.navigate', url=to_url, frameId=self.frame_id)
|
||||
|
||||
is_timeout = not self._wait_loaded(timeout)
|
||||
sleep(.5)
|
||||
self.wait.load_complete()
|
||||
|
||||
if is_timeout:
|
||||
end_time = perf_counter() + timeout
|
||||
result = self.driver.call_method('Page.navigate', url=to_url, frameId=self.frame_id, _timeout=timeout)
|
||||
if result.get('error') == 'timeout':
|
||||
err = TimeoutError('页面连接超时。')
|
||||
if 'errorText' in result:
|
||||
|
||||
elif 'errorText' in result:
|
||||
err = ConnectionError(result['errorText'])
|
||||
|
||||
if err:
|
||||
sleep(interval)
|
||||
if self._debug or show_errmsg:
|
||||
print(f'重试{t + 1} {to_url}')
|
||||
self.stop_loading()
|
||||
continue
|
||||
|
||||
if self.page_load_strategy == 'none':
|
||||
return True
|
||||
|
||||
yu = end_time - perf_counter()
|
||||
ok = self._wait_loaded(1 if yu <= 0 else yu)
|
||||
if not ok:
|
||||
err = TimeoutError('页面连接超时。')
|
||||
sleep(interval)
|
||||
if self._debug or show_errmsg:
|
||||
print(f'重试{t + 1} {to_url}')
|
||||
self.stop_loading()
|
||||
continue
|
||||
|
||||
if not err:
|
||||
break
|
||||
|
||||
if t < times:
|
||||
sleep(interval)
|
||||
while self.ready_state not in ('complete', None):
|
||||
sleep(.1)
|
||||
if self._debug:
|
||||
print('重试')
|
||||
if show_errmsg:
|
||||
print(f'重试 {to_url}')
|
||||
|
||||
if err:
|
||||
if show_errmsg:
|
||||
raise err if err is not None else ConnectionError('连接异常。')
|
||||
return False
|
||||
|
||||
self._check_ok()
|
||||
return True
|
||||
|
||||
def _is_inner_frame(self):
|
||||
|
@ -49,9 +49,9 @@ class ChromiumFrame(ChromiumBase):
|
||||
|
||||
def _get_new_document(self) -> None: ...
|
||||
|
||||
def _onFrameAttached(self, **kwargs): ...
|
||||
def _onFrameStoppedLoading(self, **kwargs): ...
|
||||
|
||||
def _onFrameDetached(self, **kwargs): ...
|
||||
def _onInspectorDetached(self, **kwargs): ...
|
||||
|
||||
@property
|
||||
def page(self) -> Union[ChromiumPage, WebPage]: ...
|
||||
|
@ -266,8 +266,8 @@ class DownloadMission(object):
|
||||
"""
|
||||
if show:
|
||||
print(f'url:{self.url}')
|
||||
t2 = perf_counter()
|
||||
while self.name is None and perf_counter() - t2 < 4:
|
||||
end_time = perf_counter()
|
||||
while self.name is None and perf_counter() < end_time:
|
||||
sleep(0.01)
|
||||
print(f'文件名:{self.name}')
|
||||
print(f'目标路径:{self.path}')
|
||||
|
@ -82,8 +82,12 @@ class ChromiumBaseWaiter(object):
|
||||
|
||||
def upload_paths_inputted(self):
|
||||
"""等待自动填写上传文件路径"""
|
||||
while self._driver._upload_list:
|
||||
end_time = perf_counter() + self._driver.timeout
|
||||
while perf_counter() < end_time:
|
||||
if not self._driver._upload_list:
|
||||
return True
|
||||
sleep(.01)
|
||||
return False
|
||||
|
||||
def download_begin(self, timeout=None, cancel_it=False):
|
||||
"""等待浏览器下载开始,可将其拦截
|
||||
@ -201,7 +205,7 @@ class ChromiumTabWaiter(ChromiumBaseWaiter):
|
||||
|
||||
else:
|
||||
end_time = perf_counter() + timeout
|
||||
while end_time > perf_counter():
|
||||
while perf_counter() < end_time:
|
||||
if not self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id):
|
||||
return True
|
||||
sleep(.5)
|
||||
@ -224,13 +228,14 @@ class ChromiumPageWaiter(ChromiumTabWaiter):
|
||||
"""等待新标签页出现
|
||||
:param timeout: 等待超时时间,为None则使用页面对象timeout属性
|
||||
:param raise_err: 等待失败时是否报错,为None时根据Settings设置
|
||||
:return: 是否等到新标签页出现
|
||||
:return: 等到新标签页返回其id,否则返回False
|
||||
"""
|
||||
timeout = timeout if timeout is not None else self._driver.timeout
|
||||
end_time = perf_counter() + timeout
|
||||
while perf_counter() < end_time:
|
||||
if self._driver.tab_id != self._driver.latest_tab:
|
||||
return True
|
||||
latest_tab = self._driver.latest_tab
|
||||
if self._driver.tab_id != latest_tab:
|
||||
return latest_tab
|
||||
sleep(.01)
|
||||
|
||||
if raise_err is True or Settings.raise_when_wait_failed is True:
|
||||
@ -251,7 +256,7 @@ class ChromiumPageWaiter(ChromiumTabWaiter):
|
||||
|
||||
else:
|
||||
end_time = perf_counter() + timeout
|
||||
while end_time > perf_counter():
|
||||
while perf_counter() < end_time:
|
||||
if not self._driver.browser._dl_mgr._missions:
|
||||
return True
|
||||
sleep(.5)
|
||||
|
@ -37,7 +37,7 @@ class ChromiumBaseWaiter(object):
|
||||
|
||||
def load_complete(self, timeout: float = None, raise_err: bool = None) -> bool: ...
|
||||
|
||||
def upload_paths_inputted(self) -> None: ...
|
||||
def upload_paths_inputted(self) -> bool: ...
|
||||
|
||||
def download_begin(self, timeout: float = None, cancel_it: bool = False) -> Union[DownloadMission, bool]: ...
|
||||
|
||||
@ -62,7 +62,7 @@ class ChromiumTabWaiter(ChromiumBaseWaiter):
|
||||
class ChromiumPageWaiter(ChromiumTabWaiter):
|
||||
_driver: ChromiumPage = ...
|
||||
|
||||
def new_tab(self, timeout: float = None, raise_err: bool = None) -> bool: ...
|
||||
def new_tab(self, timeout: float = None, raise_err: bool = None) -> Union[str, bool]: ...
|
||||
|
||||
def all_downloads_done(self, timeout: float = None, cancel_if_timeout: bool = True) -> bool: ...
|
||||
|
||||
|
2
setup.py
2
setup.py
@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh:
|
||||
|
||||
setup(
|
||||
name="DrissionPage",
|
||||
version="4.0.0b0",
|
||||
version="4.0.0b1",
|
||||
author="g1879",
|
||||
author_email="g1879@qq.com",
|
||||
description="Python based web automation tool. It can control the browser and send and receive data packets.",
|
||||
|
Loading…
Reference in New Issue
Block a user