From b1b57c8fb710e6641443383b305e909d72eb1141 Mon Sep 17 00:00:00 2001 From: chenrj <102030622+xdu-chenrj@users.noreply.github.com> Date: Thu, 22 Sep 2022 14:43:00 +0800 Subject: [PATCH] [feat][python] new resource plugin GitHub and gitlab (#11831) Co-authored-by: Jiajie Zhong Co-authored-by: JieguangZhou Co-authored-by: Devosend Co-authored-by: juzimao <578961953@qq.com> --- .../docs/source/resources_plugin/develop.rst | 4 +- .../docs/source/resources_plugin/github.rst | 35 ++++ .../docs/source/resources_plugin/index.rst | 3 +- .../resources_plugin/resource-plugin.rst | 6 +- .../core/resource_plugin.py | 11 +- .../resources_plugin/__init__.py | 2 + .../resources_plugin/base/__init__.py | 18 ++ .../resources_plugin/base/git.py | 91 ++++++++ .../resources_plugin/github.py | 103 +++++++++ .../tests/resources_plugin/test_github.py | 195 ++++++++++++++++++ .../resources_plugin/test_resource_plugin.py | 75 +++++++ 11 files changed, 536 insertions(+), 7 deletions(-) create mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/github.rst create mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/__init__.py create mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/git.py create mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/github.py create mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_github.py create mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_resource_plugin.py diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/develop.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/develop.rst index 9e112b240a..e7d90ea03c 100644 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/develop.rst +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/develop.rst @@ -20,7 +20,7 @@ How to develop When you want to create a new resource plugin, you need to add a new class in the module `resources_plugin`. -The resource plug-in class needs to inherit the abstract class `ResourcePlugin` and implement its abstract method `read_file` function. +The resource plugin class needs to inherit the abstract class `ResourcePlugin` and implement its abstract method `read_file` function. The parameter of the `__init__` function of `ResourcePlugin` is the prefix of STR type. You can override this function when necessary. @@ -37,7 +37,7 @@ Example - Method `read_file`: Get content from the given URI, The function parameter is the suffix of the file path. -The file prefix has been initialized in init of the resource plug-in. +The file prefix has been initialized in init of the resource plugin. The prefix plus suffix is the absolute path of the file in this resource. diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/github.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/github.rst new file mode 100644 index 0000000000..16febd4fcb --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/github.rst @@ -0,0 +1,35 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +GitHub +====== + +`GitHub` is a github resource plugin for pydolphinscheduler. + +When using a github resource plugin, you only need to add the `resource_plugin` parameter in the task subclass or workflow definition, +such as `resource_plugin=GitHub(prefix="https://github.com/xxx", access_token="ghpxx")`. +The token parameter is optional. You need to add it when your warehouse is a private repository. + +You can view this `document `_ +when creating a token. + +For the specific use of resource plugins, you can see `How to use` in :doc:`resource-plugin` + +Dive Into +--------- + +.. automodule:: pydolphinscheduler.resources_plugin.github \ No newline at end of file diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/index.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/index.rst index 05a7ebd94a..1110cf315f 100644 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/index.rst +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/index.rst @@ -25,4 +25,5 @@ In this section develop resource-plugin - local \ No newline at end of file + local + github \ No newline at end of file diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/resource-plugin.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/resource-plugin.rst index 0b90eeecbf..bdd7dc3a35 100644 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/resource-plugin.rst +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/resource-plugin.rst @@ -55,10 +55,10 @@ It is an abstract function. You must rewrite it How to use ---------- -Resource plug-ins can be used in task subclasses and workflows. You can use the resource plug-ins by adding the `resource_plugin` parameter when they are initialized. -For example, local resource plug-ins, add `resource_plugin = Local("/tmp")`. +Resource plugin can be used in task subclasses and workflows. You can use the resource plugin by adding the `resource_plugin` parameter when they are initialized. +For example, local resource plugin, add `resource_plugin = Local("/tmp")`. -The resource plug-ins we currently support is `local`. +The resource plugin we currently support are `local`, `github`. Here is an example. diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource_plugin.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource_plugin.py index 457a7c27b7..8b500d165f 100644 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource_plugin.py +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource_plugin.py @@ -17,9 +17,10 @@ """DolphinScheduler ResourcePlugin object.""" - from abc import ABCMeta, abstractmethod +from pydolphinscheduler.exceptions import PyResPluginException + # [start resource_plugin_definition] class ResourcePlugin(object, metaclass=ABCMeta): @@ -45,5 +46,13 @@ class ResourcePlugin(object, metaclass=ABCMeta): # [end abstractmethod read_file] + def get_index(self, s: str, x, n): + """Find the subscript of the nth occurrence of the X character in the string s.""" + if n <= s.count(x): + all_index = [key for key, value in enumerate(s) if value == x] + return all_index[n - 1] + else: + raise PyResPluginException("Incomplete path.") + # [end resource_plugin_definition] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/__init__.py index b6bc7a5ffb..110988401f 100644 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/__init__.py +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/__init__.py @@ -16,8 +16,10 @@ # under the License. """Init resources_plugin package.""" +from pydolphinscheduler.resources_plugin.github import GitHub from pydolphinscheduler.resources_plugin.local import Local __all__ = [ "Local", + "GitHub", ] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/__init__.py new file mode 100644 index 0000000000..4253cda64d --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/__init__.py @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Init base package.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/git.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/git.py new file mode 100644 index 0000000000..a36ab50547 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/git.py @@ -0,0 +1,91 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""DolphinScheduler GitFileInfo and Git object.""" + +from abc import ABCMeta, abstractmethod +from typing import Optional + + +class GitFileInfo: + """A class that defines the details of GIT files. + + :param user: A string representing the user the git file belongs to. + :param repo_name: A string representing the repository to which the git file belongs. + :param branch: A string representing the branch to which the git file belongs. + :param file_path: A string representing the git file path. + """ + + def __init__( + self, + user: Optional[str] = None, + repo_name: Optional[str] = None, + branch: Optional[str] = None, + file_path: Optional[str] = None, + *args, + **kwargs + ): + self.user = user + self.repo_name = repo_name + self.branch = branch + self.file_path = file_path + + +class GitHubFileInfo(GitFileInfo): + """A class that defines the details of GitHub files. + + :param user: A string representing the user the GitHub file belongs to. + :param repo_name: A string representing the repository to which the GitHub file belongs. + :param branch: A string representing the branch to which the GitHub file belongs. + :param file_path: A string representing the GitHub file path. + """ + + def __init__( + self, + user: Optional[str] = None, + repo_name: Optional[str] = None, + branch: Optional[str] = None, + file_path: Optional[str] = None, + *args, + **kwargs + ): + super().__init__( + user=user, + repo_name=repo_name, + branch=branch, + file_path=file_path, + *args, + **kwargs + ) + + +# [start Git] +class Git(object, metaclass=ABCMeta): + """An abstract class of online code warehouse based on git implementation.""" + + _git_file_info: Optional[GitFileInfo] = None + + # [start abstractmethod git_file_info] + @abstractmethod + def get_git_file_info(self, path: str): + """Get the detailed information of GIT file according to the file URL.""" + raise NotImplementedError + + # [end abstractmethod git_file_info] + + +# [end Git] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/github.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/github.py new file mode 100644 index 0000000000..95a43a8ce4 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/github.py @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""DolphinScheduler github resource plugin.""" +import base64 +from typing import Optional +from urllib.parse import urljoin + +import requests + +from pydolphinscheduler.core.resource_plugin import ResourcePlugin +from pydolphinscheduler.resources_plugin.base.git import Git, GitHubFileInfo + + +class GitHub(ResourcePlugin, Git): + """GitHub resource plugin, a plugin for task and workflow to dolphinscheduler to read resource. + + :param prefix: A string representing the prefix of GitHub. + :param access_token: A string used for identity authentication of GitHub private repository. + """ + + def __init__( + self, prefix: str, access_token: Optional[str] = None, *args, **kwargs + ): + super().__init__(prefix, *args, **kwargs) + self.access_token = access_token + + def build_req_api( + self, + user: str, + repo_name: str, + file_path: str, + api: str, + ): + """Build request file content API.""" + api = api.replace("{user}", user) + api = api.replace("{repo_name}", repo_name) + api = api.replace("{file_path}", file_path) + return api + + def get_git_file_info(self, path: str): + """Get file information from the file url, like repository name, user, branch, and file path.""" + elements = path.split("/") + index = self.get_index(path, "/", 7) + index = index + 1 + file_info = GitHubFileInfo( + user=elements[3], + repo_name=elements[4], + branch=elements[6], + file_path=path[index:], + ) + self._git_file_info = file_info + + def get_req_url(self): + """Build request URL according to file information.""" + return self.build_req_api( + user=self._git_file_info.user, + repo_name=self._git_file_info.repo_name, + file_path=self._git_file_info.file_path, + api="https://api.github.com/repos/{user}/{repo_name}/contents/{file_path}", + ) + + def read_file(self, suf: str): + """Get the content of the file. + + The address of the file is the prefix of the resource plugin plus the parameter suf. + """ + path = urljoin(self.prefix, suf) + return self.req(path) + + def req(self, path: str): + """Send HTTP request, parse response data, and get file content.""" + headers = { + "Content-Type": "application/json; charset=utf-8", + } + if self.access_token is not None: + headers.setdefault("Authorization", "Bearer %s" % self.access_token) + self.get_git_file_info(path) + response = requests.get( + headers=headers, + url=self.get_req_url(), + params={"ref": self._git_file_info.branch}, + ) + if response.status_code == requests.codes.ok: + json_response = response.json() + content = base64.b64decode(json_response["content"]) + return content.decode("utf-8") + else: + raise Exception(response.json()) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_github.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_github.py new file mode 100644 index 0000000000..1f1a631649 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_github.py @@ -0,0 +1,195 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Test github resource plugin.""" +from unittest.mock import PropertyMock, patch + +import pytest + +from pydolphinscheduler.resources_plugin import GitHub +from pydolphinscheduler.resources_plugin.base.git import GitFileInfo + + +@pytest.mark.parametrize( + "attr, expected", + [ + ( + { + "user": "apache", + "repo_name": "dolphinscheduler", + "file_path": "script/install.sh", + "api": "https://api.github.com/repos/{user}/{repo_name}/contents/{file_path}", + }, + "https://api.github.com/repos/apache/dolphinscheduler/contents/script/install.sh", + ), + ], +) +def test_github_build_req_api(attr, expected): + """Test the build_req_api function of the github resource plug-in.""" + github = GitHub(prefix="prefix") + assert expected == github.build_req_api(**attr) + + +@pytest.mark.parametrize( + "attr, expected", + [ + ( + "https://github.com/apache/dolphinscheduler/blob/dev/script/install.sh", + { + "user": "apache", + "repo_name": "dolphinscheduler", + "branch": "dev", + "file_path": "script/install.sh", + }, + ), + ( + "https://github.com/apache/dolphinscheduler/blob/master/pom.xml", + { + "user": "apache", + "repo_name": "dolphinscheduler", + "branch": "master", + "file_path": "pom.xml", + }, + ), + ( + "https://github.com/apache/dolphinscheduler/blob/1.3.9-release/docker/build/startup.sh", + { + "user": "apache", + "repo_name": "dolphinscheduler", + "branch": "1.3.9-release", + "file_path": "docker/build/startup.sh", + }, + ), + ], +) +def test_github_get_git_file_info(attr, expected): + """Test the get_git_file_info function of the github resource plug-in.""" + github = GitHub(prefix="prefix") + github.get_git_file_info(attr) + assert expected == github._git_file_info.__dict__ + + +@pytest.mark.parametrize( + "attr, expected", + [ + ( + ( + { + "user": "apache", + "repo_name": "dolphinscheduler", + "file_path": "docker/build/startup.sh", + } + ), + "https://api.github.com/repos/apache/dolphinscheduler/contents/docker/build/startup.sh", + ), + ( + ( + { + "user": "apache", + "repo_name": "dolphinscheduler", + "file_path": "pom.xml", + } + ), + "https://api.github.com/repos/apache/dolphinscheduler/contents/pom.xml", + ), + ( + ( + { + "user": "apache", + "repo_name": "dolphinscheduler", + "file_path": "script/create-dolphinscheduler.sh", + } + ), + "https://api.github.com/repos/apache/dolphinscheduler/contents/script/create-dolphinscheduler.sh", + ), + ], +) +@patch( + "pydolphinscheduler.resources_plugin.github.GitHub._git_file_info", + new_callable=PropertyMock, +) +def test_github_get_req_url(m_git_file_info, attr, expected): + """Test the get_req_url function of the github resource plug-in.""" + github = GitHub(prefix="prefix") + m_git_file_info.return_value = GitFileInfo(**attr) + assert expected == github.get_req_url() + + +@pytest.mark.parametrize( + "attr, expected", + [ + ( + { + "init": {"prefix": "prefix", "access_token": "access_token"}, + "file_path": "github_resource_plugin.sh", + "file_content": "github resource plugin", + }, + "github resource plugin", + ), + ( + { + "init": { + "prefix": "prefix", + }, + "file_path": "github_resource_plugin.sh", + "file_content": "github resource plugin", + }, + "github resource plugin", + ), + ], +) +@patch("pydolphinscheduler.resources_plugin.github.GitHub.req") +def test_github_read_file(m_req, attr, expected): + """Test the read_file function of the github resource plug-in.""" + github = GitHub(**attr.get("init")) + m_req.return_value = attr.get("file_content") + assert expected == github.read_file(attr.get("file_path")) + + +@pytest.mark.skip(reason="Lack of test environment, need stable repository") +@pytest.mark.parametrize( + "attr, expected", + [ + ( + "https://github.com/apache/dolphinscheduler/blob/dev/lombok.config", + "#\n" + "# Licensed to the Apache Software Foundation (ASF) under one or more\n" + "# contributor license agreements. See the NOTICE file distributed with\n" + "# this work for additional information regarding copyright ownership.\n" + "# The ASF licenses this file to You under the Apache License, Version 2.0\n" + '# (the "License"); you may not use this file except in compliance with\n' + "# the License. You may obtain a copy of the License at\n" + "#\n" + "# http://www.apache.org/licenses/LICENSE-2.0\n" + "#\n" + "# Unless required by applicable law or agreed to in writing, software\n" + '# distributed under the License is distributed on an "AS IS" BASIS,\n' + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" + "# See the License for the specific language governing permissions and\n" + "# limitations under the License.\n" + "#\n" + "\n" + "lombok.addLombokGeneratedAnnotation = true\n", + ), + ], +) +def test_github_req(attr, expected): + """Test the req function of the github resource plug-in.""" + github = GitHub( + prefix="prefix", + ) + assert expected == github.req(attr) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_resource_plugin.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_resource_plugin.py new file mode 100644 index 0000000000..63e619a600 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_resource_plugin.py @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Test abstract class resource_plugin.""" + +import pytest + +from pydolphinscheduler.exceptions import PyResPluginException +from pydolphinscheduler.resources_plugin import GitHub + + +@pytest.mark.parametrize( + "attr, expected", + [ + ( + { + "s": "https://api.github.com/repos/apache/dolphinscheduler/contents/script/install.sh", + "x": "/", + "n": 2, + }, + 7, + ), + ( + { + "s": "https://api.github.com", + "x": ":", + "n": 1, + }, + 5, + ), + ], +) +def test_github_get_index(attr, expected): + """Test the get_index function of the abstract class resource_plugin.""" + github = GitHub(prefix="prefix") + assert expected == github.get_index(**attr) + + +@pytest.mark.parametrize( + "attr", + [ + { + "s": "https://api.github.com", + "x": "/", + "n": 3, + }, + { + "s": "https://api.github.com/", + "x": "/", + "n": 4, + }, + ], +) +def test_github_get_index_exception(attr): + """Test exception to get_index function of abstract class resource_plugin.""" + with pytest.raises( + PyResPluginException, + match="Incomplete path.", + ): + github = GitHub(prefix="prefix") + github.get_index(**attr)