mirror of
https://gitee.com/dolphinscheduler/DolphinScheduler.git
synced 2024-11-30 11:17:54 +08:00
[Feature] Enable users to create python env from requirements.txt (#10658)
This commit is contained in:
parent
426567348e
commit
71f0168510
@ -26,7 +26,8 @@ Click [here](https://docs.conda.io/en/latest/) for more information about `conda
|
||||
|
||||
1. Use [Conda-Pack](https://conda.github.io/conda-pack/) to pack your conda environment into `tarball`.
|
||||
2. Upload packed conda environment to `resource center`.
|
||||
3. Select your packed conda environment as `resource` in your `jupyter task`, e.g. `jupyter_env.tar.gz`.
|
||||
3. Set `condaEnvName` as the name of your packed conda environment in your `jupyter task`, e.g. `jupyter_env.tar.gz`.
|
||||
4. Select your packed conda environment as `resource` in your `jupyter task`, e.g. `jupyter_env.tar.gz`.
|
||||
|
||||
> NOTE: Make sure you follow the [Conda-Pack](https://conda.github.io/conda-pack/) official instructions.
|
||||
> If you unpack your packed conda environment, the directory structure should be the same as below:
|
||||
@ -46,6 +47,55 @@ Click [here](https://docs.conda.io/en/latest/) for more information about `conda
|
||||
> `Jupyter Task Plugin` uses `source` command to activate your packed conda environment.
|
||||
> If you are concerned about using `source`, choose other options to manage your python dependency.
|
||||
|
||||
### Construct From Requirements
|
||||
|
||||
1. Upload or create a `.txt` file of requirements with your python dependencies in `Resource Center`.
|
||||
2. Set `condaEnvName` as the name of your file of requirements in your `jupyter task`, e.g. `requirements.txt`.
|
||||
3. Select your file of requirements as `resource` in your `jupyter task`, e.g. `requirements.txt`.
|
||||
|
||||
Here is an example file of requirements, from which `jupyter task plugin` will automatically
|
||||
construct your python dependencies, run your python code and finally tear down the environment:
|
||||
|
||||
```text
|
||||
fastjsonschema==2.15.3
|
||||
fonttools==4.33.3
|
||||
geojson==2.5.0
|
||||
identify==2.4.11
|
||||
idna==3.3
|
||||
importlib-metadata==4.11.3
|
||||
importlib-resources==5.7.1
|
||||
ipykernel==5.5.6
|
||||
ipython==8.2.0
|
||||
ipython-genutils==0.2.0
|
||||
jedi==0.18.1
|
||||
Jinja2==3.1.1
|
||||
json5==0.9.6
|
||||
jsonschema==4.4.0
|
||||
jupyter-client==7.3.0
|
||||
jupyter-core==4.10.0
|
||||
jupyter-server==1.17.0
|
||||
jupyterlab==3.3.4
|
||||
jupyterlab-pygments==0.2.2
|
||||
jupyterlab-server==2.13.0
|
||||
kiwisolver==1.4.2
|
||||
MarkupSafe==2.1.1
|
||||
matplotlib==3.5.2
|
||||
matplotlib-inline==0.1.3
|
||||
mistune==0.8.4
|
||||
nbclassic==0.3.7
|
||||
nbclient==0.6.0
|
||||
nbconvert==6.5.0
|
||||
nbformat==5.3.0
|
||||
nest-asyncio==1.5.5
|
||||
notebook==6.4.11
|
||||
notebook-shim==0.1.0
|
||||
numpy==1.22.3
|
||||
packaging==21.3
|
||||
pandas==1.4.2
|
||||
pandocfilters==1.5.0
|
||||
papermill==2.3.4
|
||||
```
|
||||
|
||||
## Create Task
|
||||
|
||||
- Click `Project Management-Project Name-Workflow Definition`, and click the `Create Workflow` button to enter the DAG editing page.
|
||||
|
@ -45,6 +45,55 @@
|
||||
> `Jupyter任务插件`使用`source`命令激活您打包的conda环境。
|
||||
> 若您对使用`source`命令有安全性上的担忧,请使用其他方法管理您的python依赖。
|
||||
|
||||
### 由依赖需求文本文件临时构建
|
||||
|
||||
1. 在`资源中心`创建或上传`.txt`格式的python依赖需求文本文件。
|
||||
2. 将`jupyter任务`中的`condaEnvName`参数设置成您的python依赖需求文本文件,如`requirements.txt`。
|
||||
3. 在您`jupyter任务`的`资源`中选取您的python依赖需求文本文件,如`requirements.txt`。
|
||||
|
||||
如下是一个依赖需求文本文件的样例,通过该文件,`jupyter任务插件`会自动构建您的python依赖,并执行您的python代码,
|
||||
执行完成后会自动释放临时构建的环境。
|
||||
|
||||
```text
|
||||
fastjsonschema==2.15.3
|
||||
fonttools==4.33.3
|
||||
geojson==2.5.0
|
||||
identify==2.4.11
|
||||
idna==3.3
|
||||
importlib-metadata==4.11.3
|
||||
importlib-resources==5.7.1
|
||||
ipykernel==5.5.6
|
||||
ipython==8.2.0
|
||||
ipython-genutils==0.2.0
|
||||
jedi==0.18.1
|
||||
Jinja2==3.1.1
|
||||
json5==0.9.6
|
||||
jsonschema==4.4.0
|
||||
jupyter-client==7.3.0
|
||||
jupyter-core==4.10.0
|
||||
jupyter-server==1.17.0
|
||||
jupyterlab==3.3.4
|
||||
jupyterlab-pygments==0.2.2
|
||||
jupyterlab-server==2.13.0
|
||||
kiwisolver==1.4.2
|
||||
MarkupSafe==2.1.1
|
||||
matplotlib==3.5.2
|
||||
matplotlib-inline==0.1.3
|
||||
mistune==0.8.4
|
||||
nbclassic==0.3.7
|
||||
nbclient==0.6.0
|
||||
nbconvert==6.5.0
|
||||
nbformat==5.3.0
|
||||
nest-asyncio==1.5.5
|
||||
notebook==6.4.11
|
||||
notebook-shim==0.1.0
|
||||
numpy==1.22.3
|
||||
packaging==21.3
|
||||
pandas==1.4.2
|
||||
pandocfilters==1.5.0
|
||||
papermill==2.3.4
|
||||
```
|
||||
|
||||
## 创建任务
|
||||
|
||||
- 点击项目管理-项目名称-工作流定义,点击"创建工作流"按钮,进入DAG编辑页面。
|
||||
|
@ -429,4 +429,12 @@ public class DateUtils {
|
||||
}
|
||||
return TimeZone.getTimeZone(timezoneId);
|
||||
}
|
||||
|
||||
/**
|
||||
* get timestamp in String
|
||||
* PowerMock 2.0.9 fails to mock System.currentTimeMillis(), this method helps in UT
|
||||
*/
|
||||
public static String getTimestampString() {
|
||||
return String.valueOf(System.currentTimeMillis());
|
||||
}
|
||||
}
|
||||
|
@ -23,6 +23,16 @@ public class JupyterConstants {
|
||||
throw new IllegalStateException("Utility class");
|
||||
}
|
||||
|
||||
/**
|
||||
* execution flag, ignore errors and keep executing till the end
|
||||
*/
|
||||
public static final String EXECUTION_FLAG = "set +e";
|
||||
|
||||
/**
|
||||
* new line symbol
|
||||
*/
|
||||
public static final String NEW_LINE_SYMBOL = "\n";
|
||||
|
||||
/**
|
||||
* conda init
|
||||
*/
|
||||
@ -40,11 +50,28 @@ public class JupyterConstants {
|
||||
"tar -xzf %s -C jupyter_env && " +
|
||||
"source jupyter_env/bin/activate";
|
||||
|
||||
/**
|
||||
* create and activate tmp conda env from txt
|
||||
*/
|
||||
public static final String CREATE_ENV_FROM_TXT = "conda create -n jupyter-tmp-env-%s -y && " +
|
||||
"conda activate jupyter-tmp-env-%s && " +
|
||||
"pip install -r %s";
|
||||
|
||||
/**
|
||||
* remove tmp conda env
|
||||
*/
|
||||
public static final String REMOVE_ENV = "conda deactivate && conda remove --name jupyter-tmp-env-%s --all -y";
|
||||
|
||||
/**
|
||||
* file suffix tar.gz
|
||||
*/
|
||||
public static final String TAR_SUFFIX = ".tar.gz";
|
||||
|
||||
/**
|
||||
* file suffix .txt
|
||||
*/
|
||||
public static final String TXT_SUFFIX = ".txt";
|
||||
|
||||
/**
|
||||
* jointer to combine two command
|
||||
*/
|
||||
|
@ -28,6 +28,7 @@ import org.apache.dolphinscheduler.plugin.task.api.parameters.AbstractParameters
|
||||
import org.apache.dolphinscheduler.plugin.task.api.parser.ParamUtils;
|
||||
import org.apache.dolphinscheduler.plugin.task.api.parser.ParameterUtils;
|
||||
import org.apache.dolphinscheduler.plugin.task.api.utils.MapUtils;
|
||||
import org.apache.dolphinscheduler.spi.utils.DateUtils;
|
||||
import org.apache.dolphinscheduler.spi.utils.JSONUtils;
|
||||
import org.apache.dolphinscheduler.spi.utils.PropertyUtils;
|
||||
import org.apache.dolphinscheduler.spi.utils.StringUtils;
|
||||
@ -104,12 +105,20 @@ public class JupyterTask extends AbstractTaskExecutor {
|
||||
*/
|
||||
List<String> args = new ArrayList<>();
|
||||
final String condaPath = PropertyUtils.getString(TaskConstants.CONDA_PATH);
|
||||
final String timestamp = DateUtils.getTimestampString();
|
||||
String condaEnvName = jupyterParameters.getCondaEnvName();
|
||||
if (condaEnvName.endsWith(JupyterConstants.TXT_SUFFIX)) {
|
||||
args.add(JupyterConstants.EXECUTION_FLAG);
|
||||
args.add(JupyterConstants.NEW_LINE_SYMBOL);
|
||||
}
|
||||
|
||||
args.add(JupyterConstants.CONDA_INIT);
|
||||
args.add(condaPath);
|
||||
args.add(JupyterConstants.JOINTER);
|
||||
String condaEnvName = jupyterParameters.getCondaEnvName();
|
||||
if (condaEnvName.endsWith(JupyterConstants.TAR_SUFFIX)) {
|
||||
args.add(String.format(JupyterConstants.CREATE_ENV_FROM_TAR, condaEnvName));
|
||||
} else if (condaEnvName.endsWith(JupyterConstants.TXT_SUFFIX)) {
|
||||
args.add(String.format(JupyterConstants.CREATE_ENV_FROM_TXT, timestamp, timestamp, condaEnvName));
|
||||
} else {
|
||||
args.add(JupyterConstants.CONDA_ACTIVATE);
|
||||
args.add(jupyterParameters.getCondaEnvName());
|
||||
@ -126,10 +135,17 @@ public class JupyterTask extends AbstractTaskExecutor {
|
||||
// populate jupyter options
|
||||
args.addAll(populateJupyterOptions());
|
||||
|
||||
// remove tmp conda env, if created from requirements.txt
|
||||
if (condaEnvName.endsWith(JupyterConstants.TXT_SUFFIX)) {
|
||||
args.add(JupyterConstants.NEW_LINE_SYMBOL);
|
||||
args.add(String.format(JupyterConstants.REMOVE_ENV, timestamp));
|
||||
}
|
||||
|
||||
// replace placeholder, and combining local and global parameters
|
||||
Map<String, Property> paramsMap = taskExecutionContext.getPrepareParamsMap();
|
||||
|
||||
String command = ParameterUtils.convertParameterPlaceholders(String.join(" ", args), ParamUtils.convert(paramsMap));
|
||||
String command = ParameterUtils
|
||||
.convertParameterPlaceholders(String.join(" ", args), ParamUtils.convert(paramsMap));
|
||||
|
||||
logger.info("jupyter task command: {}", command);
|
||||
|
||||
|
@ -19,8 +19,8 @@ package org.apache.dolphinscheduler.plugin.task.jupyter;
|
||||
|
||||
|
||||
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext;
|
||||
import org.apache.dolphinscheduler.spi.utils.DateUtils;
|
||||
import org.apache.dolphinscheduler.spi.utils.JSONUtils;
|
||||
|
||||
import org.apache.dolphinscheduler.spi.utils.PropertyUtils;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
@ -30,8 +30,6 @@ import org.powermock.core.classloader.annotations.PowerMockIgnore;
|
||||
import org.powermock.core.classloader.annotations.PrepareForTest;
|
||||
import org.powermock.core.classloader.annotations.SuppressStaticInitializationFor;
|
||||
import org.powermock.modules.junit4.PowerMockRunner;
|
||||
import org.apache.dolphinscheduler.plugin.task.api.TaskConstants;
|
||||
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.powermock.api.mockito.PowerMockito.spy;
|
||||
import static org.powermock.api.mockito.PowerMockito.when;
|
||||
@ -40,6 +38,7 @@ import static org.powermock.api.mockito.PowerMockito.when;
|
||||
@PrepareForTest({
|
||||
JSONUtils.class,
|
||||
PropertyUtils.class,
|
||||
DateUtils.class
|
||||
})
|
||||
@PowerMockIgnore({"javax.*"})
|
||||
@SuppressStaticInitializationFor("org.apache.dolphinscheduler.spi.utils.PropertyUtils")
|
||||
@ -99,6 +98,39 @@ public class JupyterTaskTest {
|
||||
"--progress-bar");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBuildJupyterCommandWithRequirements() throws Exception {
|
||||
String parameters = buildJupyterCommandWithRequirements();
|
||||
TaskExecutionContext taskExecutionContext = PowerMockito.mock(TaskExecutionContext.class);
|
||||
when(taskExecutionContext.getTaskParams()).thenReturn(parameters);
|
||||
PowerMockito.mockStatic(PropertyUtils.class);
|
||||
when(PropertyUtils.getString(any())).thenReturn("/opt/anaconda3/etc/profile.d/conda.sh");
|
||||
PowerMockito.mockStatic(DateUtils.class);
|
||||
when(DateUtils.getTimestampString()).thenReturn("123456789");
|
||||
JupyterTask jupyterTask = spy(new JupyterTask(taskExecutionContext));
|
||||
jupyterTask.init();
|
||||
Assert.assertEquals(jupyterTask.buildCommand(),
|
||||
"set +e \n " +
|
||||
"source /opt/anaconda3/etc/profile.d/conda.sh && " +
|
||||
"conda create -n jupyter-tmp-env-123456789 -y && " +
|
||||
"conda activate jupyter-tmp-env-123456789 && " +
|
||||
"pip install -r requirements.txt && " +
|
||||
"papermill " +
|
||||
"/test/input_note.ipynb " +
|
||||
"/test/output_note.ipynb " +
|
||||
"--parameters city Shanghai " +
|
||||
"--parameters factor 0.01 " +
|
||||
"--kernel python3 " +
|
||||
"--engine default_engine " +
|
||||
"--execution-timeout 10 " +
|
||||
"--start-timeout 3 " +
|
||||
"--version " +
|
||||
"--inject-paths " +
|
||||
"--progress-bar \n " +
|
||||
"conda deactivate && conda remove --name jupyter-tmp-env-123456789 --all -y"
|
||||
);
|
||||
}
|
||||
|
||||
private String buildJupyterCommandWithLocalEnv() {
|
||||
JupyterParameters jupyterParameters = new JupyterParameters();
|
||||
jupyterParameters.setCondaEnvName("jupyter-lab");
|
||||
@ -127,4 +159,18 @@ public class JupyterTaskTest {
|
||||
return JSONUtils.toJsonString(jupyterParameters);
|
||||
}
|
||||
|
||||
private String buildJupyterCommandWithRequirements() {
|
||||
JupyterParameters jupyterParameters = new JupyterParameters();
|
||||
jupyterParameters.setCondaEnvName("requirements.txt");
|
||||
jupyterParameters.setInputNotePath("/test/input_note.ipynb");
|
||||
jupyterParameters.setOutputNotePath("/test/output_note.ipynb");
|
||||
jupyterParameters.setParameters("{\"city\": \"Shanghai\", \"factor\": \"0.01\"}");
|
||||
jupyterParameters.setKernel("python3");
|
||||
jupyterParameters.setEngine("default_engine");
|
||||
jupyterParameters.setExecutionTimeout("10");
|
||||
jupyterParameters.setStartTimeout("3");
|
||||
jupyterParameters.setOthers("--version");
|
||||
return JSONUtils.toJsonString(jupyterParameters);
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user