[Improvement][SQL Task]use default sql segment separator (#10869)

This commit is contained in:
zhuxt2015 2022-09-26 11:08:57 +08:00 committed by GitHub
parent 64f4cb4f3e
commit e6832220c3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 51 additions and 110 deletions

View File

@ -9,4 +9,4 @@ This document records the incompatible updates between each version. You need to
## 3.0.0
* Copy and import workflow without 'copy' suffix [#10607](https://github.com/apache/dolphinscheduler/pull/10607)
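* Use a semicolon followed by a line break (`;\n`) as the default SQL segment separator; the user-defined segment separator option of the SQL task is removed [#10869](https://github.com/apache/dolphinscheduler/pull/10869)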

View File

@ -20,9 +20,7 @@ SQL任务类型用于连接数据库并执行相应SQL。
- sql类型支持查询和非查询两种。
- 查询:支持 `DML select` 类型的命令,是有结果集返回的,可以指定邮件通知为表格、附件或表格附件三种模板;
- 非查询:支持 `DDL`全部命令 和 `DML update、delete、insert` 三种类型的命令;
- 分段执行符号提供在数据源不支持一次执行多段SQL语句时拆分SQL语句的符号来进行多次调用数据源执行方法。
例子1.当数据源选择Hive数据源时不需要填写此参数。因为Hive数据源本身支持一次执行多段SQL语句
2.当数据源选择MySQL数据源时并且要执行多段SQL语句时需要填写此参数为分号 `;`。因为MySQL数据源不支持一次执行多段SQL语句
- 默认采用`;\n`(分号加换行)作为SQL分隔符,将SQL拆分成多段语句依次执行。Hive和Spark数据源支持一次执行多段SQL语句,故不会拆分。
- sql参数输入参数格式为key1=value1;key2=value2…
- sql语句SQL语句
- UDF函数对于HIVE类型的数据源可以引用资源中心中创建的UDF函数其他类型的数据源暂不支持UDF函数。

View File

@ -9,4 +9,4 @@
## 3.0.0
* Copy and import workflow without 'copy' suffix [#10607](https://github.com/apache/dolphinscheduler/pull/10607)
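* Use a semicolon followed by a line break (`;\n`) as the default SQL segment separator; the user-defined segment separator option of the SQL task is removed [#10869](https://github.com/apache/dolphinscheduler/pull/10869)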

View File

@ -75,4 +75,12 @@ public enum DbType {
public boolean isHive() {
    // Enum constants are singletons, so an identity comparison is sufficient.
    return DbType.HIVE == this;
}
/**
 * Tell whether this database type can execute several segmented SQL statements
 * in a single call.
 *
 * @return {@code true} for {@code HIVE} and {@code SPARK}, {@code false} for every other type
 */
public boolean isSupportMultipleStatement() {
    switch (this) {
        case HIVE:
        case SPARK:
            return true;
        default:
            return false;
    }
}
}

View File

@ -114,15 +114,6 @@ public class SqlParameters extends AbstractParameters {
private int limit;
/**
* segment separator
*
* <p>The segment separator is used
* when the data source does not support multi-segment SQL execution,
* and the client needs to split the SQL and execute it multiple times.</p>
*/
private String segmentSeparator;
/**
 * @return the value currently stored in the {@code limit} field
 */
public int getLimit() {
    return this.limit;
}
@ -235,14 +226,6 @@ public class SqlParameters extends AbstractParameters {
this.groupId = groupId;
}
/**
 * @return the segment separator used to split the SQL when the data source
 *         does not support multi-segment SQL execution
 */
public String getSegmentSeparator() {
    return this.segmentSeparator;
}
/**
 * Store the segment separator.
 *
 * @param segmentSeparator separator used to split the SQL when the data source
 *                         does not support multi-segment SQL execution
 */
public void setSegmentSeparator(String segmentSeparator) {
this.segmentSeparator = segmentSeparator;
}
@Override
public boolean checkParameters() {
return datasource != 0 && StringUtils.isNotEmpty(type) && StringUtils.isNotEmpty(sql);
@ -310,7 +293,6 @@ public class SqlParameters extends AbstractParameters {
+ ", sendEmail=" + sendEmail
+ ", displayRows=" + displayRows
+ ", limit=" + limit
+ ", segmentSeparator=" + segmentSeparator
+ ", udfs='" + udfs + '\''
+ ", showType='" + showType + '\''
+ ", connParams='" + connParams + '\''

View File

@ -1,64 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.plugin.task.sql;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import com.google.common.base.Strings;
/**
 * Utility that cuts a SQL script into the segments that have to be executed separately.
 */
public class SqlSplitter {

    private static final String LINE_SEPARATOR = "\n";

    private static final String LINE_COMMENT_PREFIX = "--";

    private SqlSplitter() {
        // static utility, not meant to be instantiated
    }

    /**
     * Split sql by segment separator.
     * <p>The segment separator is used
     * when the data source does not support multi-segment SQL execution,
     * and the client needs to split the SQL and execute it multiple times.</p>
     *
     * <p>The script is read line by line. Blank lines and {@code --} comment lines
     * (indented or not) are skipped. Every remaining line is appended, preceded by a
     * line break, to the current segment; a line whose trimmed text ends with the
     * separator closes the segment. The separator itself stays in the segment.</p>
     *
     * @param sql the SQL script to split
     * @param segmentSeparator the text that ends a segment; when {@code null} or empty
     *                         the script is returned unsplit as a single-element list
     * @return the segments in script order; text left after the last separator forms
     *         a final segment
     */
    public static List<String> split(String sql, String segmentSeparator) {
        if (Strings.isNullOrEmpty(segmentSeparator)) {
            return Collections.singletonList(sql);
        }

        List<String> segments = new ArrayList<>();
        StringBuilder stmt = new StringBuilder();
        for (String line : sql.split(LINE_SEPARATOR)) {
            // Use the trimmed text for both checks so that an indented comment line is
            // skipped exactly like an unindented one and can never terminate a segment.
            String trimmed = line.trim();
            if (trimmed.isEmpty() || trimmed.startsWith(LINE_COMMENT_PREFIX)) {
                continue;
            }
            stmt.append(LINE_SEPARATOR).append(line);
            if (trimmed.endsWith(segmentSeparator)) {
                segments.add(stmt.toString());
                stmt.setLength(0);
            }
        }
        // keep a trailing statement that is not terminated by the separator
        if (stmt.length() > 0) {
            segments.add(stmt.toString());
        }
        return segments;
    }
}

View File

@ -17,9 +17,6 @@
package org.apache.dolphinscheduler.plugin.task.sql;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.dolphinscheduler.plugin.datasource.api.plugin.DataSourceClientProvider;
import org.apache.dolphinscheduler.plugin.datasource.api.utils.CommonUtils;
import org.apache.dolphinscheduler.plugin.datasource.api.utils.DataSourceUtils;
@ -38,7 +35,8 @@ import org.apache.dolphinscheduler.spi.datasource.BaseConnectionParam;
import org.apache.dolphinscheduler.spi.enums.DbType;
import org.apache.dolphinscheduler.spi.utils.JSONUtils;
import org.apache.dolphinscheduler.spi.utils.StringUtils;
import org.slf4j.Logger;
import org.apache.commons.collections4.CollectionUtils;
import java.sql.Connection;
import java.sql.PreparedStatement;
@ -48,6 +46,7 @@ import java.sql.SQLException;
import java.sql.Statement;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -56,6 +55,11 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
public class SqlTask extends AbstractTask {
/**
@ -88,6 +92,8 @@ public class SqlTask extends AbstractTask {
public static final int TEST_FLAG_YES = 1;
private static final String SQL_SEPARATOR = ";\n";
/**
* Abstract Yarn Task
*
@ -127,15 +133,18 @@ public class SqlTask extends AbstractTask {
sqlParameters.getConnParams(),
sqlParameters.getVarPool(),
sqlParameters.getLimit());
String separator = SQL_SEPARATOR;
try {
// get datasource
baseConnectionParam = (BaseConnectionParam) DataSourceUtils.buildConnectionParams(
DbType.valueOf(sqlParameters.getType()),
sqlTaskExecutionContext.getConnectionParams());
if (DbType.valueOf(sqlParameters.getType()).isSupportMultipleStatement()) {
separator = "";
}
// ready to execute SQL and parameter entity Map
List<SqlBinds> mainStatementSqlBinds = SqlSplitter.split(sqlParameters.getSql(), sqlParameters.getSegmentSeparator())
List<SqlBinds> mainStatementSqlBinds = split(sqlParameters.getSql(), separator)
.stream()
.map(this::getSqlAndSqlParamsMap)
.collect(Collectors.toList());
@ -170,6 +179,31 @@ public class SqlTask extends AbstractTask {
}
/**
 * Split sql by segment separator.
 * <p>The segment separator is used
 * when the data source does not support multi-segment SQL execution,
 * and the client needs to split the SQL and execute it multiple times.</p>
 *
 * <p>The separator is matched literally, not as a regular expression. A segment is
 * discarded only when it holds nothing but blank lines and {@code --} comment lines;
 * a segment that starts with a comment but also contains a statement is kept as is.</p>
 *
 * @param sql the SQL script to split
 * @param segmentSeparator literal text that separates two segments; when {@code null}
 *                         or empty the script is returned unsplit as a single-element list
 * @return the remaining segments in script order, without the separator
 */
public static List<String> split(String sql, String segmentSeparator) {
    if (segmentSeparator == null || segmentSeparator.isEmpty()) {
        return Collections.singletonList(sql);
    }

    // String#split expects a regular expression; quote the separator so that
    // characters such as '|', '.' or '$' are treated as plain text.
    String[] candidates = sql.split(Pattern.quote(segmentSeparator));
    List<String> segments = new ArrayList<>();
    for (String candidate : candidates) {
        if (hasExecutableLine(candidate)) {
            segments.add(candidate);
        }
    }
    return segments;
}

/**
 * Check whether a segment contains at least one line that is neither blank nor a
 * {@code --} line comment.
 */
private static boolean hasExecutableLine(String segment) {
    for (String line : segment.split("\n")) {
        String trimmed = line.trim();
        if (!trimmed.isEmpty() && !trimmed.startsWith("--")) {
            return true;
        }
    }
    return false;
}
/**
* execute function and sql
*

View File

@ -648,8 +648,6 @@ export default {
emr_flow_define_json_tips: 'Please enter the definition of the job flow.',
emr_steps_define_json: 'stepsDefineJson',
emr_steps_define_json_tips: 'Please enter the definition of the emr step.',
segment_separator: 'Segment Execution Separator',
segment_separator_tips: 'Please enter the segment execution separator',
zeppelin_note_id: 'zeppelinNoteId',
zeppelin_note_id_tips: 'Please enter the note id of your zeppelin note',
zeppelin_paragraph_id: 'zeppelinParagraphId',

View File

@ -642,8 +642,6 @@ export default {
emr_flow_define_json_tips: '请输入工作流定义',
emr_steps_define_json: 'stepsDefineJson',
emr_steps_define_json_tips: '请输入EMR步骤定义',
segment_separator: '分段执行符号',
segment_separator_tips: '请输入分段执行符号',
zeppelin_note_id: 'zeppelinNoteId',
zeppelin_note_id_tips: '请输入zeppelin note id',
zeppelin_paragraph_id: 'zeppelinParagraphId',

View File

@ -24,7 +24,6 @@ import type { IJsonItem } from '../types'
export function useSqlType(model: { [field: string]: any }): IJsonItem[] {
const { t } = useI18n()
const querySpan = computed(() => (model.sqlType === '0' ? 6 : 0))
const nonQuerySpan = computed(() => (model.sqlType === '1' ? 18 : 0))
const emailSpan = computed(() =>
model.sqlType === '0' && model.sendEmail ? 24 : 0
)
@ -68,15 +67,6 @@ export function useSqlType(model: { [field: string]: any }): IJsonItem[] {
required: true
}
},
{
type: 'input',
field: 'segmentSeparator',
name: t('project.node.segment_separator'),
props: {
placeholder: t('project.node.segment_separator_tips')
},
span: nonQuerySpan
},
{
type: 'switch',
field: 'sendEmail',

View File

@ -189,7 +189,6 @@ export function formatParams(data: INodeData): {
taskParams.sqlType = data.sqlType
taskParams.preStatements = data.preStatements
taskParams.postStatements = data.postStatements
taskParams.segmentSeparator = data.segmentSeparator
taskParams.sendEmail = data.sendEmail
taskParams.displayRows = data.displayRows
if (data.sqlType === '0' && data.sendEmail) {

View File

@ -46,7 +46,6 @@ export function useSql({
timeout: 30,
type: 'MYSQL',
displayRows: 10,
segmentSeparator: '',
sql: '',
sqlType: '0',
preStatements: [],

View File

@ -253,7 +253,6 @@ interface ITaskParams {
datasource?: string
sql?: string
sqlType?: string
segmentSeparator?: string
sendEmail?: boolean
displayRows?: number
title?: string