diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/StateEventHandleFailure.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/StateEventHandleFailure.java new file mode 100644 index 0000000000..5e757c7858 --- /dev/null +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/StateEventHandleFailure.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.server.master.event; + +/** + * This exception represent the exception can be recovered, when we get this exception, + * we will move the event to the fail of the queue. + */ +public class StateEventHandleFailure extends Exception { + + public StateEventHandleFailure(String message) { + super(message); + } + + public StateEventHandleFailure(String message, Throwable throwable) { + super(message, throwable); + } +} diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/StateEventHandler.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/StateEventHandler.java index 00808b2e29..377ea71f62 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/StateEventHandler.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/StateEventHandler.java @@ -28,9 +28,10 @@ public interface StateEventHandler { * @param stateEvent given state event. * @throws StateEventHandleException this exception means it can be recovered. * @throws StateEventHandleError this exception means it cannot be recovered, so the event need to drop. + * @throws StateEventHandleException this means it can be recovered. */ - boolean handleStateEvent(WorkflowExecuteRunnable workflowExecuteRunnable, StateEvent stateEvent) - throws StateEventHandleException, StateEventHandleError; + boolean handleStateEvent(WorkflowExecuteRunnable workflowExecuteRunnable, + StateEvent stateEvent) throws StateEventHandleException, StateEventHandleError, StateEventHandleFailure; StateEventType getEventType(); } diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskWaitTaskGroupStateHandler.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskWaitTaskGroupStateHandler.java index 9a3c59a949..bb0c7b8b70 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskWaitTaskGroupStateHandler.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskWaitTaskGroupStateHandler.java @@ -20,13 +20,24 @@ package org.apache.dolphinscheduler.server.master.event; import org.apache.dolphinscheduler.common.enums.StateEventType; import org.apache.dolphinscheduler.server.master.runner.WorkflowExecuteRunnable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import com.google.auto.service.AutoService; @AutoService(StateEventHandler.class) public class TaskWaitTaskGroupStateHandler implements StateEventHandler { + + private static final Logger logger = LoggerFactory.getLogger(TaskWaitTaskGroupStateHandler.class); + @Override - public boolean handleStateEvent(WorkflowExecuteRunnable workflowExecuteRunnable, StateEvent stateEvent) { - return workflowExecuteRunnable.checkForceStartAndWakeUp(stateEvent); + public boolean handleStateEvent(WorkflowExecuteRunnable workflowExecuteRunnable, + StateEvent stateEvent) throws StateEventHandleFailure { + logger.info("Handle task instance wait task group event, taskInstanceId: {}", stateEvent.getTaskInstanceId()); + if (!workflowExecuteRunnable.checkForceStartAndWakeUp(stateEvent)) { + throw new StateEventHandleFailure("Task state event handle failed due to robing taskGroup resource failed"); + } + return true; } @Override diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java index 5c0fec018d..fda109e1f4 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java @@ -70,6 +70,7 @@ import org.apache.dolphinscheduler.server.master.dispatch.executor.NettyExecutor import org.apache.dolphinscheduler.server.master.event.StateEvent; import org.apache.dolphinscheduler.server.master.event.StateEventHandleError; import org.apache.dolphinscheduler.server.master.event.StateEventHandleException; +import org.apache.dolphinscheduler.server.master.event.StateEventHandleFailure; import org.apache.dolphinscheduler.server.master.event.StateEventHandler; import org.apache.dolphinscheduler.server.master.event.StateEventHandlerManager; import org.apache.dolphinscheduler.server.master.metrics.TaskMetrics; @@ -279,6 +280,13 @@ public class WorkflowExecuteRunnable implements Callable { stateEvent, stateEventHandleException); ThreadUtils.sleep(Constants.SLEEP_TIME_MILLIS); + } catch (StateEventHandleFailure stateEventHandleFailure) { + logger.error("State event handle failed, will move event to the tail: {}", + stateEvent, + stateEventHandleFailure); + this.stateEvents.remove(stateEvent); + this.stateEvents.offer(stateEvent); + ThreadUtils.sleep(Constants.SLEEP_TIME_MILLIS); } catch (Exception e) { // we catch the exception here, since if the state event handle failed, the state event will still keep in the stateEvents queue. logger.error("State event handle error, get a unknown exception, will retry this event: {}",