mirror of
https://gitee.com/dolphinscheduler/DolphinScheduler.git
synced 2024-11-29 18:58:05 +08:00
Solve the deadlock problem caused by queuing (#13191)
* Solve the deadlock problem caused by queuing
* Solve the deadlock problem caused by queuing
* Solve the deadlock problem caused by queuing
* Solve the deadlock problem caused by queuing, move the event to the tail by throwing an exception
Co-authored-by: wfs <wangfushun@cdqcp.cpm>
(cherry picked from commit 7a0a2c2a46
)
This commit is contained in:
parent
19771e506f
commit
bc1cf25f4d
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.dolphinscheduler.server.master.event;
|
||||
|
||||
/**
|
||||
* This exception represents a failure that can be recovered from; when we get this exception,
|
||||
* we will move the event to the tail of the queue.
|
||||
*/
|
||||
public class StateEventHandleFailure extends Exception {
|
||||
|
||||
/**
* Creates a failure carrying only a detail message.
*
* @param message detail message, passed unchanged to the {@link Exception} constructor.
*/
public StateEventHandleFailure(String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
/**
* Creates a failure carrying a detail message and the underlying cause.
*
* @param message detail message, passed unchanged to the {@link Exception} constructor.
* @param throwable the cause of this failure, passed unchanged to the {@link Exception} constructor.
*/
public StateEventHandleFailure(String message, Throwable throwable) {
|
||||
super(message, throwable);
|
||||
}
|
||||
|
||||
}
|
@ -28,9 +28,10 @@ public interface StateEventHandler {
|
||||
* @param stateEvent given state event.
|
||||
* @throws StateEventHandleException this exception means it can be recovered.
|
||||
* @throws StateEventHandleError this exception means it cannot be recovered, so the event need to drop.
|
||||
* @throws StateEventHandleFailure this means it can be recovered, and the event will be moved to the tail of the queue.
|
||||
*/
|
||||
boolean handleStateEvent(WorkflowExecuteRunnable workflowExecuteRunnable, StateEvent stateEvent)
|
||||
throws StateEventHandleException, StateEventHandleError;
|
||||
boolean handleStateEvent(WorkflowExecuteRunnable workflowExecuteRunnable,
|
||||
StateEvent stateEvent) throws StateEventHandleException, StateEventHandleError, StateEventHandleFailure;
|
||||
|
||||
StateEventType getEventType();
|
||||
}
|
||||
|
@ -20,13 +20,24 @@ package org.apache.dolphinscheduler.server.master.event;
|
||||
import org.apache.dolphinscheduler.common.enums.StateEventType;
|
||||
import org.apache.dolphinscheduler.server.master.runner.WorkflowExecuteRunnable;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.auto.service.AutoService;
|
||||
|
||||
@AutoService(StateEventHandler.class)
|
||||
public class TaskWaitTaskGroupStateHandler implements StateEventHandler {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(TaskWaitTaskGroupStateHandler.class);
|
||||
|
||||
@Override
|
||||
public boolean handleStateEvent(WorkflowExecuteRunnable workflowExecuteRunnable, StateEvent stateEvent) {
|
||||
return workflowExecuteRunnable.checkForceStartAndWakeUp(stateEvent);
|
||||
public boolean handleStateEvent(WorkflowExecuteRunnable workflowExecuteRunnable,
|
||||
StateEvent stateEvent) throws StateEventHandleFailure {
|
||||
logger.info("Handle task instance wait task group event, taskInstanceId: {}", stateEvent.getTaskInstanceId());
|
||||
if (!workflowExecuteRunnable.checkForceStartAndWakeUp(stateEvent)) {
|
||||
throw new StateEventHandleFailure("Task state event handle failed due to robing taskGroup resource failed");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -70,6 +70,7 @@ import org.apache.dolphinscheduler.server.master.dispatch.executor.NettyExecutor
|
||||
import org.apache.dolphinscheduler.server.master.event.StateEvent;
|
||||
import org.apache.dolphinscheduler.server.master.event.StateEventHandleError;
|
||||
import org.apache.dolphinscheduler.server.master.event.StateEventHandleException;
|
||||
import org.apache.dolphinscheduler.server.master.event.StateEventHandleFailure;
|
||||
import org.apache.dolphinscheduler.server.master.event.StateEventHandler;
|
||||
import org.apache.dolphinscheduler.server.master.event.StateEventHandlerManager;
|
||||
import org.apache.dolphinscheduler.server.master.metrics.TaskMetrics;
|
||||
@ -279,6 +280,13 @@ public class WorkflowExecuteRunnable implements Callable<WorkflowSubmitStatue> {
|
||||
stateEvent,
|
||||
stateEventHandleException);
|
||||
ThreadUtils.sleep(Constants.SLEEP_TIME_MILLIS);
|
||||
} catch (StateEventHandleFailure stateEventHandleFailure) {
|
||||
logger.error("State event handle failed, will move event to the tail: {}",
|
||||
stateEvent,
|
||||
stateEventHandleFailure);
|
||||
this.stateEvents.remove(stateEvent);
|
||||
this.stateEvents.offer(stateEvent);
|
||||
ThreadUtils.sleep(Constants.SLEEP_TIME_MILLIS);
|
||||
} catch (Exception e) {
|
||||
// we catch the exception here, since if the state event handle failed, the state event will still keep in the stateEvents queue.
|
||||
logger.error("State event handle error, get a unknown exception, will retry this event: {}",
|
||||
|
Loading…
Reference in New Issue
Block a user