mirror of
https://gitee.com/dolphinscheduler/DolphinScheduler.git
synced 2024-12-02 04:08:31 +08:00
1038 lines
65 KiB
HTML
1038 lines
65 KiB
HTML
|
|
<!DOCTYPE HTML>
|
|
<html lang="zh-hans" >
|
|
<head>
|
|
<meta charset="UTF-8">
|
|
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
|
<title>系统架构分析 · 调度系统-EasyScheduler</title>
|
|
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
|
|
<meta name="description" content="">
|
|
<meta name="generator" content="GitBook 3.2.3">
|
|
<meta name="author" content="YIGUAN">
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../gitbook/style.css">
|
|
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../gitbook/gitbook-plugin-expandable-chapters/expandable-chapters.css">
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../gitbook/gitbook-plugin-insert-logo-link/plugin.css">
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../gitbook/gitbook-plugin-highlight/website.css">
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../gitbook/gitbook-plugin-search/search.css">
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../gitbook/gitbook-plugin-fontsettings/website.css">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../styles/website.css">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<meta name="HandheldFriendly" content="true"/>
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
<meta name="apple-mobile-web-app-capable" content="yes">
|
|
<meta name="apple-mobile-web-app-status-bar-style" content="black">
|
|
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="../gitbook/images/apple-touch-icon-precomposed-152.png">
|
|
<link rel="shortcut icon" href="../gitbook/images/favicon.ico" type="image/x-icon">
|
|
|
|
|
|
<link rel="next" href="guide-architecture.html" />
|
|
|
|
|
|
|
|
</head>
|
|
<body>
|
|
|
|
<div class="book">
|
|
<div class="book-summary">
|
|
|
|
|
|
<div id="book-search-input" role="search">
|
|
<input type="text" placeholder="输入并搜索" aria-label="输入并搜索" />
|
|
</div>
|
|
|
|
|
|
<nav role="navigation">
|
|
|
|
|
|
|
|
<ul class="summary">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="chapter " data-level="1.1" data-path="../">
|
|
|
|
<a href="../">
|
|
|
|
|
|
Easyscheduler简介
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2" >
|
|
|
|
<span>
|
|
|
|
|
|
导读指南
|
|
|
|
</span>
|
|
|
|
|
|
|
|
<ul class="articles">
|
|
|
|
|
|
<li class="chapter active" data-level="1.2.1" data-path="guide-architecture.html">
|
|
|
|
<a href="guide-architecture.html">
|
|
|
|
|
|
系统架构分析
|
|
|
|
</a>
|
|
|
|
|
|
|
|
<ul class="articles">
|
|
|
|
|
|
<li class="chapter " data-level="1.2.1.1" data-path="guide-architecture.html">
|
|
|
|
<a href="guide-architecture.html#导语">
|
|
|
|
|
|
导语
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.1.2" data-path="guide-architecture.html">
|
|
|
|
<a href="guide-architecture.html#背景">
|
|
|
|
|
|
背景
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.1.3" data-path="guide-architecture.html">
|
|
|
|
<a href="guide-architecture.html#总架构设计">
|
|
|
|
|
|
总架构设计
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.1.4" data-path="guide-architecture.html">
|
|
|
|
<a href="guide-architecture.html#中心化思想">
|
|
|
|
|
|
中心化思想
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.1.5" data-path="guide-architecture.html">
|
|
|
|
<a href="guide-architecture.html#分布式锁实践">
|
|
|
|
|
|
分布式锁实践
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.1.6" data-path="guide-architecture.html">
|
|
|
|
<a href="guide-architecture.html#线程不足循环等待问题">
|
|
|
|
|
|
线程不足循环等待问题
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.1.7" data-path="guide-architecture.html">
|
|
|
|
<a href="guide-architecture.html#容错设计">
|
|
|
|
|
|
容错设计
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.1.8" data-path="guide-architecture.html">
|
|
|
|
<a href="guide-architecture.html#日志访问">
|
|
|
|
|
|
Logback和gRPC实现日志访问
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.1.9" data-path="guide-architecture.html">
|
|
|
|
<a href="guide-architecture.html#总结">
|
|
|
|
|
|
总结
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.2" data-path="guide-manual.html">
|
|
|
|
<a href="guide-manual.html">
|
|
|
|
|
|
使用手册
|
|
|
|
</a>
|
|
|
|
|
|
|
|
<ul class="articles">
|
|
|
|
|
|
<li class="chapter " data-level="1.2.2.1" data-path="guide-manual.html">
|
|
|
|
<a href="guide-manual.html#登录">
|
|
|
|
|
|
登录
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.2.2" data-path="guide-manual.html">
|
|
|
|
<a href="guide-manual.html#安全中心">
|
|
|
|
|
|
安全中心
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.2.3" data-path="guide-manual.html">
|
|
|
|
<a href="guide-manual.html#资源中心">
|
|
|
|
|
|
资源中心
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.2.4" data-path="guide-manual.html">
|
|
|
|
<a href="guide-manual.html#数据源中心">
|
|
|
|
|
|
数据源中心
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.2.5" data-path="guide-manual.html">
|
|
|
|
<a href="guide-manual.html#项目管理">
|
|
|
|
|
|
项目管理
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.2.6" data-path="guide-manual.html">
|
|
|
|
<a href="guide-manual.html#首页">
|
|
|
|
|
|
站点首页
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.2.7" data-path="guide-manual.html">
|
|
|
|
<a href="guide-manual.html#任务依赖">
|
|
|
|
|
|
任务依赖
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.2.2.8" data-path="guide-manual.html">
|
|
|
|
<a href="guide-manual.html#系统参数">
|
|
|
|
|
|
系统参数
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3" >
|
|
|
|
<span>
|
|
|
|
|
|
项目部署文档
|
|
|
|
</span>
|
|
|
|
|
|
|
|
<ul class="articles">
|
|
|
|
|
|
<li class="chapter " data-level="1.3.1" data-path="deploy-foreground.html">
|
|
|
|
<a href="deploy-foreground.html">
|
|
|
|
|
|
前端部署
|
|
|
|
</a>
|
|
|
|
|
|
|
|
<ul class="articles">
|
|
|
|
|
|
<li class="chapter " data-level="1.3.1.1" data-path="deploy-foreground.html">
|
|
|
|
<a href="deploy-foreground.html#前端项目环境构建及编译">
|
|
|
|
|
|
前端项目环境构建及编译
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.1.2" data-path="deploy-foreground.html">
|
|
|
|
<a href="deploy-foreground.html#安装及配置">
|
|
|
|
|
|
安装及配置
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.1.3" data-path="deploy-foreground.html">
|
|
|
|
<a href="deploy-foreground.html#项目生产环境配置">
|
|
|
|
|
|
项目生产环境Nginx配置
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.1.4" data-path="deploy-foreground.html">
|
|
|
|
<a href="deploy-foreground.html#前端项目发布">
|
|
|
|
|
|
前端项目发布
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.1.5" data-path="deploy-foreground.html">
|
|
|
|
<a href="deploy-foreground.html#问题">
|
|
|
|
|
|
问题
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.2" data-path="deploy-background.html">
|
|
|
|
<a href="deploy-background.html">
|
|
|
|
|
|
后端部署
|
|
|
|
</a>
|
|
|
|
|
|
|
|
<ul class="articles">
|
|
|
|
|
|
<li class="chapter " data-level="1.3.2.1" data-path="deploy-background.html">
|
|
|
|
<a href="deploy-background.html#基础软件安装">
|
|
|
|
|
|
基础软件安装
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.2.2" data-path="deploy-background.html">
|
|
|
|
<a href="deploy-background.html#项目编译">
|
|
|
|
|
|
项目编译
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.2.3" data-path="deploy-background.html">
|
|
|
|
<a href="deploy-background.html#数据库初始化">
|
|
|
|
|
|
数据库初始化
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.2.4" data-path="deploy-background.html">
|
|
|
|
<a href="deploy-background.html#创建部署用户">
|
|
|
|
|
|
创建部署用户
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.2.5" data-path="deploy-background.html">
|
|
|
|
<a href="deploy-background.html#配置文件">
|
|
|
|
|
|
配置文件
|
|
|
|
</a>
|
|
|
|
|
|
|
|
<ul class="articles">
|
|
|
|
|
|
<li class="chapter " data-level="1.3.2.5.1" data-path="deploy-background.html">
|
|
|
|
<a href="deploy-background.html#escheduler-alert">
|
|
|
|
|
|
escheduler-alert
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.2.5.2" data-path="deploy-background.html">
|
|
|
|
<a href="deploy-background.html#escheduler-common">
|
|
|
|
|
|
escheduler-common
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.2.5.3" data-path="deploy-background.html">
|
|
|
|
<a href="deploy-background.html#escheduler-dao">
|
|
|
|
|
|
escheduler-dao
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.2.5.4" data-path="deploy-background.html">
|
|
|
|
<a href="deploy-background.html#escheduler-server">
|
|
|
|
|
|
escheduler-server
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.2.5.5" data-path="deploy-background.html">
|
|
|
|
<a href="deploy-background.html#escheduler-web">
|
|
|
|
|
|
escheduler-web
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.2.6" data-path="deploy-background.html">
|
|
|
|
<a href="deploy-background.html#启动停止命令">
|
|
|
|
|
|
启动停止命令
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.2.7" data-path="deploy-background.html">
|
|
|
|
<a href="deploy-background.html#一键启停脚本">
|
|
|
|
|
|
一键启停脚本
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.2.8" data-path="deploy-background.html">
|
|
|
|
<a href="deploy-background.html#监控服务">
|
|
|
|
|
|
监控服务
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.3.2.9" data-path="deploy-background.html">
|
|
|
|
<a href="deploy-background.html#日志查看">
|
|
|
|
|
|
日志查看
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.4" >
|
|
|
|
<span>
|
|
|
|
|
|
前端开发文档
|
|
|
|
</span>
|
|
|
|
|
|
|
|
<ul class="articles">
|
|
|
|
|
|
<li class="chapter " data-level="1.4.1" data-path="foreground-2.html">
|
|
|
|
<a href="foreground-2.html">
|
|
|
|
|
|
安装
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
|
|
<li class="chapter " data-level="1.5" >
|
|
|
|
<span>
|
|
|
|
|
|
后端开发文档
|
|
|
|
</span>
|
|
|
|
|
|
|
|
<ul class="articles">
|
|
|
|
|
|
<li class="chapter " data-level="1.5.1" data-path="background-2.html">
|
|
|
|
<a href="background-2.html">
|
|
|
|
|
|
后端说明2
|
|
|
|
</a>
|
|
|
|
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
<li class="divider"></li>
|
|
|
|
<li>
|
|
<a href="https://www.gitbook.com" target="_blank" rel="noopener noreferrer" class="gitbook-link">
|
|
本书使用 GitBook 发布
|
|
</a>
|
|
</li>
|
|
</ul>
|
|
|
|
|
|
</nav>
|
|
|
|
|
|
</div>
|
|
|
|
<div class="book-body">
|
|
|
|
<div class="body-inner">
|
|
|
|
|
|
|
|
<div class="book-header" role="navigation">
|
|
|
|
|
|
<!-- Title -->
|
|
<h1>
|
|
<i class="fa fa-circle-o-notch fa-spin"></i>
|
|
<a href=".." >系统架构分析</a>
|
|
</h1>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<div class="page-wrapper" tabindex="-1" role="main">
|
|
<div class="page-inner">
|
|
|
|
<div id="book-search-results">
|
|
<div class="search-noresults">
|
|
|
|
<section class="normal markdown-section">
|
|
|
|
<h1 id="easyscheduler大数据调度系统架构分析">EasyScheduler大数据调度系统架构分析</h1>
|
|
<h2 id="架构设计">架构设计</h2>
|
|
<p><img src="../images/esr_2.png" alt="PNG"></p>
|
|
<blockquote>
|
|
<h4 id="easyscheduler设计围绕四个服务展开,ui、web、server和alert。">EasyScheduler设计围绕四个服务展开,UI、Web、Server和Alert。</h4>
|
|
<ul>
|
|
<li><h5 id="ui--使用易观封装的vue及jsplumb组件开发">UI : 使用易观封装的Vue及jsplumb组件开发</h5>
|
|
</li>
|
|
<li><h5 id="web:使用springboot提供rest-api和ui分离交互">Web:使用SpringBoot提供Rest Api和UI分离交互</h5>
|
|
</li>
|
|
<li><h5 id="server--scheduler调度及分布式任务执行引擎">Server : Scheduler调度及分布式任务执行引擎</h5>
|
|
</li>
|
|
<li><h5 id="alert:告警微服务">Alert:告警微服务</h5>
|
|
</li>
|
|
</ul>
|
|
</blockquote>
|
|
<h2 id="以下将详细介绍server的设计思想和遇到的问题">以下将详细介绍Server的设计思想和遇到的问题</h2>
|
|
<h2 id="去中心化vs中心化">去中心化vs中心化</h2>
|
|
<div id="中心化思想"></div>
|
|
|
|
<h3 id="1-中心化思想">1. 中心化思想</h3>
|
|
<blockquote>
|
|
<h4 id="       中心化的设计理念比较简单,分布式集群中的节点按照角色分工,大体上分为两种角色:">       中心化的设计理念比较简单,分布式集群中的节点按照角色分工,大体上分为两种角色:</h4>
|
|
<p><img src="../images/esr_3.png" alt="PNG"></p>
|
|
<h4 id="       master的角色主要负责任务分发并监督slave的健康状态,可以动态的将任务均衡到slave上,以致slave节点不至于忙死或闲死的状态。">       Master的角色主要负责任务分发并监督Slave的健康状态,可以动态地将任务均衡到Slave上,使Slave节点不至于处于“忙死”或“闲死”的状态。</h4>
|
|
<h4 id="       worker的角色主要负责任务的执行工作并维护和master的心跳,以便master可以分配任务给slave。">       Slave的角色主要负责任务的执行工作并维护和Master的心跳,以便Master可以分配任务给Slave。</h4>
|
|
</blockquote>
|
|
<h3 id="2-中心化思想设计存在的问题:">2. 中心化思想设计存在的问题:</h3>
|
|
<blockquote>
|
|
<h4 id="       一旦master出现了问题,则群龙无首,整个集群就崩溃。为了解决这个问题,大多数masterslave架构模式都采用了主备master的设计方案,可以是热备或者冷备,也可以是自动切换或手动切换,而且越来越多的新系统都开始具备自动选举切换master的能力以提升系统的可用性。">       一旦Master出现了问题,则群龙无首,整个集群就崩溃。为了解决这个问题,大多数Master/Slave架构模式都采用了主备Master的设计方案,可以是热备或者冷备,也可以是自动切换或手动切换,而且越来越多的新系统都开始具备自动选举切换Master的能力,以提升系统的可用性。</h4>
|
|
<h4 id="       另外一个问题是如果scheduler在master上,虽然可以支持一个dag中不同的任务运行在不同的机器上,但是会产生master的过负载。如果scheduler在slave上,则一个dag中所有的任务都只能在某一台机器上进行作业提交,则并行任务比较多的时候,slave的压力可能会比较大。">       另外一个问题是如果Scheduler在Master上,虽然可以支持一个DAG中不同的任务运行在不同的机器上,但是会产生Master的过负载。如果Scheduler在Slave上,则一个DAG中所有的任务都只能在某一台机器上进行作业提交,则并行任务比较多的时候,Slave的压力可能会比较大。</h4>
|
|
</blockquote>
|
|
<h3 id="3-去中心化">3. 去中心化</h3>
|
|
<blockquote>
|
|
<p><img src="../images/esr_4.png" alt="PNG"></p>
|
|
<h4 id="       去中心化设计里,通常没有masterslave的概念,所有的角色都是一样的,地位是平等的,全球互联网就是一个典型的去中心化的分布式系统,联网的任意节点设备down机,都只会影响很小范围的功能。">       去中心化设计里,通常没有Master/Slave的概念,所有的角色都是一样的,地位是平等的,全球互联网就是一个典型的去中心化的分布式系统,联网的任意节点设备宕机,都只会影响很小范围的功能。</h4>
|
|
<h4 id="       去中心化设计的核心设计在于整个分布式系统中不存在一个区别于其他节点的管理者,因此不存在单点故障问题。但由于不存在-管理者节点所以每个节点都需要跟其他节点通信才得到必须要的机器信息,而分布式系统通信的不可靠行,则大大增加了上述功能的实现难度。">       去中心化设计的核心在于整个分布式系统中不存在一个区别于其他节点的“管理者”,因此不存在单点故障问题。但由于不存在“管理者”节点,所以每个节点都需要跟其他节点通信才能得到必要的机器信息,而分布式系统通信的不可靠性,则大大增加了上述功能的实现难度。</h4>
|
|
<h4 id="       实际上,真正去中心化的分布式系统并不多见。反而动态中心化分布式系统正在不断涌出。在这种架构下,集群中的管理者是被动态选择出来的,而不是预置的,并且集群在发生故障的时候,集群的节点会自发的举行会议选举新的管理者主持工作。最典型的案例就是zookeeper及go语言实现的etcd。">       实际上,真正去中心化的分布式系统并不多见。反而动态中心化分布式系统正在不断涌出。在这种架构下,集群中的管理者是被动态选择出来的,而不是预置的,并且集群在发生故障的时候,集群的节点会自发的举行"会议"选举新的"管理者"主持工作。最典型的案例就是ZooKeeper及Go语言实现的Etcd。</h4>
|
|
</blockquote>
|
|
<p> </p>
|
|
<blockquote>
|
|
<h3 id="easyscheduler的去中心化是masterworker注册到zookeeper中,实现master集群和worker集群无中心,并使用zookeeper分布式锁来选举其中的一台master或worker为管理者来执行任务。">EasyScheduler的去中心化是Master/Worker注册到Zookeeper中,实现Master集群和Worker集群无中心,并使用Zookeeper分布式锁来选举其中的一台Master或Worker为“管理者”来执行任务。</h3>
|
|
</blockquote>
|
|
<h2 id="分布式锁实践">分布式锁实践</h2>
|
|
<h3 id="easyscheduler使用zookeeper分布式锁来实现同一时刻只有一台master执行scheduler,或者只有一台worker执行任务的提交。">EasyScheduler使用Zookeeper分布式锁来实现同一时刻只有一台Master执行Scheduler,或者只有一台Worker执行任务的提交。</h3>
|
|
<h3 id="1-获取分布式锁的核心流程算法如下:">1. 获取分布式锁的核心流程算法如下:</h3>
|
|
<blockquote>
|
|
<p><img src="../images/esr_5.png" alt="PNG"></p>
|
|
</blockquote>
|
|
<h3 id="2-easyscheduler中scheduler线程分布式锁实现流程图:">2. EasyScheduler中Scheduler线程分布式锁实现流程图:</h3>
|
|
<blockquote>
|
|
<p><img src="../images/esr_6.png" alt="PNG"></p>
|
|
</blockquote>
|
|
<h2 id="线程不足循环等待问题">线程不足循环等待问题</h2>
|
|
<ul>
|
|
<li><h3 id="如果一个dag中没有子流程,则如果command中的数据条数大于线程池设置的阈值,则直接流程等待或失败。">如果一个DAG中没有子流程,且Command中的数据条数大于线程池设置的阈值,则流程直接等待或失败。</h3>
|
|
</li>
|
|
<li><h3 id="如果一个大的dag中嵌套了很多子流程,如下图:">如果一个大的DAG中嵌套了很多子流程,如下图:</h3>
|
|
<blockquote>
|
|
<p><img src="../images/esr_7.png" alt="PNG"></p>
|
|
<h4 id="       则会产生死等状态。mainflowthread等待subflowthread1结束,subflowthread1等待subflowthread2结束,subflowthread2等待subflowthread3结束,而subflowthread3等待线程池有新线程,则整个dag流程不能结束,从而其中的线程也不能释放。">       则会产生“死等”状态。MainFlowThread等待SubFlowThread1结束,SubFlowThread1等待SubFlowThread2结束,SubFlowThread2等待SubFlowThread3结束,而SubFlowThread3等待线程池有新线程,则整个DAG流程不能结束,从而其中的线程也不能释放。</h4>
|
|
<h4 id="       这样就形成的子父流程循环等待的状态。此时除非启动新的master来增加线程来打破这样的僵局,否则调度集群将不能再使用。">       这样就形成了子父流程循环等待的状态。此时除非启动新的Master来增加线程以打破这样的“僵局”,否则调度集群将不能再使用。</h4>
|
|
</blockquote>
|
|
</li>
|
|
</ul>
|
|
<ul>
|
|
<li><h3 id="对于启动新master来打破僵局,似乎有点差强人意,于是我们提出了以下三种方案来降低这种风险:">对于启动新Master来打破僵局,似乎有点不尽如人意,于是我们提出了以下三种方案来降低这种风险:</h3>
|
|
<blockquote>
|
|
<ul>
|
|
<li><h4 id="计算所有master的线程总和,然后对每一个dag需要计算其需要的线程数,也就是在dag流程执行之前做预计算。因为是多master线程池,所以总线程数不太可能实时获取。">计算所有Master的线程总和,然后对每一个DAG需要计算其需要的线程数,也就是在DAG流程执行之前做预计算。因为是多Master线程池,所以总线程数不太可能实时获取。</h4>
|
|
</li>
|
|
<li><h4 id="对单master线程池进行判断,如果线程池已经满了,则让线程直接失败。">对单Master线程池进行判断,如果线程池已经满了,则让线程直接失败。</h4>
|
|
</li>
|
|
<li><h4 id="增加一种资源不足的command类型,如果线程池不足,则将主流程挂起。这样线程池就有了新的线程,可以让资源不足挂起的流程重新唤醒执行。">增加一种资源不足的Command类型,如果线程池不足,则将主流程挂起。这样线程池就有了新的线程,可以让资源不足挂起的流程重新唤醒执行。</h4>
|
|
</li>
|
|
</ul>
|
|
</blockquote>
|
|
</li>
|
|
<li><h3 id="注意:master-scheduler线程在获取command的时候是fifo的方式执行的。">注意:Master Scheduler线程在获取Command的时候是FIFO的方式执行的。</h3>
|
|
</li>
|
|
<li><h3 id="于是我们选择了第三种方式来解决线程不足的问题。">于是我们选择了第三种方式来解决线程不足的问题。</h3>
|
|
</li>
|
|
</ul>
|
|
<h2 id="容错设计">容错设计</h2>
|
|
<ul>
|
|
<li><h3 id="easyscheduler容错设计依赖于zookeeper的watcher机制,实现原理如图:">EasyScheduler容错设计依赖于Zookeeper的Watcher机制,实现原理如图:</h3>
|
|
<blockquote>
|
|
<p><img src="../images/esr_8.png" alt="PNG"></p>
|
|
<h3 id="       master监控其他master和worker的目录,如果监听到remove事件,则会根据具体的业务逻辑进行流程实例容错或者任务实例容错。">       Master监控其他Master和Worker的目录,如果监听到remove事件,则会根据具体的业务逻辑进行流程实例容错或者任务实例容错。</h3>
|
|
</blockquote>
|
|
</li>
|
|
<li><h3 id="master容错流程图:">Master容错流程图:</h3>
|
|
<blockquote>
|
|
<p><img src="../images/esr_9.png" alt="PNG"></p>
|
|
<h3 id="       zookeeper--master容错完成之后则重新由easyscheduler中scheduler线程调度,遍历-dag-找到正在运行和提交成功的任务,对正在运行的任务监控其任务实例的状态,对提交成功的任务需要判断task-queue中是否已经存在,如果存在则同样监控任务实例的状态,如果不存在则重新提交任务实例。">       ZooKeeper Master容错完成之后则重新由EasyScheduler中Scheduler线程调度,遍历 DAG 找到“正在运行”和“提交成功”的任务,对“正在运行”的任务监控其任务实例的状态,对“提交成功”的任务需要判断Task Queue中是否已经存在,如果存在则同样监控任务实例的状态,如果不存在则重新提交任务实例。</h3>
|
|
</blockquote>
|
|
</li>
|
|
</ul>
|
|
<ul>
|
|
<li><h3 id="worker容错流程图:">Worker容错流程图:</h3>
|
|
<blockquote>
|
|
<p><img src="../images/esr_10.png" alt="PNG"></p>
|
|
<h3 id="       master-scheduler线程一旦发现任务实例为-需要容错状态,则接管任务并进行重新提交。">       Master Scheduler线程一旦发现任务实例为“需要容错”状态,则接管任务并进行重新提交。</h3>
|
|
</blockquote>
|
|
</li>
|
|
</ul>
|
|
<ul>
|
|
<li><h3 id="注意:由于-网络抖动可能会使得节点短时间内失去和zk的心跳,从而发生节点的remove事件。对于这种情况,我们使用最简单的方式,那就是节点一旦和zk发生超时连接,则直接将master或worker服务停掉。">注意:由于“网络抖动”可能会使得节点短时间内失去和zk的心跳,从而发生节点的remove事件。对于这种情况,我们使用最简单的方式,那就是节点一旦和zk发生连接超时,则直接将Master或Worker服务停掉。</h3>
|
|
</li>
|
|
</ul>
|
|
<h2 id="logback和grpc实现日志访问">Logback和gRPC实现日志访问</h2>
|
|
<div id="日志访问"></div>
|
|
|
|
<ul>
|
|
<li><h3 id="由于web和worker不一定在同一台机器上,所以查看日志不能像查询本地文件那样。有两种方案:">由于Web和Worker不一定在同一台机器上,所以查看日志不能像查询本地文件那样。有两种方案:</h3>
|
|
<ul>
|
|
<li><h4 id="将日志放到es搜索引擎上">将日志放到ES搜索引擎上</h4>
|
|
</li>
|
|
<li><h4 id="通过grpc通信获取远程日志信息">通过gRPC通信获取远程日志信息</h4>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li><h3 id="介于考虑到尽可能的easyscheduler的轻量级性,所以选择了grpc实现远程访问日志信息。">鉴于要尽可能保持EasyScheduler的轻量级特性,所以选择了gRPC实现远程访问日志信息。</h3>
|
|
<blockquote>
|
|
<p><img src="../images/esr_11.png" alt="PNG"></p>
|
|
</blockquote>
|
|
</li>
|
|
</ul>
|
|
<ul>
|
|
<li><h3 id="我们使用自定义logback的fileappender和filter功能,实现每个任务实例生成一个日志文件。">我们使用自定义Logback的FileAppender和Filter功能,实现每个任务实例生成一个日志文件。</h3>
|
|
</li>
|
|
</ul>
|
|
<ul>
|
|
<li><h3 id="fileappender实现如下:">FileAppender实现如下:</h3>
|
|
<blockquote>
|
|
<p><img src="../images/esr_12.png" alt="PNG"></p>
|
|
<h4 id="以流程定义id流程实例id任务实例idlog的形式生成日志。">以…/流程定义id/流程实例id/任务实例id.log的形式生成日志。</h4>
|
|
</blockquote>
|
|
</li>
|
|
</ul>
|
|
<ul>
|
|
<li><h3 id="过滤匹配以taskloginfo开始的线程名称:">过滤匹配以TaskLogInfo开始的线程名称:</h3>
|
|
<blockquote>
|
|
<p><img src="../images/esr_13.png" alt="PNG"></p>
|
|
</blockquote>
|
|
</li>
|
|
</ul>
|
|
<h2 id="总结">总结</h2>
|
|
<blockquote>
|
|
<h3 id="本文从调度出发,介绍了易观研发的大数据分布式工作流调度系统。easyscheduler在易观数据平台起着中流砥柱的作用。本章着重介绍了easyscheduler的架构原理及实现思路。">本文从调度出发,介绍了易观研发的大数据分布式工作流调度系统。EasyScheduler在易观数据平台起着中流砥柱的作用。本章着重介绍了EasyScheduler的架构原理及实现思路。</h3>
|
|
</blockquote>
|
|
|
|
|
|
</section>
|
|
|
|
</div>
|
|
<div class="search-results">
|
|
<div class="has-results">
|
|
|
|
<h1 class="search-results-title"><span class='search-results-count'></span> results matching "<span class='search-query'></span>"</h1>
|
|
<ul class="search-results-list"></ul>
|
|
|
|
</div>
|
|
<div class="no-results">
|
|
|
|
<h1 class="search-results-title">No results matching "<span class='search-query'></span>"</h1>
|
|
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<a href="guide-architecture.html#导语" class="navigation navigation-next navigation-unique" aria-label="Next page: 导语">
|
|
<i class="fa fa-angle-right"></i>
|
|
</a>
|
|
|
|
|
|
|
|
</div>
|
|
|
|
<script>
|
|
var gitbook = gitbook || [];
|
|
gitbook.push(function() {
|
|
gitbook.page.hasChanged({"page":{"title":"系统架构分析","level":"1.2.1","depth":2,"next":{"title":"导语","level":"1.2.1.1","depth":3,"anchor":"#导语","path":"pages/guide-architecture.md","ref":"pages/guide-architecture.md#导语","articles":[]},"previous":{"title":"导读指南","level":"1.2","depth":1,"ref":"","articles":[{"title":"系统架构分析","level":"1.2.1","depth":2,"path":"pages/guide-architecture.md","ref":"pages/guide-architecture.md","articles":[{"title":"导语","level":"1.2.1.1","depth":3,"anchor":"#导语","path":"pages/guide-architecture.md","ref":"pages/guide-architecture.md#导语","articles":[]},{"title":"背景","level":"1.2.1.2","depth":3,"anchor":"#背景","path":"pages/guide-architecture.md","ref":"pages/guide-architecture.md#背景","articles":[]},{"title":"总架构设计","level":"1.2.1.3","depth":3,"anchor":"#总架构设计","path":"pages/guide-architecture.md","ref":"pages/guide-architecture.md#总架构设计","articles":[]},{"title":"中心化思想","level":"1.2.1.4","depth":3,"anchor":"#中心化思想","path":"pages/guide-architecture.md","ref":"pages/guide-architecture.md#中心化思想","articles":[]},{"title":"分布式锁实践","level":"1.2.1.5","depth":3,"anchor":"#分布式锁实践","path":"pages/guide-architecture.md","ref":"pages/guide-architecture.md#分布式锁实践","articles":[]},{"title":"线程不足循环等待问题","level":"1.2.1.6","depth":3,"anchor":"#线程不足循环等待问题","path":"pages/guide-architecture.md","ref":"pages/guide-architecture.md#线程不足循环等待问题","articles":[]},{"title":"容错设计","level":"1.2.1.7","depth":3,"anchor":"#容错设计","path":"pages/guide-architecture.md","ref":"pages/guide-architecture.md#容错设计","articles":[]},{"title":"Logback和gRPC实现日志访问","level":"1.2.1.8","depth":3,"anchor":"#日志访问","path":"pages/guide-architecture.md","ref":"pages/guide-architecture.md#日志访问","articles":[]},{"title":"总结","level":"1.2.1.9","depth":3,"anchor":"#总结","path":"pages/guide-architecture.md","ref":"pages/guide-architecture.md#总结","articles":[]}]},{"title":"使用手册","level":"1.2.2","depth":2,"path":"pages/guide-manual.md","ref":"pages/guide-manual.md","articles":[{"title":"登录","level":"1.2.2.1","depth"
:3,"anchor":"#登录","path":"pages/guide-manual.md","ref":"pages/guide-manual.md#登录","articles":[]},{"title":"安全中心","level":"1.2.2.2","depth":3,"anchor":"#安全中心","path":"pages/guide-manual.md","ref":"pages/guide-manual.md#安全中心","articles":[]},{"title":"资源中心","level":"1.2.2.3","depth":3,"anchor":"#资源中心","path":"pages/guide-manual.md","ref":"pages/guide-manual.md#资源中心","articles":[]},{"title":"数据源中心","level":"1.2.2.4","depth":3,"anchor":"#数据源中心","path":"pages/guide-manual.md","ref":"pages/guide-manual.md#数据源中心","articles":[]},{"title":"项目管理","level":"1.2.2.5","depth":3,"anchor":"#项目管理","path":"pages/guide-manual.md","ref":"pages/guide-manual.md#项目管理","articles":[]},{"title":"站点首页","level":"1.2.2.6","depth":3,"anchor":"#首页","path":"pages/guide-manual.md","ref":"pages/guide-manual.md#首页","articles":[]},{"title":"任务依赖","level":"1.2.2.7","depth":3,"anchor":"#任务依赖","path":"pages/guide-manual.md","ref":"pages/guide-manual.md#任务依赖","articles":[]},{"title":"系统参数","level":"1.2.2.8","depth":3,"anchor":"#系统参数","path":"pages/guide-manual.md","ref":"pages/guide-manual.md#系统参数","articles":[]}]}]},"dir":"ltr"},"config":{"plugins":["expandable-chapters","insert-logo-link","livereload"],"styles":{"website":"./styles/website.css"},"pluginsConfig":{"livereload":{},"insert-logo-link":{"src":"../images/logo.png","url":"/"},"search":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"fontsettings":{"theme":"white","family":"sans","size":2},"highlight":{},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":false},"expandable-chapters":{}},"theme":"default","author":"YIGUAN","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","
pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"调度系统-EasyScheduler","language":"zh-hans","gitbook":"3.2.3","description":"调度系统"},"file":{"path":"pages/guide-architecture.md","mtime":"2019-03-07T08:53:26.536Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2019-03-07T11:30:18.635Z"},"basePath":"..","book":{"language":""}});
|
|
});
|
|
</script>
|
|
</div>
|
|
|
|
|
|
<script src="../gitbook/gitbook.js"></script>
|
|
<script src="../gitbook/theme.js"></script>
|
|
|
|
|
|
<script src="../gitbook/gitbook-plugin-expandable-chapters/expandable-chapters.js"></script>
|
|
|
|
|
|
|
|
<script src="../gitbook/gitbook-plugin-insert-logo-link/plugin.js"></script>
|
|
|
|
|
|
|
|
<script src="../gitbook/gitbook-plugin-livereload/plugin.js"></script>
|
|
|
|
|
|
|
|
<script src="../gitbook/gitbook-plugin-search/search-engine.js"></script>
|
|
|
|
|
|
|
|
<script src="../gitbook/gitbook-plugin-search/search.js"></script>
|
|
|
|
|
|
|
|
<script src="../gitbook/gitbook-plugin-lunr/lunr.min.js"></script>
|
|
|
|
|
|
|
|
<script src="../gitbook/gitbook-plugin-lunr/search-lunr.js"></script>
|
|
|
|
|
|
|
|
<script src="../gitbook/gitbook-plugin-sharing/buttons.js"></script>
|
|
|
|
|
|
|
|
<script src="../gitbook/gitbook-plugin-fontsettings/fontsettings.js"></script>
|
|
|
|
|
|
|
|
</body>
|
|
</html>
|