mirror of
https://gitee.com/rainbond/Rainbond.git
synced 2024-12-04 04:38:04 +08:00
245 lines
6.9 KiB
Go
245 lines
6.9 KiB
Go
// Copyright (C) 2014-2018 Goodrain Co., Ltd.
|
|
// RAINBOND, Application Management Platform
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version. For any non-GPL usage of Rainbond,
|
|
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
|
|
// must be obtained first.
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package connect
|
|
|
|
import (
|
|
"errors"
|
|
"strings"
|
|
"time"
|
|
|
|
dis "github.com/goodrain/rainbond/eventlog/cluster/discover"
|
|
"github.com/goodrain/rainbond/eventlog/cluster/distribution"
|
|
"github.com/goodrain/rainbond/eventlog/conf"
|
|
"github.com/goodrain/rainbond/eventlog/store"
|
|
|
|
"golang.org/x/net/context"
|
|
|
|
"fmt"
|
|
|
|
"sync"
|
|
|
|
"github.com/goodrain/rainbond/eventlog/db"
|
|
|
|
"strconv"
|
|
|
|
"github.com/Sirupsen/logrus"
|
|
"github.com/pebbe/zmq4"
|
|
)
|
|
|
|
type Sub struct {
|
|
conf conf.PubSubConf
|
|
log *logrus.Entry
|
|
cancel func()
|
|
context context.Context
|
|
storemanager store.Manager
|
|
subMessageChan chan [][]byte
|
|
listenErr chan error
|
|
discover dis.Manager
|
|
instanceMap map[string]*dis.Instance
|
|
subClient map[string]*SubClient
|
|
mapLock sync.Mutex
|
|
subLock sync.Mutex
|
|
distribution *distribution.Distribution
|
|
}
|
|
type SubClient struct {
|
|
context *zmq4.Context
|
|
socket *zmq4.Socket
|
|
lock sync.Mutex
|
|
quit chan interface{}
|
|
}
|
|
|
|
//NewSub 创建zmq sub客户端
|
|
func NewSub(conf conf.PubSubConf, log *logrus.Entry, storeManager store.Manager, discover dis.Manager, distribution *distribution.Distribution) *Sub {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
return &Sub{
|
|
conf: conf,
|
|
log: log,
|
|
cancel: cancel,
|
|
context: ctx,
|
|
storemanager: storeManager,
|
|
listenErr: make(chan error),
|
|
discover: discover,
|
|
instanceMap: make(map[string]*dis.Instance),
|
|
subClient: make(map[string]*SubClient),
|
|
subMessageChan: storeManager.SubMessageChan(),
|
|
distribution: distribution,
|
|
}
|
|
}
|
|
|
|
//Run 执行
|
|
func (s *Sub) Run() error {
|
|
go s.instanceListen()
|
|
go s.checkHealth()
|
|
s.log.Info("Message Sub manager start")
|
|
return nil
|
|
}
|
|
|
|
//Stop 停止
|
|
func (s *Sub) Stop() {
|
|
s.cancel()
|
|
s.subLock.Lock()
|
|
defer s.subLock.Unlock()
|
|
for _, v := range s.subClient {
|
|
close(v.quit)
|
|
}
|
|
s.log.Info("Message Sub manager stop")
|
|
}
|
|
|
|
func (s *Sub) checkHealth() {
|
|
tike := time.Tick(time.Minute * 2)
|
|
for {
|
|
select {
|
|
case <-s.context.Done():
|
|
return
|
|
case <-tike:
|
|
}
|
|
var unHealth []string
|
|
s.mapLock.Lock()
|
|
s.subLock.Lock()
|
|
for k, ins := range s.instanceMap {
|
|
if _, ok := s.subClient[k]; !ok {
|
|
unHealth = append(unHealth, ins.HostName)
|
|
go s.listen(ins)
|
|
}
|
|
}
|
|
s.subLock.Unlock()
|
|
s.mapLock.Unlock()
|
|
if len(unHealth) == 0 {
|
|
s.log.Debug("sub manager check listen client health: All health.")
|
|
} else {
|
|
s.log.Info("sub manager check listen client health: UnHealth instances:", unHealth)
|
|
}
|
|
|
|
}
|
|
}
|
|
func (s *Sub) instanceListen() {
|
|
for {
|
|
select {
|
|
case instance := <-s.discover.MonitorAddInstances():
|
|
key := fmt.Sprintf("%s:%d", instance.HostIP, instance.PubPort)
|
|
s.mapLock.Lock()
|
|
if _, ok := s.instanceMap[key]; !ok {
|
|
go s.listen(instance)
|
|
}
|
|
s.instanceMap[key] = instance
|
|
s.mapLock.Unlock()
|
|
case instance := <-s.discover.MonitorDelInstances():
|
|
s.log.Debugf("Sub manager receive a del instance %s", instance.HostID)
|
|
key := fmt.Sprintf("%s:%d", instance.HostIP, instance.PubPort)
|
|
s.mapLock.Lock()
|
|
if _, ok := s.instanceMap[key]; ok {
|
|
delete(s.instanceMap, key)
|
|
}
|
|
s.unlisten(instance)
|
|
s.mapLock.Unlock()
|
|
case <-s.context.Done():
|
|
s.mapLock.Lock()
|
|
s.instanceMap = nil
|
|
s.mapLock.Unlock()
|
|
s.log.Debug("Instance listen manager stop.")
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Sub) listen(ins *dis.Instance) {
|
|
chQuit := make(chan interface{})
|
|
chErr := make(chan error, 2)
|
|
newInstanceListen := func(sock *zmq4.Socket, instance string) {
|
|
socketHandler := func(state zmq4.State) error {
|
|
msgs, err := sock.RecvMessageBytes(0)
|
|
if err != nil {
|
|
s.log.Error("sub client receive message error.", err.Error())
|
|
return err
|
|
}
|
|
if len(msgs) == 2 {
|
|
if string(msgs[0]) == string(db.EventMessage) || string(msgs[0]) == string(db.ServiceMonitorMessage) || string(msgs[0]) == string(db.ServiceNewMonitorMessage) {
|
|
s.subMessageChan <- msgs
|
|
} else if string(msgs[0]) == string(db.MonitorMessage) {
|
|
//s.log.Debug("Receive a monitor message ", string(msgs[1]))
|
|
data := strings.Split(string(msgs[1]), ",")
|
|
if len(data) == 3 {
|
|
serviceSize, _ := strconv.Atoi(data[1])
|
|
logSize, _ := strconv.Atoi(data[2])
|
|
s.distribution.Update(db.MonitorData{InstanceID: data[0], ServiceSize: serviceSize, LogSizePeerM: int64(logSize)})
|
|
} else {
|
|
s.log.Error("cluster sub receive a message protocol error.")
|
|
}
|
|
} else {
|
|
s.log.Error("cluster sub receive a message protocol error.")
|
|
}
|
|
} else {
|
|
s.log.Error("cluster sub receive a message protocol error.")
|
|
}
|
|
return nil
|
|
}
|
|
quitHandler := func(interface{}) error {
|
|
s.log.Infof("Sub instance %s quit.", instance)
|
|
sock.Close()
|
|
return errors.New("Quit")
|
|
}
|
|
reactor := zmq4.NewReactor()
|
|
reactor.AddSocket(sock, zmq4.POLLIN, socketHandler)
|
|
reactor.AddChannel(chQuit, 0, quitHandler)
|
|
err := reactor.Run(100 * time.Millisecond)
|
|
chErr <- err
|
|
}
|
|
for {
|
|
context, err := zmq4.NewContext()
|
|
if err != nil {
|
|
s.log.Error("create sub context error", err.Error())
|
|
time.Sleep(time.Second * 5)
|
|
continue
|
|
}
|
|
subscriber, err := context.NewSocket(zmq4.SUB)
|
|
if err != nil {
|
|
s.log.Errorf("create sub client to instance %s error %s", ins.HostName, err.Error())
|
|
time.Sleep(time.Second * 5)
|
|
continue
|
|
}
|
|
err = subscriber.Connect(fmt.Sprintf("tcp://%s:%d", ins.HostIP, ins.PubPort))
|
|
if err != nil {
|
|
s.log.Errorf("sub client connect to instance %s host %s port %d error", ins.HostName, ins.HostIP, ins.PubPort)
|
|
time.Sleep(time.Second * 5)
|
|
continue
|
|
}
|
|
subscriber.SetSubscribe("")
|
|
go newInstanceListen(subscriber, ins.HostName)
|
|
|
|
key := fmt.Sprintf("%s:%d", ins.HostIP, ins.PubPort)
|
|
s.subLock.Lock()
|
|
client := &SubClient{socket: subscriber, context: context, quit: chQuit}
|
|
s.subClient[key] = client
|
|
s.subLock.Unlock()
|
|
s.log.Infof("message client sub instance %s ", ins.HostName)
|
|
break
|
|
}
|
|
}
|
|
|
|
func (s *Sub) unlisten(ins *dis.Instance) {
|
|
s.subLock.Lock()
|
|
defer s.subLock.Unlock()
|
|
key := fmt.Sprintf("%s:%d", ins.HostIP, ins.PubPort)
|
|
if client, ok := s.subClient[key]; ok {
|
|
client.quit <- true
|
|
close(client.quit)
|
|
delete(s.subClient, key)
|
|
}
|
|
}
|