[ADD] record abnormal deletion

This commit is contained in:
GLYASAI 2019-08-29 17:22:04 +08:00
parent 8b90939357
commit 6bdab4c924
8 changed files with 354 additions and 35 deletions

View File

@ -144,7 +144,7 @@ func GetManager() Manager {
return defaultManager
}
// SetManager sets the default manager, usally for unit test
func SetManager(m Manager) {
// SetTestManager sets the default manager for unit test
func SetTestManager(m Manager) {
defaultManager = m
}

View File

@ -33,7 +33,7 @@ func NewClientset(kubecfg string) (kubernetes.Interface, error) {
func NewRainbondFilteredSharedInformerFactory(clientset kubernetes.Interface) informers.SharedInformerFactory {
return informers.NewFilteredSharedInformerFactory(
clientset, 30*time.Second, corev1.NamespaceAll, func(options *metav1.ListOptions) {
options.LabelSelector = "service_id=81f86ea23bb22c37385b8e7edf36f4a9"
options.LabelSelector = "creater=Rainbond"
},
)
}

View File

@ -82,7 +82,7 @@ func NewMasterController(conf option.Config, store store.Storer) (*Controller, e
conf: conf,
pc: pc,
store: store,
mstore: mstore.New(conf.KubeClient),
mstore: mstore.New(conf.KubeClient, store),
stopCh: make(chan struct{}),
cancel: cancel,
ctx: ctx,

View File

@ -30,6 +30,11 @@ type Informer struct {
Pod cache.SharedIndexInformer
}
// Lister contains object listers (stores).
type Lister struct {
	// Pod is the cache-backed lister (store) for Pod objects.
	Pod PodLister
}
// Run initiates the synchronization of the informers against the API server.
func (i *Informer) Run(stopCh chan struct{}) {
go i.Pod.Run(stopCh)

View File

@ -0,0 +1,39 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package store
import (
corev1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
)
// PodLister makes a store that lists Pod.
type PodLister struct {
	cache.Store
}

// ByKey returns the Pod matching key in the local Pod store.
// If the key is absent from the store, it returns (nil, nil);
// callers must check for a nil Pod as well as the error.
func (pl PodLister) ByKey(key string) (*corev1.Pod, error) {
	item, exists, err := pl.GetByKey(key)
	switch {
	case err != nil:
		return nil, err
	case !exists:
		return nil, nil
	default:
		return item.(*corev1.Pod), nil
	}
}

View File

@ -12,10 +12,13 @@ import (
"github.com/goodrain/rainbond/event"
"github.com/goodrain/rainbond/util"
k8sutil "github.com/goodrain/rainbond/util/k8s"
astore "github.com/goodrain/rainbond/worker/appm/store"
"github.com/goodrain/rainbond/worker/server/pb"
wutil "github.com/goodrain/rainbond/worker/util"
"github.com/jinzhu/gorm"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/selection"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
@ -38,22 +41,37 @@ var PodEventTypeLivenessProbeFailed PodEventType = "LivenessProbeFailed"
// PodEventTypeReadinessProbeFailed -
var PodEventTypeReadinessProbeFailed PodEventType = "ReadinessProbeFailed"
// PodEventTypeAbnormalRecovery -
var PodEventTypeAbnormalRecovery PodEventType = "AbnormalRecovery"
// PodEventTypeAbnormalShtdown -
var PodEventTypeAbnormalShtdown PodEventType = "AbnormalShtdown"
//Storer is the interface that wraps the required methods to gather information
type Storer interface {
// Run initiates the synchronization of the controllers
Run(stopCh chan struct{})
ListPodsBySID(sid string) ([]*corev1.Pod, error)
IsSvcClosed(sid string) bool
}
type k8sStore struct {
// informer contains the cache Informers
informers *Informer
informers *Informer
// Lister contains object listers (stores).
listers *Lister
sharedInformer informers.SharedInformerFactory
appmstore astore.Storer
}
// New creates a new Storer
func New(clientset kubernetes.Interface) Storer {
func New(clientset kubernetes.Interface, appmstore astore.Storer) Storer {
store := &k8sStore{
informers: &Informer{},
appmstore: appmstore,
}
// create informers factory, enable and assign required informers
@ -61,7 +79,7 @@ func New(clientset kubernetes.Interface) Storer {
store.informers.Pod = store.sharedInformer.Core().V1().Pods().Informer()
store.informers.Pod.AddEventHandler(podEventHandler(clientset))
store.informers.Pod.AddEventHandler(podEventHandler(clientset, store))
return store
}
@ -72,32 +90,47 @@ func (s *k8sStore) Run(stopCh chan struct{}) {
s.informers.Run(stopCh)
}
func podEventHandler(clientset kubernetes.Interface) cache.ResourceEventHandlerFuncs {
// ListPodsBySID lists, from the shared informer's local cache, all pods
// whose "service_id" label equals sid.
func (s *k8sStore) ListPodsBySID(sid string) ([]*corev1.Pod, error) {
	rm, err := labels.NewRequirement("service_id", selection.Equals, []string{sid})
	if err != nil {
		return nil, err
	}
	// labels.Selector.Add returns a NEW selector rather than mutating the
	// receiver. The original code discarded its result, so the requirement
	// was never applied and the lister returned every pod in the cache.
	selector := labels.NewSelector().Add(*rm)
	return s.sharedInformer.Core().V1().Pods().Lister().List(selector)
}
// IsSvcClosed reports whether the service identified by sid is closed.
// A service unknown to the app manager store is treated as closed.
func (s *k8sStore) IsSvcClosed(sid string) bool {
	appService := s.appmstore.GetAppService(sid)
	return appService == nil || appService.IsClosed()
}
func podEventHandler(clientset kubernetes.Interface, store Storer) cache.ResourceEventHandlerFuncs {
return cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
},
DeleteFunc: func(obj interface{}) {
},
UpdateFunc: func(old, cur interface{}) {
opod := old.(*corev1.Pod)
cpod := cur.(*corev1.Pod)
// judge the state of the event
oldPodstatus := &pb.PodStatus{}
wutil.DescribePodStatus(opod, oldPodstatus)
curPodstatus := &pb.PodStatus{}
wutil.DescribePodStatus(cpod, curPodstatus)
if oldPodstatus.Type == curPodstatus.Type {
pod := obj.(*corev1.Pod)
tenantID, serviceID, _, _ := k8sutil.ExtractLabels(pod.GetLabels())
if hasUnfinishedUserActions(serviceID) {
return
}
// extract the service information from the pod
// _, serviceID, _, _ := k8sutil.ExtractLabels(cpod.GetLabels())
// // ignore user actions
// if hasUnfinishedUserActions(serviceID) {
// logrus.Debugf("service id: %s; has unfinished user actions.", serviceID)
// return
// }
if store.IsSvcClosed(serviceID) {
return
}
_, err := createSystemEvent(tenantID, serviceID, pod.GetName(), PodEventTypeAbnormalShtdown.String(), model.EventStatusSuccess.String())
if err != nil {
logrus.Warningf("pod: %s; type: %s; error creating event: %v", pod.GetName(), PodEventTypeAbnormalShtdown.String(), err)
return
}
},
UpdateFunc: func(old, cur interface{}) {
cpod := cur.(*corev1.Pod)
recordUpdateEvent(clientset, cpod, defDetermineOptType)
},
@ -128,14 +161,14 @@ func recordUpdateEvent(clientset kubernetes.Interface, pod *corev1.Pod, f determ
// the pod in the pending status has no start time and container statuses
for _, cs := range pod.Status.ContainerStatuses {
state := cs.State
if podstatus.Type == pb.PodStatus_ABNORMAL { // TODO: not ready
if podstatus.Type == pb.PodStatus_ABNORMAL || podstatus.Type == pb.PodStatus_NOTREADY { // TODO: not ready
var eventID string
optType, message := f(clientset, pod, &state, k8sutil.DefListEventsByPod)
if optType == "" {
continue
}
if evt == nil { // create event
eventID, err = createSystemEvent(tenantID, serviceID, pod.GetName(), optType.String())
eventID, err = createSystemEvent(tenantID, serviceID, pod.GetName(), optType.String(), model.EventStatusFailure.String())
if err != nil {
logrus.Warningf("pod: %s; type: %s; error creating event: %v", pod.GetName(), optType.String(), err)
continue
@ -160,15 +193,19 @@ func recordUpdateEvent(clientset kubernetes.Interface, pod *corev1.Pod, f determ
logrus.Debugf("Service id: %s; %s.", serviceID, msg)
loggerOpt := event.GetLoggerOption("failure")
if time.Now().Sub(state.Running.StartedAt.Time) > 2*time.Minute {
evt.FinalStatus = model.EventFinalStatusComplete.String()
loggerOpt = event.GetLastLoggerOption()
loggerOpt = event.GetLastLoggerOption() // TODO
_, err := createSystemEvent(tenantID, serviceID, pod.GetName(), PodEventTypeAbnormalRecovery.String(), model.EventStatusSuccess.String())
if err != nil {
logrus.Warningf("pod: %s; type: %s; error creating event: %v", pod.GetName(), PodEventTypeAbnormalRecovery.String(), err)
continue
}
}
logger.Info(msg, loggerOpt)
}
}
}
func createSystemEvent(tenantID, serviceID, targetID, optType string) (eventID string, err error) {
func createSystemEvent(tenantID, serviceID, targetID, optType, status string) (eventID string, err error) {
eventID = util.NewUUID()
et := &model.ServiceEvent{
EventID: eventID,
@ -178,7 +215,7 @@ func createSystemEvent(tenantID, serviceID, targetID, optType string) (eventID s
TargetID: targetID,
UserName: model.UsernameSystem,
OptType: optType,
Status: model.EventStatusFailure.String(),
Status: status,
FinalStatus: model.EventFinalStatusEmpty.String(),
}
if err = db.GetManager().ServiceEventDao().AddModel(et); err != nil {
@ -199,7 +236,7 @@ func defDetermineOptType(clientset kubernetes.Interface, pod *corev1.Pod, state
if strings.Contains(evt.Message, "Liveness probe failed") && state.Waiting != nil {
return PodEventTypeLivenessProbeFailed, evt.Message
}
if strings.Contains(evt.Message, "Readiness probe failed") && state.Running != nil {
if strings.Contains(evt.Message, "Readiness probe failed") {
return PodEventTypeReadinessProbeFailed, evt.Message
}
}

View File

@ -139,7 +139,7 @@ func TestRecordUpdateEvent(t *testing.T) {
// mock db
dbmanager := db.NewMockManager(ctrl)
db.SetManager(dbmanager)
db.SetTestManager(dbmanager)
serviceEventDao := dao.NewMockEventDao(ctrl)
dbmanager.EXPECT().ServiceEventDao().AnyTimes().Return(serviceEventDao)
var evt *model.ServiceEvent
@ -254,7 +254,7 @@ func TestK8sStore_Run(t *testing.T) {
if err != nil {
t.Fatalf("error creating k8s clientset: %s", err.Error())
}
store := New(clientset)
store := New(clientset, nil)
stop := make(chan struct{})
store.Run(stop)

View File

@ -0,0 +1,238 @@
{
"metadata": {
"name": "05459b941f86156e1093c14ced4c4cc4-deployment-695b6bfdc7-w4hwg",
"generateName": "05459b941f86156e1093c14ced4c4cc4-deployment-695b6bfdc7-",
"namespace": "2d15195dcda242a3aab6f89ec56d25f0",
"selfLink": "/api/v1/namespaces/2d15195dcda242a3aab6f89ec56d25f0/pods/05459b941f86156e1093c14ced4c4cc4-deployment-695b6bfdc7-w4hwg",
"uid": "2e5ba52c-ca2f-11e9-943a-ea9b3e44aed0",
"resourceVersion": "357147",
"creationTimestamp": "2019-08-29T07:32:40Z",
"labels": {
"creater": "Rainbond",
"creater_id": "1567063959797319374",
"name": "gr4c4cc4",
"pod-template-hash": "2516269873",
"service_alias": "gr4c4cc4",
"service_id": "05459b941f86156e1093c14ced4c4cc4",
"tenant_id": "2d15195dcda242a3aab6f89ec56d25f0",
"tenant_name": "v9dw2hx5",
"version": "20190829144831"
},
"annotations": {
"rainbond.com/tolerate-unready-endpoints": "true"
},
"ownerReferences": [
{
"apiVersion": "extensions/v1beta1",
"kind": "ReplicaSet",
"name": "05459b941f86156e1093c14ced4c4cc4-deployment-695b6bfdc7",
"uid": "2e3aa713-ca2f-11e9-943a-ea9b3e44aed0",
"controller": true,
"blockOwnerDeletion": true
}
]
},
"spec": {
"volumes": [
{
"name": "default-token-x42zm",
"secret": {
"secretName": "default-token-x42zm",
"defaultMode": 420
}
}
],
"containers": [
{
"name": "05459b941f86156e1093c14ced4c4cc4",
"image": "goodrain.me/05459b941f86156e1093c14ced4c4cc4:20190829144831",
"ports": [
{
"containerPort": 5000,
"protocol": "TCP"
}
],
"env": [
{
"name": "LOGGER_DRIVER_NAME",
"value": "streamlog"
},
{
"name": "PORT",
"value": "5000"
},
{
"name": "PROTOCOL",
"value": "http"
},
{
"name": "MONITOR_PORT",
"value": "5000"
},
{
"name": "CUR_NET",
"value": "midonet"
},
{
"name": "TENANT_ID",
"value": "2d15195dcda242a3aab6f89ec56d25f0"
},
{
"name": "SERVICE_ID",
"value": "05459b941f86156e1093c14ced4c4cc4"
},
{
"name": "MEMORY_SIZE",
"value": "small"
},
{
"name": "SERVICE_NAME",
"value": "gr4c4cc4"
},
{
"name": "SERVICE_EXTEND_METHOD",
"value": "stateless"
},
{
"name": "SERVICE_POD_NUM",
"value": "1"
},
{
"name": "HOST_IP",
"valueFrom": {
"fieldRef": {
"apiVersion": "v1",
"fieldPath": "status.hostIP"
}
}
},
{
"name": "POD_IP",
"valueFrom": {
"fieldRef": {
"apiVersion": "v1",
"fieldPath": "status.podIP"
}
}
}
],
"resources": {
"limits": {
"cpu": "0",
"memory": "4Mi"
},
"requests": {
"cpu": "0",
"memory": "4Mi"
}
},
"volumeMounts": [
{
"name": "default-token-x42zm",
"readOnly": true,
"mountPath": "/var/run/secrets/kubernetes.io/serviceaccount"
}
],
"terminationMessagePath": "/dev/termination-log",
"terminationMessagePolicy": "File",
"imagePullPolicy": "IfNotPresent"
}
],
"restartPolicy": "Always",
"terminationGracePeriodSeconds": 30,
"dnsPolicy": "ClusterFirst",
"serviceAccountName": "default",
"serviceAccount": "default",
"nodeName": "469bb7ef-e5b5-4f53-a08a-c87b16fa6686",
"securityContext": {},
"affinity": {
"nodeAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": {
"nodeSelectorTerms": [
{
"matchExpressions": [
{
"key": "beta.kubernetes.io/os",
"operator": "NotIn",
"values": [
"windows"
]
}
]
}
]
}
}
},
"schedulerName": "default-scheduler",
"tolerations": [
{
"key": "node.kubernetes.io/not-ready",
"operator": "Exists",
"effect": "NoExecute",
"tolerationSeconds": 300
},
{
"key": "node.kubernetes.io/unreachable",
"operator": "Exists",
"effect": "NoExecute",
"tolerationSeconds": 300
}
]
},
"status": {
"phase": "Running",
"conditions": [
{
"type": "Ready",
"status": "False",
"lastProbeTime": null,
"lastTransitionTime": "2019-08-29T07:32:40Z",
"reason": "ContainersNotReady",
"message": "containers with unready status: []"
},
{
"type": "Initialized",
"status": "True",
"lastProbeTime": null,
"lastTransitionTime": "2019-08-29T07:32:40Z"
},
{
"type": "PodScheduled",
"status": "True",
"lastProbeTime": null,
"lastTransitionTime": "2019-08-29T07:32:40Z"
}
],
"hostIP": "192.168.2.202",
"podIP": "10.0.14.220",
"startTime": "2019-08-29T07:32:40Z",
"containerStatuses": [
{
"name": "05459b941f86156e1093c14ced4c4cc4",
"state": {
"waiting": {
"reason": "RunContainerError",
"message": "failed to start container \"b5a5e5a8a3a5274adf4d9cbe14827d1e483fb2ec0e791e8691db9a69bca2f8a3\": Error response from daemon: OCI runtime create failed: container_linux.go:348: starting container process caused \"process_linux.go:301: running exec setns process for init caused \\\"signal: killed\\\"\": unknown"
}
},
"lastState": {
"terminated": {
"exitCode": 128,
"reason": "ContainerCannotRun",
"message": "OCI runtime create failed: container_linux.go:348: starting container process caused \"process_linux.go:301: running exec setns process for init caused \\\"signal: killed\\\"\": unknown",
"startedAt": "2019-08-29T07:32:51Z",
"finishedAt": "2019-08-29T07:32:51Z",
"containerID": "docker://b5a5e5a8a3a5274adf4d9cbe14827d1e483fb2ec0e791e8691db9a69bca2f8a3"
}
},
"ready": false,
"restartCount": 0,
"image": "goodrain.me/05459b941f86156e1093c14ced4c4cc4:20190829144831",
"imageID": "docker-pullable://goodrain.me/05459b941f86156e1093c14ced4c4cc4@sha256:96f866c788862a9ba29b1b0377cb856136b73e02ed4f26a5b4b338f709d7d5ce",
"containerID": "docker://b5a5e5a8a3a5274adf4d9cbe14827d1e483fb2ec0e791e8691db9a69bca2f8a3"
}
],
"qosClass": "Burstable"
}
}