// Copyright (C) 2014-2018 Goodrain Co., Ltd.
// RAINBOND, Application Management Platform
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package handler
import (
"context"
"fmt"
"k8s.io/apimachinery/pkg/fields"
"sort"
"strings"
"time"
"github.com/goodrain/rainbond/api/client/prometheus"
"github.com/goodrain/rainbond/api/model"
api_model "github.com/goodrain/rainbond/api/model"
"github.com/goodrain/rainbond/api/util"
"github.com/goodrain/rainbond/api/util/bcode"
"github.com/goodrain/rainbond/cmd/api/option"
"github.com/goodrain/rainbond/db"
dbmodel "github.com/goodrain/rainbond/db/model"
mqclient "github.com/goodrain/rainbond/mq/client"
"github.com/goodrain/rainbond/pkg/apis/rainbond/v1alpha1"
rutil "github.com/goodrain/rainbond/util"
"github.com/goodrain/rainbond/worker/client"
"github.com/goodrain/rainbond/worker/server/pb"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
k8sErrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
)
//TenantAction handles tenant-related operations such as resource statistics, queries and deletion
type TenantAction struct {
MQClient mqclient.MQClient
statusCli *client.AppRuntimeSyncClient
OptCfg *option.Config
kubeClient *kubernetes.Clientset
cacheClusterResourceStats *ClusterResourceStats
cacheTime time.Time
prometheusCli prometheus.Interface
k8sClient k8sclient.Client
resources map[string]k8sclient.Object
}
//CreateTenManager creates a TenantAction manager
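//
// A hypothetical wiring during API server setup (variable names are illustrative):
//
//	tenantAction := CreateTenManager(mqClient, statusCli, cfg, kubeClient, promCli, k8sClient)
//	tenants, _ := tenantAction.GetTenants("")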
func CreateTenManager(mqc mqclient.MQClient, statusCli *client.AppRuntimeSyncClient,
optCfg *option.Config,
kubeClient *kubernetes.Clientset,
prometheusCli prometheus.Interface,
k8sClient k8sclient.Client) *TenantAction {
resources := map[string]k8sclient.Object{
"helmApp": &v1alpha1.HelmApp{},
"service": &corev1.Service{},
}
return &TenantAction{
MQClient: mqc,
statusCli: statusCli,
OptCfg: optCfg,
kubeClient: kubeClient,
prometheusCli: prometheusCli,
k8sClient: k8sClient,
resources: resources,
}
}
//BindTenantsResource queries the resource usage of each tenant and returns a sorted tenant list
func (t *TenantAction) BindTenantsResource(source []*dbmodel.Tenants) api_model.TenantList {
var list api_model.TenantList
var resources = make(map[string]*pb.TenantResource, len(source))
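// for a single tenant, query its resource directly; otherwise fetch all tenant resources in one call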
if len(source) == 1 {
re, err := t.statusCli.GetTenantResource(source[0].UUID)
if err != nil {
logrus.Errorf("get tenant %s resource failure %s", source[0].UUID, err.Error())
}
if re != nil {
resources[source[0].UUID] = re
}
} else {
res, err := t.statusCli.GetAllTenantResource()
if err != nil {
logrus.Errorf("get all tenant resource failure %s", err.Error())
}
if res != nil {
resources = res.Resources
}
}
for i, ten := range source {
var item = &api_model.TenantAndResource{
Tenants: *source[i],
}
re := resources[ten.UUID]
if re != nil {
item.CPULimit = re.CpuLimit
item.CPURequest = re.CpuRequest
item.MemoryLimit = re.MemoryLimit
item.MemoryRequest = re.MemoryRequest
item.RunningAppNum = re.RunningAppNum
item.RunningAppInternalNum = re.RunningAppInternalNum
item.RunningAppThirdNum = re.RunningAppThirdNum
item.RunningApplications = re.RunningApplications
}
list.Add(item)
}
sort.Sort(list)
return list
}
//GetTenants get tenants
func (t *TenantAction) GetTenants(query string) ([]*dbmodel.Tenants, error) {
tenants, err := db.GetManager().TenantDao().GetALLTenants(query)
if err != nil {
return nil, err
}
return tenants, err
}
//GetTenantsByEid gets tenants by enterprise id
func (t *TenantAction) GetTenantsByEid(eid, query string) ([]*dbmodel.Tenants, error) {
tenants, err := db.GetManager().TenantDao().GetTenantByEid(eid, query)
if err != nil {
return nil, err
}
return tenants, err
}
//UpdateTenant update tenant info
func (t *TenantAction) UpdateTenant(tenant *dbmodel.Tenants) error {
return db.GetManager().TenantDao().UpdateModel(tenant)
}
// DeleteTenant deletes the tenant identified by tenantID.
//
// Any remaining services and plugins are deleted first; the tenant is then
// marked as deleting and the actual cleanup is delegated to the worker via MQ.
func (t *TenantAction) DeleteTenant(ctx context.Context, tenantID string) error {
// delete any remaining services
services, err := db.GetManager().TenantServiceDao().ListServicesByTenantID(tenantID)
if err != nil {
return err
}
if len(services) > 0 {
for _, service := range services {
GetServiceManager().TransServieToDelete(ctx, tenantID, service.ServiceID)
}
}
// delete any remaining plugins
plugins, err := db.GetManager().TenantPluginDao().ListByTenantID(tenantID)
if err != nil {
return err
}
if len(plugins) > 0 {
for _, plugin := range plugins {
GetPluginManager().DeletePluginAct(plugin.PluginID, tenantID)
}
}
tenant, err := db.GetManager().TenantDao().GetTenantByUUID(tenantID)
if err != nil {
return err
}
oldStatus := tenant.Status
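// rollback restores the previous tenant status if the delete task cannot be queued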
var rollback = func() {
tenant.Status = oldStatus
_ = db.GetManager().TenantDao().UpdateModel(tenant)
}
tenant.Status = dbmodel.TenantStatusDeleting.String()
if err := db.GetManager().TenantDao().UpdateModel(tenant); err != nil {
return err
}
// ask the worker to delete the tenant's resources (including its namespace) asynchronously
err = t.MQClient.SendBuilderTopic(mqclient.TaskStruct{
TaskType: "delete_tenant",
Topic: mqclient.WorkerTopic,
TaskBody: map[string]string{
"tenant_id": tenantID,
},
})
if err != nil {
rollback()
logrus.Errorf("send task 'delete tenant': %v", err)
return err
}
return nil
}
//TotalMemCPU sums the configured CPU and memory of the given services
func (t *TenantAction) TotalMemCPU(services []*dbmodel.TenantServices) (*api_model.StatsInfo, error) {
cpus := 0
mem := 0
for _, service := range services {
logrus.Debugf("service is %d, cpus is %d, mem is %v", service.ID, service.ContainerCPU, service.ContainerMemory)
cpus += service.ContainerCPU
mem += service.ContainerMemory
}
si := &api_model.StatsInfo{
CPU: cpus,
MEM: mem,
}
return si, nil
}
//GetTenantsName returns the lower-cased names of all tenants
func (t *TenantAction) GetTenantsName() ([]string, error) {
tenants, err := db.GetManager().TenantDao().GetALLTenants("")
if err != nil {
return nil, err
}
var result []string
for _, v := range tenants {
result = append(result, strings.ToLower(v.Name))
}
return result, err
}
//GetTenantsByName gets a tenant by name
func (t *TenantAction) GetTenantsByName(name string) (*dbmodel.Tenants, error) {
tenant, err := db.GetManager().TenantDao().GetTenantIDByName(name)
if err != nil {
return nil, err
}
return tenant, err
}
//GetTenantsByUUID gets a tenant by uuid
func (t *TenantAction) GetTenantsByUUID(uuid string) (*dbmodel.Tenants, error) {
tenant, err := db.GetManager().TenantDao().GetTenantByUUID(uuid)
if err != nil {
return nil, err
}
return tenant, err
}
//StatsMemCPU sums the CPU and memory of the given services, skipping closed ones
func (t *TenantAction) StatsMemCPU(services []*dbmodel.TenantServices) (*api_model.StatsInfo, error) {
cpus := 0
mem := 0
for _, service := range services {
status := t.statusCli.GetStatus(service.ServiceID)
if t.statusCli.IsClosedStatus(status) {
continue
}
cpus += service.ContainerCPU
mem += service.ContainerMemory
}
si := &api_model.StatsInfo{
CPU: cpus,
MEM: mem,
}
return si, nil
}
// QueryResult contains result data for a query.
type QueryResult struct {
Data struct {
Type string `json:"resultType"`
Result []map[string]interface{} `json:"result"`
} `json:"data"`
Status string `json:"status"`
}
//GetTenantsResources gets the resource usage of the specified tenants
func (t *TenantAction) GetTenantsResources(ctx context.Context, tr *api_model.TenantResources) (map[string]map[string]interface{}, error) {
ids, err := db.GetManager().TenantDao().GetTenantIDsByNames(tr.Body.TenantNames)
if err != nil {
return nil, err
}
limits, err := db.GetManager().TenantDao().GetTenantLimitsByNames(tr.Body.TenantNames)
if err != nil {
return nil, err
}
services, err := db.GetManager().TenantServiceDao().GetServicesByTenantIDs(ids)
if err != nil {
return nil, err
}
var serviceTenantCount = make(map[string]int, len(ids))
for _, s := range services {
serviceTenantCount[s.TenantID]++
}
// get cluster resources
clusterStats, err := t.GetAllocatableResources(ctx)
if err != nil {
return nil, fmt.Errorf("error getting allocatable cpu and memory: %v", err)
}
var result = make(map[string]map[string]interface{}, len(ids))
var resources = make(map[string]*pb.TenantResource, len(ids))
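// same pattern as BindTenantsResource: one tenant is queried directly, many tenants in bulk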
if len(ids) == 1 {
re, err := t.statusCli.GetTenantResource(ids[0])
if err != nil {
logrus.Errorf("get tenant %s resource failure %s", ids[0], err.Error())
}
if re != nil {
resources[ids[0]] = re
}
} else {
res, err := t.statusCli.GetAllTenantResource()
if err != nil {
logrus.Errorf("get all tenant resource failure %s", err.Error())
}
if res != nil {
resources = res.Resources
}
}
for _, tenantID := range ids {
var limitMemory int64
if l, ok := limits[tenantID]; ok && l != 0 {
limitMemory = int64(l)
} else {
limitMemory = clusterStats.AllMemory
}
result[tenantID] = map[string]interface{}{
"tenant_id": tenantID,
"limit_memory": limitMemory,
"limit_cpu": clusterStats.AllCPU,
"service_total_num": serviceTenantCount[tenantID],
"disk": 0,
"service_running_num": 0,
"cpu": 0,
"memory": 0,
"app_running_num": 0,
}
tenantRes := resources[tenantID]
if tenantRes != nil {
result[tenantID]["service_running_num"] = tenantRes.RunningAppNum
result[tenantID]["cpu"] = tenantRes.CpuRequest
result[tenantID]["memory"] = tenantRes.MemoryRequest
result[tenantID]["app_running_num"] = tenantRes.RunningApplications
}
}
// query per-tenant disk usage from prometheus
query := fmt.Sprintf(`sum(app_resource_appfs{tenant_id=~"%s"}) by(tenant_id)`, strings.Join(ids, "|"))
metric := t.prometheusCli.GetMetric(query, time.Now())
for _, mv := range metric.MetricData.MetricValues {
var tenantID = mv.Metadata["tenant_id"]
var disk int
if mv.Sample != nil {
disk = int(mv.Sample.Value() / 1024)
}
if tenantID != "" {
result[tenantID]["disk"] = disk
}
}
return result, nil
}
//TenantResourceStats tenant resource stats
type TenantResourceStats struct {
TenantID string `json:"tenant_id,omitempty"`
CPURequest int64 `json:"cpu_request,omitempty"`
CPULimit int64 `json:"cpu_limit,omitempty"`
MemoryRequest int64 `json:"memory_request,omitempty"`
MemoryLimit int64 `json:"memory_limit,omitempty"`
RunningAppNum int64 `json:"running_app_num"`
UnscdCPUReq int64 `json:"unscd_cpu_req,omitempty"`
UnscdCPULimit int64 `json:"unscd_cpu_limit,omitempty"`
UnscdMemoryReq int64 `json:"unscd_memory_req,omitempty"`
UnscdMemoryLimit int64 `json:"unscd_memory_limit,omitempty"`
RunningApplications int64 `json:"running_applications"`
}
//GetTenantResource gets the resource usage of a single tenant
func (t *TenantAction) GetTenantResource(tenantID string) (ts TenantResourceStats, err error) {
tr, err := t.statusCli.GetTenantResource(tenantID)
if err != nil {
return ts, err
}
// guard against a nil resource returned without an error, mirroring the check in BindTenantsResource
if tr == nil {
return ts, nil
}
ts.TenantID = tenantID
ts.CPULimit = tr.CpuLimit
ts.CPURequest = tr.CpuRequest
ts.MemoryLimit = tr.MemoryLimit
ts.MemoryRequest = tr.MemoryRequest
ts.RunningAppNum = tr.RunningAppNum
ts.RunningApplications = tr.RunningApplications
return
}
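//PodResourceInformation describes the resource requests of a single pod scheduled on a node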
type PodResourceInformation struct {
NodeName string
ServiceID string
AppID string
Memory int64
ResourceVersion string
Cpu int64
StorageEphemeral int64
}
//ClusterResourceStats cluster resource stats
type ClusterResourceStats struct {
AllCPU int64
AllMemory int64
RequestCPU int64
RequestMemory int64
NodePods []PodResourceInformation
AllPods int64
}
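// initClusterResource refreshes the cached cluster resource statistics; the cached
// value is reused for up to three minutes to avoid hammering the API server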
func (t *TenantAction) initClusterResource(ctx context.Context) error {
if t.cacheClusterResourceStats == nil || t.cacheTime.Add(time.Minute*3).Before(time.Now()) {
var crs ClusterResourceStats
nodes, err := t.kubeClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
if err != nil {
logrus.Errorf("get cluster nodes failure %s", err.Error())
return err
}
usedNodeList := make([]corev1.Node, 0, len(nodes.Items))
for _, node := range nodes.Items {
// skip nodes that carry NoSchedule taints
if containsTaints(&node) {
logrus.Debugf("[GetClusterInfo] node(%s) contains NoSchedule taints", node.GetName())
continue
}
// skip cordoned nodes
if node.Spec.Unschedulable {
continue
}
// skip nodes that are not ready
var notReady bool
for _, c := range node.Status.Conditions {
if c.Type == corev1.NodeReady && c.Status != corev1.ConditionTrue {
notReady = true
break
}
}
if notReady {
continue
}
usedNodeList = append(usedNodeList, node)
crs.AllMemory += node.Status.Allocatable.Memory().Value() / (1024 * 1024)
crs.AllCPU += node.Status.Allocatable.Cpu().MilliValue()
}
var nodePodsList []PodResourceInformation
for i := range usedNodeList {
node := usedNodeList[i]
time.Sleep(50 * time.Microsecond)
podList, err := t.kubeClient.CoreV1().Pods(metav1.NamespaceAll).List(ctx, metav1.ListOptions{
FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": node.Name}).String()})
if err != nil {
logrus.Errorf("get node %v pods error:%v", node.Name, err)
continue
}
crs.AllPods += int64(len(podList.Items))
for _, pod := range podList.Items {
var nodePod PodResourceInformation
nodePod.NodeName = node.Name
if componentID, ok := pod.Labels["service_id"]; ok {
nodePod.ServiceID = componentID
}
if appID, ok := pod.Labels["app_id"]; ok {
nodePod.AppID = appID
}
nodePod.ResourceVersion = pod.ResourceVersion
// sum the resource requests of all containers in the pod
for _, c := range pod.Spec.Containers {
nodePod.Memory += c.Resources.Requests.Memory().Value()
nodePod.Cpu += c.Resources.Requests.Cpu().MilliValue()
nodePod.StorageEphemeral += c.Resources.Requests.StorageEphemeral().Value()
}
nodePodsList = append(nodePodsList, nodePod)
}
}
crs.NodePods = nodePodsList
t.cacheClusterResourceStats = &crs
t.cacheTime = time.Now()
}
return nil
}
// GetAllocatableResources returns the allocatable cpu (milli-cores) and memory (MB) of the cluster
func (t *TenantAction) GetAllocatableResources(ctx context.Context) (*ClusterResourceStats, error) {
var crs ClusterResourceStats
if t.initClusterResource(ctx) != nil {
return &crs, nil
}
ts, err := t.statusCli.GetAllTenantResource()
if err != nil {
logrus.Errorf("get tenant resource failure %s", err.Error())
}
re := t.cacheClusterResourceStats
if ts != nil {
// recompute the cluster-wide request totals from the per-tenant resources
re.RequestCPU = 0
re.RequestMemory = 0
for _, res := range ts.Resources {
re.RequestCPU += res.CpuRequest
re.RequestMemory += res.MemoryRequest
}
}
return re, nil
}
//GetServicesResources gets the resource usage of the specified services
func (t *TenantAction) GetServicesResources(tr *api_model.ServicesResources) (re map[string]map[string]interface{}, err error) {
status := t.statusCli.GetStatuss(strings.Join(tr.Body.ServiceIDs, ","))
var running, closed []string
for k, v := range status {
if !t.statusCli.IsClosedStatus(v) {
running = append(running, k)
} else {
closed = append(closed, k)
}
}
podList, err := t.statusCli.GetMultiServicePods(running)
if err != nil {
return nil, err
}
res := make(map[string]map[string]interface{})
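// aggregate container resource requests over both new and old pods of each running service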
for serviceID, item := range podList.ServicePods {
pods := item.NewPods
pods = append(pods, item.OldPods...)
var memory, cpu int64
for _, pod := range pods {
for _, c := range pod.Containers {
memory += c.MemoryRequest
cpu += c.CpuRequest
}
}
res[serviceID] = map[string]interface{}{"memory": memory / 1024 / 1024, "cpu": cpu}
}
for _, c := range closed {
res[c] = map[string]interface{}{"memory": 0, "cpu": 0}
}
disks := GetServicesDiskDeprecated(tr.Body.ServiceIDs, t.prometheusCli)
for serviceID, disk := range disks {
if _, ok := res[serviceID]; ok {
res[serviceID]["disk"] = disk / 1024
} else {
res[serviceID] = make(map[string]interface{})
res[serviceID]["disk"] = disk / 1024
}
}
return res, nil
}
func (t *TenantAction) getPodNums(serviceID string) int {
pods, err := t.statusCli.GetAppPods(context.TODO(), &pb.ServiceRequest{
ServiceId: serviceID,
})
if err != nil {
logrus.Warningf("get app pods: %v", err)
return 0
}
return len(pods.OldPods) + len(pods.NewPods)
}
//TenantsSum returns the total number of tenants
func (t *TenantAction) TenantsSum() (int, error) {
s, err := db.GetManager().TenantDao().GetALLTenants("")
if err != nil {
return 0, err
}
return len(s), nil
}
//GetProtocols returns the protocols supported by the region
func (t *TenantAction) GetProtocols() ([]*dbmodel.RegionProcotols, *util.APIHandleError) {
return []*dbmodel.RegionProcotols{
{
ProtocolGroup: "http",
ProtocolChild: "http",
APIVersion: "v2",
IsSupport: true,
},
{
ProtocolGroup: "http",
ProtocolChild: "grpc",
APIVersion: "v2",
IsSupport: true,
}, {
ProtocolGroup: "stream",
ProtocolChild: "tcp",
APIVersion: "v2",
IsSupport: true,
}, {
ProtocolGroup: "stream",
ProtocolChild: "udp",
APIVersion: "v2",
IsSupport: true,
}, {
ProtocolGroup: "stream",
ProtocolChild: "mysql",
APIVersion: "v2",
IsSupport: true,
},
}, nil
}
//TransPlugins clones the given plugins from fromTenant into the target tenant
func (t *TenantAction) TransPlugins(tenantID, tenantName, fromTenant string, pluginList []string) *util.APIHandleError {
tenantInfo, err := db.GetManager().TenantDao().GetTenantIDByName(fromTenant)
if err != nil {
return util.CreateAPIHandleErrorFromDBError("get tenant infos", err)
}
fromTenantID := tenantInfo.UUID
tx := db.GetManager().Begin()
defer func() {
if r := recover(); r != nil {
logrus.Errorf("Unexpected panic occurred, rollback transaction: %v", r)
tx.Rollback()
}
}()
for _, p := range pluginList {
pluginInfo, err := db.GetManager().TenantPluginDao().GetPluginByID(p, fromTenantID)
if err != nil {
tx.Rollback()
return util.CreateAPIHandleErrorFromDBError("get plugin infos", err)
}
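// rebind the plugin to the target tenant and reset the primary key so it is inserted as a new row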
pluginInfo.TenantID = tenantID
pluginInfo.Domain = tenantName
pluginInfo.ID = 0
err = db.GetManager().TenantPluginDaoTransactions(tx).AddModel(pluginInfo)
if err != nil {
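// a plugin that already exists in the target tenant is tolerated; any other error aborts the transaction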
if !strings.Contains(err.Error(), "is exist") {
tx.Rollback()
return util.CreateAPIHandleErrorFromDBError("add plugin Info", err)
}
}
}
if err := tx.Commit().Error; err != nil {
return util.CreateAPIHandleErrorFromDBError("trans plugins infos", err)
}
return nil
}
// GetServicesStatus returns a list of service status matching ids.
func (t *TenantAction) GetServicesStatus(ids string) map[string]string {
return t.statusCli.GetStatuss(ids)
}
//IsClosedStatus checks if the status is closed status.
func (t *TenantAction) IsClosedStatus(status string) bool {
return t.statusCli.IsClosedStatus(status)
}
//GetClusterResource get cluster resource
func (t *TenantAction) GetClusterResource(ctx context.Context) *ClusterResourceStats {
if t.initClusterResource(ctx) != nil {
return nil
}
return t.cacheClusterResourceStats
}
// CheckResourceName checks resource name.
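//
// If the name is already taken in the namespace, a random 5-character suffix is
// appended and the check is retried, up to three times. A hypothetical call:
//
//	resp, err := tenantAction.CheckResourceName(ctx, "ns-123", &api_model.CheckResourceNameReq{
//		Type: "helmApp",
//		Name: "my-app",
//	})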
func (t *TenantAction) CheckResourceName(ctx context.Context, namespace string, req *api_model.CheckResourceNameReq) (*api_model.CheckResourceNameResp, error) {
obj, ok := t.resources[req.Type]
if !ok {
return nil, bcode.NewBadRequest("unsupported resource: " + req.Type)
}
// work on a deep copy so concurrent requests do not mutate the shared template object
obj = obj.DeepCopyObject().(k8sclient.Object)
nctx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()
retries := 3
for i := 0; i < retries; i++ {
if err := t.k8sClient.Get(nctx, types.NamespacedName{Namespace: namespace, Name: req.Name}, obj); err != nil {
if k8sErrors.IsNotFound(err) {
break
}
return nil, errors.Wrap(err, "ensure app name")
}
req.Name += "-" + rutil.NewUUID()[:5]
}
return &api_model.CheckResourceNameResp{
Name: req.Name,
}, nil
}