he3pg/contrib/sepgsql/uavc.c
2022-08-31 15:03:14 +08:00

521 lines
14 KiB
C

/* -------------------------------------------------------------------------
*
* contrib/sepgsql/uavc.c
*
* Implementation of the userspace access vector cache, which caches
* recently used access control decisions and thereby reduces the number of
* kernel invocations, avoiding an unnecessary performance hit.
*
* Copyright (c) 2011-2021, PostgreSQL Global Development Group
*
* -------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/pg_proc.h"
#include "commands/seclabel.h"
#include "common/hashfn.h"
#include "sepgsql.h"
#include "storage/ipc.h"
#include "utils/guc.h"
#include "utils/memutils.h"
/*
 * avc_cache
 *
 * One entry of the userspace access vector cache.  An entry records the
 * access control decision for a particular combination of subject label,
 * target label and object class.  For the db_procedure class only, it also
 * records the label to switch to while executing a trusted procedure.
 */
typedef struct avc_cache
{
	uint32		hash;			/* hash value of this entry */
	char	   *scontext;		/* subject's security context */
	char	   *tcontext;		/* target's security context */
	uint16		tclass;			/* target's object class */
	uint32		allowed;		/* permissions that are allowed */
	uint32		auditallow;		/* permissions audited when allowed */
	uint32		auditdeny;		/* permissions audited when denied */
	bool		permissive;		/* true for a permissive rule */
	bool		hot_cache;		/* true if referenced recently */
	bool		tcontext_is_valid;	/* true if tcontext is a valid label */
	char	   *ncontext;		/* temporary scontext while running a trusted
								 * procedure, or NULL otherwise */
} avc_cache;
/*
 * Static state of the userspace access vector cache.
 *
 * Cache entries, the bucket lists and the cached "unlabeled" label are all
 * allocated in avc_mem_cxt, so resetting that context discards them at once.
 */
/* Number of hash buckets in avc_slots */
#define AVC_NUM_SLOTS 512
/*
 * Once reclaiming starts, it continues until the number of entries drops
 * below (avc_threshold - AVC_NUM_RECLAIM).
 */
#define AVC_NUM_RECLAIM 16
/* Value assigned to avc_threshold at initialization */
#define AVC_DEF_THRESHOLD 384
static MemoryContext avc_mem_cxt;
static List *avc_slots[AVC_NUM_SLOTS]; /* avc's hash buckets */
static int avc_num_caches; /* number of caches currently used */
static int avc_lru_hint; /* index of the buckets to be reclaimed next */
static int avc_threshold; /* threshold to launch cache-reclaiming */
static char *avc_unlabeled; /* system 'unlabeled' label; NULL until first
 * requested and again after a cache reset */
/*
 * sepgsql_avc_hash
 *
 * Compute the hash value identifying a (scontext, tcontext, tclass)
 * combination: the hashes of the two label strings XOR-ed together with
 * the object class number.
 */
static uint32
sepgsql_avc_hash(const char *scontext, const char *tcontext, uint16 tclass)
{
	uint32		subject_hash;
	uint32		target_hash;

	subject_hash = hash_any((const unsigned char *) scontext,
							strlen(scontext));
	target_hash = hash_any((const unsigned char *) tcontext,
						   strlen(tcontext));

	return subject_hash ^ target_hash ^ tclass;
}
/*
 * sepgsql_avc_reset
 *
 * Throw away the whole cache and return to the empty state.
 */
static void
sepgsql_avc_reset(void)
{
	/*
	 * Cache entries, bucket lists and the "unlabeled" label all live in
	 * avc_mem_cxt, so a single reset releases every one of them.
	 */
	MemoryContextReset(avc_mem_cxt);

	/* Anything that pointed into the context is dangling now; forget it. */
	avc_unlabeled = NULL;
	avc_lru_hint = 0;
	avc_num_caches = 0;
	memset(avc_slots, 0, sizeof(avc_slots));
}
/*
 * sepgsql_avc_reclaim
 *
 * Evict entries that have not been referenced recently.  Buckets are
 * visited round-robin starting at avc_lru_hint.  An entry whose hot_cache
 * flag is set merely has the flag cleared; an entry found with the flag
 * already clear is removed and freed.  Scanning stops once the number of
 * entries has dropped below (avc_threshold - AVC_NUM_RECLAIM).
 */
static void
sepgsql_avc_reclaim(void)
{
	while (avc_num_caches >= avc_threshold - AVC_NUM_RECLAIM)
	{
		int			slot = avc_lru_hint;
		ListCell   *lc;

		foreach(lc, avc_slots[slot])
		{
			avc_cache  *entry = lfirst(lc);

			if (entry->hot_cache)
			{
				/* Referenced since the last visit: spare it this time. */
				entry->hot_cache = false;
				continue;
			}

			/* Cold entry: unlink it from the bucket and release it. */
			avc_slots[slot] = foreach_delete_current(avc_slots[slot], lc);

			pfree(entry->scontext);
			pfree(entry->tcontext);
			if (entry->ncontext)
				pfree(entry->ncontext);
			pfree(entry);

			avc_num_caches--;
		}

		/* Move on to the next bucket, wrapping around at the end. */
		avc_lru_hint = (slot + 1) % AVC_NUM_SLOTS;
	}
}
/* -------------------------------------------------------------------------
 *
 * sepgsql_avc_check_valid
 *
 * Check whether the cached entries are still valid.  If the security policy
 * has been reloaded (or any other event that requires resetting userspace
 * caches has occurred) since the last reference to the access vector cache,
 * the cache is flushed and false is returned; otherwise true is returned.
 *
 * Access control decisions must be atomic, but making one may take several
 * system calls.  Code that references the cache must therefore loop until
 * it completes without an intervening flush event.  In practice, looping
 * even once should be very rare.  Callers should do something like this:
 *
 *	 sepgsql_avc_check_valid();
 *	 do {
 *			 :
 *		 <reference to uavc>
 *			 :
 *	 } while (!sepgsql_avc_check_valid())
 *
 * -------------------------------------------------------------------------
 */
static bool
sepgsql_avc_check_valid(void)
{
	/* Only a positive result is treated as "status has changed". */
	bool		still_valid = (selinux_status_updated() <= 0);

	if (!still_valid)
		sepgsql_avc_reset();

	return still_valid;
}
/*
 * sepgsql_avc_unlabeled
 *
 * Return the alternative label to apply when an object has no label or an
 * invalid one.  The label is fetched from SELinux on first use and then
 * kept in avc_mem_cxt until the cache is reset.  Raises an error if the
 * label cannot be obtained.
 */
static char *
sepgsql_avc_unlabeled(void)
{
	char	   *raw_label;

	/* Fast path: already fetched since the last cache reset. */
	if (avc_unlabeled)
		return avc_unlabeled;

	if (security_get_initial_context_raw("unlabeled", &raw_label) < 0)
		ereport(ERROR,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg("SELinux: failed to get initial security label: %m")));

	/*
	 * raw_label has to be released with freecon() whether or not the copy
	 * into our own memory context succeeds.
	 */
	PG_TRY();
	{
		avc_unlabeled = MemoryContextStrdup(avc_mem_cxt, raw_label);
	}
	PG_FINALLY();
	{
		freecon(raw_label);
	}
	PG_END_TRY();

	return avc_unlabeled;
}
/*
 * sepgsql_avc_compute
 *
 * Slow path taken on a cache miss.  Asks SELinux for its access control
 * decision on the supplied pair of security contexts and object class,
 * stores the answer in a new cache entry allocated in avc_mem_cxt, links
 * the entry at the head of its hash bucket, and returns it.
 *
 * If 'tcontext' is not a valid security context, the decision is computed
 * against the system "unlabeled" label instead.  The entry is still keyed
 * by the supplied 'tcontext', with tcontext_is_valid left false.
 */
static avc_cache *
sepgsql_avc_compute(const char *scontext, const char *tcontext, uint16 tclass)
{
	const char *target;
	char	   *ucontext = NULL;
	char	   *ncontext = NULL;
	MemoryContext oldctx;
	avc_cache  *cache;
	uint32		hash;
	int			index;
	struct av_decision avd;

	hash = sepgsql_avc_hash(scontext, tcontext, tclass);
	index = hash % AVC_NUM_SLOTS;

	/*
	 * Validation check of the supplied security context.  Because it always
	 * invokes a system call, frequent checks should be avoided.  Unless the
	 * security policy is reloaded, the validation status stays the same, so
	 * we also cache whether the supplied security context was valid or not.
	 */
	if (security_check_context_raw(tcontext) != 0)
		ucontext = sepgsql_avc_unlabeled();

	/* The label the decision is actually computed against. */
	target = ucontext ? ucontext : tcontext;

	/*
	 * Ask SELinux for its access control decision
	 */
	sepgsql_compute_avd(scontext, target, tclass, &avd);

	/*
	 * For procedures, also cache the security label to switch to when a
	 * client labeled as 'scontext' executes a procedure labeled as
	 * 'tcontext'.  That label is determined uniquely by the pair of
	 * 'scontext' and 'tcontext', so caching it here saves system calls.  It
	 * is referenced by sepgsql_needs_fmgr_hook to check whether the supplied
	 * function is a trusted procedure or not.  A result equal to 'scontext'
	 * means no label switch, which is recorded as NULL.
	 */
	if (tclass == SEPG_CLASS_DB_PROCEDURE)
	{
		ncontext = sepgsql_compute_create(scontext, target,
										  SEPG_CLASS_PROCESS, NULL);
		if (strcmp(scontext, ncontext) == 0)
		{
			pfree(ncontext);
			ncontext = NULL;
		}
	}

	/*
	 * Set up an avc_cache object.  Everything allocated from here on,
	 * including the list cell added by lcons(), must live in avc_mem_cxt.
	 */
	oldctx = MemoryContextSwitchTo(avc_mem_cxt);

	cache = palloc0(sizeof(avc_cache));

	cache->hash = hash;
	cache->scontext = pstrdup(scontext);
	cache->tcontext = pstrdup(tcontext);
	cache->tclass = tclass;

	cache->allowed = avd.allowed;
	cache->auditallow = avd.auditallow;
	cache->auditdeny = avd.auditdeny;
	cache->hot_cache = true;
	if (avd.flags & SELINUX_AVD_FLAGS_PERMISSIVE)
		cache->permissive = true;
	if (!ucontext)
		cache->tcontext_is_valid = true;
	if (ncontext)
		cache->ncontext = pstrdup(ncontext);

	avc_num_caches++;

	/*
	 * Reclaim before linking the new entry in, so that the entry being
	 * returned can never be among those evicted.
	 */
	if (avc_num_caches > avc_threshold)
		sepgsql_avc_reclaim();

	avc_slots[index] = lcons(cache, avc_slots[index]);

	MemoryContextSwitchTo(oldctx);

	/*
	 * The cache entry holds its own copy of the label; release the temporary
	 * one so that it does not leak in the caller's memory context.
	 */
	if (ncontext)
		pfree(ncontext);

	return cache;
}
/*
 * sepgsql_avc_lookup
 *
 * Find the cache entry for the supplied security contexts and object class,
 * marking it as recently referenced.  On a miss, a new entry is computed,
 * inserted and returned instead.
 */
static avc_cache *
sepgsql_avc_lookup(const char *scontext, const char *tcontext, uint16 tclass)
{
	uint32		hash = sepgsql_avc_hash(scontext, tcontext, tclass);
	List	   *bucket = avc_slots[hash % AVC_NUM_SLOTS];
	ListCell   *lc;

	foreach(lc, bucket)
	{
		avc_cache  *entry = lfirst(lc);

		/* Cheap integer comparisons first ... */
		if (entry->hash != hash || entry->tclass != tclass)
			continue;

		/* ... then confirm with the label strings themselves. */
		if (strcmp(entry->tcontext, tcontext) != 0 ||
			strcmp(entry->scontext, scontext) != 0)
			continue;

		entry->hot_cache = true;
		return entry;
	}

	/* not found, so insert a new cache */
	return sepgsql_avc_compute(scontext, tcontext, tclass);
}
/*
 * sepgsql_avc_check_perms_label
 *
 * Returns true if the security policy allows the client to exercise the
 * 'required' permissions on an object labeled 'tcontext' of class 'tclass'.
 * Otherwise it returns false, or raises an error when 'abort_on_violation'
 * is set.
 *
 * 'tcontext' may be NULL for an unlabeled object, in which case the system
 * "unlabeled" label is used.  'required' is a bitmask of the permissions
 * (SEPG_*__*) defined for the object class.  'audit_name' is the optional
 * object name for audit records; passing SEPGSQL_AVC_NOAUDIT suppresses all
 * audit messages.
 */
bool
sepgsql_avc_check_perms_label(const char *tcontext,
							  uint16 tclass, uint32 required,
							  const char *audit_name,
							  bool abort_on_violation)
{
	char	   *scontext = sepgsql_get_client_label();
	avc_cache  *entry;
	uint32		denied;
	uint32		audited;
	bool		granted;

	sepgsql_avc_check_valid();
	do
	{
		const char *target;

		granted = true;

		/*
		 * An unlabeled target object is checked using the label supplied by
		 * sepgsql_avc_unlabeled().
		 */
		target = tcontext ? tcontext : sepgsql_avc_unlabeled();
		entry = sepgsql_avc_lookup(scontext, target, tclass);

		denied = required & ~entry->allowed;

		/*
		 * Compute permissions to be audited.  With debug auditing enabled,
		 * everything denied (or, if nothing was denied, everything required)
		 * is audited regardless of the policy's audit rules.
		 */
		if (sepgsql_get_debug_audit())
			audited = denied ? denied : required;
		else if (denied)
			audited = denied & entry->auditdeny;
		else
			audited = required & entry->auditallow;

		if (denied)
		{
			/*
			 * In permissive mode or a permissive domain, a violation is
			 * written to the log once and the permissions are then
			 * implicitly allowed, to avoid a flood of access-denied
			 * messages.  The purpose of permissive mode/domain is to collect
			 * violation logs that make it possible to fix up the security
			 * policy.
			 */
			if (sepgsql_getenforce() && !entry->permissive)
				granted = false;
			else
				entry->allowed |= required;
		}
	} while (!sepgsql_avc_check_valid());

	/*
	 * If there is anything to audit, call sepgsql_audit_log with the text
	 * representation of both the subject's and the object's security label.
	 * This records the access, so the DBA will be able to find unexpected
	 * security problems later.
	 */
	if (audited != 0 &&
		audit_name != SEPGSQL_AVC_NOAUDIT &&
		sepgsql_get_mode() != SEPGSQL_MODE_INTERNAL)
	{
		char	   *logged_tcontext;

		/* An invalid target label is reported as the "unlabeled" label. */
		if (entry->tcontext_is_valid)
			logged_tcontext = entry->tcontext;
		else
			logged_tcontext = sepgsql_avc_unlabeled();

		sepgsql_audit_log(denied != 0,
						  entry->scontext,
						  logged_tcontext,
						  entry->tclass,
						  audited,
						  audit_name);
	}

	if (!granted && abort_on_violation)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("SELinux: security policy violation")));

	return granted;
}
/*
 * sepgsql_avc_check_perms
 *
 * Same check as sepgsql_avc_check_perms_label, except that the target is
 * identified by its object address: the object's security label is looked
 * up here, passed on (NULL if the object has none), and freed afterwards.
 */
bool
sepgsql_avc_check_perms(const ObjectAddress *tobject,
						uint16 tclass, uint32 required,
						const char *audit_name,
						bool abort_on_violation)
{
	char	   *label = GetSecurityLabel(tobject, SEPGSQL_LABEL_TAG);
	bool		granted;

	granted = sepgsql_avc_check_perms_label(label,
											tclass, required,
											audit_name, abort_on_violation);

	/* The label was allocated for us; it is not needed past this point. */
	if (label != NULL)
		pfree(label);

	return granted;
}
/*
 * sepgsql_avc_trusted_proc
 *
 * If the supplied function OID is configured as a trusted procedure, this
 * function returns the security label to be used during the execution of
 * that function.  Otherwise, it returns NULL.
 *
 * The returned string belongs to the cache entry.  It is released when the
 * cache is reset or the entry is reclaimed, so a caller that needs it beyond
 * the next cache reference should make its own copy.
 */
char *
sepgsql_avc_trusted_proc(Oid functionId)
{
	char	   *scontext = sepgsql_get_client_label();
	char	   *tcontext;
	char	   *ncontext;
	ObjectAddress tobject;
	avc_cache  *cache;

	tobject.classId = ProcedureRelationId;
	tobject.objectId = functionId;
	tobject.objectSubId = 0;
	tcontext = GetSecurityLabel(&tobject, SEPGSQL_LABEL_TAG);

	/* Retry until the lookup completes without an intervening cache flush. */
	sepgsql_avc_check_valid();
	do
	{
		if (tcontext)
			cache = sepgsql_avc_lookup(scontext, tcontext,
									   SEPG_CLASS_DB_PROCEDURE);
		else
			cache = sepgsql_avc_lookup(scontext, sepgsql_avc_unlabeled(),
									   SEPG_CLASS_DB_PROCEDURE);
	} while (!sepgsql_avc_check_valid());

	ncontext = cache->ncontext;

	/*
	 * The cache entry keeps its own copy of the label, so release ours, as
	 * sepgsql_avc_check_perms does, rather than leaking it in the caller's
	 * memory context.
	 */
	if (tcontext)
		pfree(tcontext);

	return ncontext;
}
/*
 * sepgsql_avc_exit
 *
 * Process-exit callback, registered through on_proc_exit() by
 * sepgsql_avc_init, that closes the SELinux status handle opened there with
 * selinux_status_open().  Neither argument is used.
 */
static void
sepgsql_avc_exit(int code, Datum arg)
{
selinux_status_close();
}
/*
* sepgsql_avc_init
*
* Initialize the userspace AVC. This should be called from _PG_init.
*/
void
sepgsql_avc_init(void)
{
int rc;
/*
* All the avc stuff shall be allocated in avc_mem_cxt
*/
avc_mem_cxt = AllocSetContextCreate(TopMemoryContext,
"userspace access vector cache",
ALLOCSET_DEFAULT_SIZES);
memset(avc_slots, 0, sizeof(avc_slots));
avc_num_caches = 0;
avc_lru_hint = 0;
avc_threshold = AVC_DEF_THRESHOLD;
/*
* SELinux allows to mmap(2) its kernel status page in read-only mode to
* inform userspace applications its status updating (such as policy
* reloading) without system-call invocations. This feature is only
* supported in Linux-2.6.38 or later, however, libselinux provides a
* fallback mode to know its status using netlink sockets.
*/
rc = selinux_status_open(1);
if (rc < 0)
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg("SELinux: could not open selinux status : %m")));
else if (rc > 0)
ereport(LOG,
(errmsg("SELinux: kernel status page uses fallback mode")));
/* Arrange to close selinux status page on process exit. */
on_proc_exit(sepgsql_avc_exit, 0);
}