Kernel Self Protection Project/Recommended Settings

From Linux Kernel Security Subsystem
Revision as of 22:28, 18 March 2020 by KeesCook (talk | contribs) (→‎arm)
Jump to navigation Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

Sometimes people ask the Kernel Self Protection Project what a secure set of build CONFIGs and runtime settings are. This is a brain-dump of the various options for a particularly paranoid system:

CONFIGs

# Report BUG() conditions and kill the offending process.
CONFIG_BUG=y

# Make sure kernel page tables have safe permissions.
CONFIG_DEBUG_KERNEL=y (prior to v4.11, needed to select CONFIG_DEBUG_RODATA below)
CONFIG_DEBUG_RODATA=y (prior to v4.11)
CONFIG_STRICT_KERNEL_RWX=y (since v4.11)

# Report any dangerous memory permissions (not available on all archs).
CONFIG_DEBUG_WX=y

# Use -fstack-protector-strong (gcc 4.9+) for best stack canary coverage.
# Prior to v4.18, these are:
#  CONFIG_CC_STACKPROTECTOR=y
#  CONFIG_CC_STACKPROTECTOR_STRONG=y
CONFIG_STACKPROTECTOR=y
CONFIG_STACKPROTECTOR_STRONG=y

# Do not allow direct physical memory access (but if you must have it, at least enable STRICT mode...)
# CONFIG_DEVMEM is not set
CONFIG_STRICT_DEVMEM=y
CONFIG_IO_STRICT_DEVMEM=y

# Provides some protections against SYN flooding.
CONFIG_SYN_COOKIES=y

# Perform additional validation of various commonly targeted structures.
CONFIG_DEBUG_CREDENTIALS=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_SCHED_STACK_END_CHECK=y

# Provide userspace with seccomp BPF API for syscall attack surface reduction.
CONFIG_SECCOMP=y
CONFIG_SECCOMP_FILTER=y

# Provide userspace with ptrace ancestry protections.
CONFIG_SECURITY=y
CONFIG_SECURITY_YAMA=y

# Perform usercopy bounds checking. (And disable fallback to gain full whitelist enforcement.)
CONFIG_HARDENED_USERCOPY=y
# CONFIG_HARDENED_USERCOPY_FALLBACK is not set
# CONFIG_HARDENED_USERCOPY_PAGESPAN is not set

# Randomize allocator freelists, harden metadata.
CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_SLAB_FREELIST_HARDENED=y

# Randomize high-order page allocation freelist.
CONFIG_SHUFFLE_PAGE_ALLOCATOR=y

# Allow allocator validation checking to be enabled (see "slub_debug=P" below).
CONFIG_SLUB_DEBUG=y

# Wipe higher-level memory allocations when they are freed (needs "page_poison=1" command line below).
# (If you can afford even more performance penalty, leave CONFIG_PAGE_POISONING_NO_SANITY=n)
CONFIG_PAGE_POISONING=y
CONFIG_PAGE_POISONING_NO_SANITY=y
CONFIG_PAGE_POISONING_ZERO=y

# Wipe slab and page allocations (since v5.3)
# Instead of "slub_debug=P" and "page_poison=1", a single place can control memory allocation wiping now.
# The init_on_free is only needed if there is concern about minimizing stale data lifetime.
CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
CONFIG_INIT_ON_FREE_DEFAULT_ON=y

# Initialize all stack variables on function entry. (Clang builds only. For GCC, see CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL=y below)
CONFIG_INIT_STACK_ALL=y

# Adds guard pages to kernel stacks (not all architectures support this yet).
CONFIG_VMAP_STACK=y

# Perform extensive checks on reference counting.
CONFIG_REFCOUNT_FULL=y

# Check for memory copies that might overflow a structure in str*() and mem*() functions both at build-time and run-time.
CONFIG_FORTIFY_SOURCE=y

# Dangerous; enabling this allows direct physical memory writing.
# CONFIG_ACPI_CUSTOM_METHOD is not set

# Dangerous; enabling this disables brk ASLR.
# CONFIG_COMPAT_BRK is not set

# Dangerous; enabling this allows direct kernel memory writing.
# CONFIG_DEVKMEM is not set

# Dangerous; exposes kernel text image layout.
# CONFIG_PROC_KCORE is not set

# Dangerous; enabling this disables VDSO ASLR.
# CONFIG_COMPAT_VDSO is not set

# Dangerous; enabling this allows replacement of running kernel.
# CONFIG_KEXEC is not set

# Dangerous; enabling this allows replacement of running kernel.
# CONFIG_HIBERNATION is not set

# Prior to v4.1, assists heap memory attacks; best to keep interface disabled.
# CONFIG_INET_DIAG is not set

# Easily confused by misconfigured userspace, keep off.
# CONFIG_BINFMT_MISC is not set

# Use the modern PTY interface (devpts) only.
# CONFIG_LEGACY_PTYS is not set

# If SELinux can be disabled at runtime, the LSM structures cannot be read-only; keep off.
# CONFIG_SECURITY_SELINUX_DISABLE is not set

# Reboot devices immediately if kernel experiences an Oops.
CONFIG_PANIC_ON_OOPS=y
CONFIG_PANIC_TIMEOUT=-1

# Keep root from altering kernel memory via loadable modules.
# CONFIG_MODULES is not set

# But if CONFIG_MODULE=y is needed, at least they must be signed with a per-build key.
CONFIG_DEBUG_SET_MODULE_RONX=y (prior to v4.11)
CONFIG_STRICT_MODULE_RWX=y (since v4.11)
CONFIG_MODULE_SIG=y
CONFIG_MODULE_SIG_FORCE=y
CONFIG_MODULE_SIG_ALL=y
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG_HASH="sha512"
CONFIG_MODULE_SIG_KEY="certs/signing_key.pem"

GCC plugins

# Enable GCC Plugins
CONFIG_GCC_PLUGINS=y

# Gather additional entropy at boot time for systems that may not have appropriate entropy sources.
CONFIG_GCC_PLUGIN_LATENT_ENTROPY=y

# Force all structures to be initialized before they are passed to other functions.
# When building with GCC:
CONFIG_GCC_PLUGIN_STRUCTLEAK=y
CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL=y

# Wipe stack contents on syscall exit (reduces stale data lifetime in stack)
CONFIG_GCC_PLUGIN_STACKLEAK=y

# Randomize the layout of system structures. This may have dramatic performance impact, so
# use with caution or also use CONFIG_GCC_PLUGIN_RANDSTRUCT_PERFORMANCE=y
CONFIG_GCC_PLUGIN_RANDSTRUCT=y

x86_64

# Full 64-bit means PAE and NX bit.
CONFIG_X86_64=y

# Disallow allocating the first 64k of memory.
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536

# Randomize position of kernel and memory.
CONFIG_RANDOMIZE_BASE=y
CONFIG_RANDOMIZE_MEMORY=y

# Modern libc no longer needs a fixed-position mapping in userspace, remove it as a possible target.
CONFIG_LEGACY_VSYSCALL_NONE=y

# Enable Kernel Page Table Isolation to remove an entire class of cache timing side-channels.
CONFIG_PAGE_TABLE_ISOLATION=y

# Remove additional attack surface, unless you really need them.
# CONFIG_IA32_EMULATION is not set
# CONFIG_X86_X32 is not set
# CONFIG_MODIFY_LDT_SYSCALL is not set

arm64

# Disallow allocating the first 32k of memory (cannot be 64k due to ARM loader).
CONFIG_DEFAULT_MMAP_MIN_ADDR=32768

# Randomize position of kernel (requires UEFI RNG or bootloader support for /chosen/kaslr-seed DT property).
CONFIG_RANDOMIZE_BASE=y

# Make sure PAN emulation is enabled.
CONFIG_ARM64_SW_TTBR0_PAN=y

# Enable Kernel Page Table Isolation to remove an entire class of cache timing side-channels.
CONFIG_UNMAP_KERNEL_AT_EL0=y

x86_32

# On 32-bit kernels, require PAE for NX bit support.
# CONFIG_M486 is not set
# CONFIG_HIGHMEM4G is not set
CONFIG_HIGHMEM64G=y
CONFIG_X86_PAE=y

# Disallow allocating the first 64k of memory.
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536

# Randomize position of kernel.
CONFIG_RANDOMIZE_BASE=y

# Enable Kernel Page Table Isolation to remove an entire class of cache timing side-channels.
CONFIG_PAGE_TABLE_ISOLATION=y

# Don't allow for 16-bit program emulation and associated LDT tricks.
# CONFIG_MODIFY_LDT_SYSCALL is not set

arm

# Disallow allocating the first 32k of memory (cannot be 64k due to ARM loader).
CONFIG_DEFAULT_MMAP_MIN_ADDR=32768

# For maximal userspace memory area (and maximum ASLR).
CONFIG_VMSPLIT_3G=y

# If building an old out-of-tree Qualcomm kernel, this is similar to CONFIG_STRICT_KERNEL_RWX.
CONFIG_STRICT_MEMORY_RWX=y

# Make sure PXN/PAN emulation is enabled.
CONFIG_CPU_SW_DOMAIN_PAN=y

# Dangerous; old interfaces and needless additional attack surface.
# CONFIG_OABI_COMPAT is not set

kernel command line options

# Enable slub/slab allocator free poisoning (requires CONFIG_SLUB_DEBUG=y above).
slub_debug=P

# Enable buddy allocator free poisoning (requires CONFIG_PAGE_POISONING=y above).
page_poison=1

# Wipe slab and page allocations (supersedes "slub_debug=P" and "page_poison=1" above, since v5.3)
# See CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y and CONFIG_INIT_ON_FREE_DEFAULT_ON=y above.
init_on_alloc=1
init_on_free=1

# Disable slab merging (makes many heap overflow attacks more difficult).
slab_nomerge

# Always enable Kernel Page Table Isolation, even if the CPU claims it is safe from Meltdown.
pti=on

x86_64

# Remove vsyscall entirely to avoid it being a fixed-position ROP target of any kind.
# (Same as CONFIG_LEGACY_VSYSCALL_NONE=y above.)
vsyscall=none

sysctls

# Try to keep kernel address exposures out of various /proc files (kallsyms, modules, etc).
kernel.kptr_restrict = 1

# Avoid kernel memory address exposures via dmesg.
kernel.dmesg_restrict = 1

# Block non-uid-0 profiling (needs distro patch, otherwise this is the same as "= 2")
kernel.perf_event_paranoid = 3

# Turn off kexec, even if it's built in.
kernel.kexec_load_disabled = 1

# Avoid non-ancestor ptrace access to running processes and their credentials.
kernel.yama.ptrace_scope = 1

# Disable User Namespaces, as it opens up a large attack surface to unprivileged users.
user.max_user_namespaces = 0

# Turn off unprivileged eBPF access.
kernel.unprivileged_bpf_disabled = 1

# Turn on BPF JIT hardening, if the JIT is enabled.
net.core.bpf_jit_harden = 2