Kernel Self Protection Project/Recommended Settings

From Linux Kernel Security Subsystem
Revision as of 22:51, 15 September 2021 by Anthraxx (talk | contribs) (Add missing critical whitespaces to the newly added randomize_kstack_offset entries to preserve a single visual block)
Jump to navigation Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

Sometimes people ask the Kernel Self Protection Project what a secure set of build CONFIGs and runtime settings are. This is a brain-dump of the various options for a particularly paranoid system.

Another place to find recommended kernel hardening settings is via the "kconfig-hardened-check" tool maintained by Alexander Popov.


CONFIGs

# Report BUG() conditions and kill the offending process.
CONFIG_BUG=y

# Make sure kernel page tables have safe permissions.
CONFIG_DEBUG_KERNEL=y (prior to v4.11, needed to select CONFIG_DEBUG_RODATA below)
CONFIG_DEBUG_RODATA=y (prior to v4.11)
CONFIG_STRICT_KERNEL_RWX=y (since v4.11)

# Report any dangerous memory permissions (not available on all archs).
CONFIG_DEBUG_WX=y

# Use -fstack-protector-strong (gcc 4.9+) for best stack canary coverage.
# Prior to v4.18, these are:
#  CONFIG_CC_STACKPROTECTOR=y
#  CONFIG_CC_STACKPROTECTOR_STRONG=y
CONFIG_STACKPROTECTOR=y
CONFIG_STACKPROTECTOR_STRONG=y

# Do not allow direct physical memory access (but if you must have it, at least enable STRICT mode...)
# CONFIG_DEVMEM is not set
CONFIG_STRICT_DEVMEM=y
CONFIG_IO_STRICT_DEVMEM=y

# Provides some protections against SYN flooding.
CONFIG_SYN_COOKIES=y

# Perform additional validation of various commonly targeted structures.
CONFIG_DEBUG_CREDENTIALS=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_SCHED_STACK_END_CHECK=y

# Provide userspace with seccomp BPF API for syscall attack surface reduction.
CONFIG_SECCOMP=y
CONFIG_SECCOMP_FILTER=y

# Provide userspace with ptrace ancestry protections.
CONFIG_SECURITY=y
CONFIG_SECURITY_YAMA=y

# Perform usercopy bounds checking. (And disable fallback to gain full whitelist enforcement.)
CONFIG_HARDENED_USERCOPY=y
# CONFIG_HARDENED_USERCOPY_FALLBACK is not set
# CONFIG_HARDENED_USERCOPY_PAGESPAN is not set

# Randomize allocator freelists, harden metadata.
CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_SLAB_FREELIST_HARDENED=y

# Randomize high-order page allocation freelist.
CONFIG_SHUFFLE_PAGE_ALLOCATOR=y

# Allow allocator validation checking to be enabled (see "slub_debug=P" below).
CONFIG_SLUB_DEBUG=y

# Wipe higher-level memory allocations when they are freed (needs "page_poison=1" command line below).
# (If you can afford even more performance penalty, leave CONFIG_PAGE_POISONING_NO_SANITY=n)
CONFIG_PAGE_POISONING=y
CONFIG_PAGE_POISONING_NO_SANITY=y
CONFIG_PAGE_POISONING_ZERO=y

# Wipe slab and page allocations (since v5.3)
# Instead of "slub_debug=P" and "page_poison=1", a single place can control memory allocation wiping now.
# The init_on_free is only needed if there is concern about minimizing stale data lifetime.
CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
CONFIG_INIT_ON_FREE_DEFAULT_ON=y

# Initialize all stack variables on function entry. (Clang builds only. For GCC, see CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL=y below)
CONFIG_INIT_STACK_ALL=y

# Adds guard pages to kernel stacks (not all architectures support this yet).
CONFIG_VMAP_STACK=y

# Perform extensive checks on reference counting.
CONFIG_REFCOUNT_FULL=y

# Check for memory copies that might overflow a structure in str*() and mem*() functions both at build-time and run-time.
CONFIG_FORTIFY_SOURCE=y

# Avoid kernel memory address exposures via dmesg (sets sysctl kernel.dmesg_restrict initial value to 1)
CONFIG_SECURITY_DMESG_RESTRICT=y

# Dangerous; enabling this allows direct physical memory writing.
# CONFIG_ACPI_CUSTOM_METHOD is not set

# Dangerous; enabling this disables brk ASLR.
# CONFIG_COMPAT_BRK is not set

# Dangerous; enabling this allows direct kernel memory writing.
# CONFIG_DEVKMEM is not set

# Dangerous; exposes kernel text image layout.
# CONFIG_PROC_KCORE is not set

# Dangerous; enabling this disables VDSO ASLR.
# CONFIG_COMPAT_VDSO is not set

# Dangerous; enabling this allows replacement of running kernel.
# CONFIG_KEXEC is not set

# Dangerous; enabling this allows replacement of running kernel.
# CONFIG_HIBERNATION is not set

# Prior to v4.1, assists heap memory attacks; best to keep interface disabled.
# CONFIG_INET_DIAG is not set

# Easily confused by misconfigured userspace, keep off.
# CONFIG_BINFMT_MISC is not set

# Use the modern PTY interface (devpts) only.
# CONFIG_LEGACY_PTYS is not set

# If SELinux can be disabled at runtime, the LSM structures cannot be read-only; keep off.
# CONFIG_SECURITY_SELINUX_DISABLE is not set

# Reboot devices immediately if kernel experiences an Oops.
CONFIG_PANIC_ON_OOPS=y
CONFIG_PANIC_TIMEOUT=-1

# Keep root from altering kernel memory via loadable modules.
# CONFIG_MODULES is not set

# But if CONFIG_MODULE=y is needed, at least they must be signed with a per-build key.
CONFIG_DEBUG_SET_MODULE_RONX=y (prior to v4.11)
CONFIG_STRICT_MODULE_RWX=y (since v4.11)
CONFIG_MODULE_SIG=y
CONFIG_MODULE_SIG_FORCE=y
CONFIG_MODULE_SIG_ALL=y
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG_HASH="sha512"
CONFIG_MODULE_SIG_KEY="certs/signing_key.pem"

GCC plugins

# Enable GCC Plugins
CONFIG_GCC_PLUGINS=y

# Gather additional entropy at boot time for systems that may not have appropriate entropy sources.
CONFIG_GCC_PLUGIN_LATENT_ENTROPY=y

# Force all structures to be initialized before they are passed to other functions.
# When building with GCC:
CONFIG_GCC_PLUGIN_STRUCTLEAK=y
CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL=y

# Wipe stack contents on syscall exit (reduces stale data lifetime in stack)
CONFIG_GCC_PLUGIN_STACKLEAK=y

# Randomize the layout of system structures. This may have dramatic performance impact, so
# use with caution or also use CONFIG_GCC_PLUGIN_RANDSTRUCT_PERFORMANCE=y
CONFIG_GCC_PLUGIN_RANDSTRUCT=y

x86_64

# Full 64-bit means PAE and NX bit.
CONFIG_X86_64=y

# Disallow allocating the first 64k of memory.
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536

# Disable Model-Specific Register writes.
# CONFIG_X86_MSR is not set

# Randomize position of kernel and memory.
CONFIG_RANDOMIZE_BASE=y
CONFIG_RANDOMIZE_MEMORY=y

# Randomize kernel stack offset on syscall entry (since v5.13).
CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y

# Modern libc no longer needs a fixed-position mapping in userspace, remove it as a possible target.
CONFIG_LEGACY_VSYSCALL_NONE=y

# Enable Kernel Page Table Isolation to remove an entire class of cache timing side-channels.
CONFIG_PAGE_TABLE_ISOLATION=y

# Remove additional attack surface, unless you really need them.
# CONFIG_IA32_EMULATION is not set
# CONFIG_X86_X32 is not set
# CONFIG_MODIFY_LDT_SYSCALL is not set

arm64

# Disallow allocating the first 32k of memory (cannot be 64k due to ARM loader).
CONFIG_DEFAULT_MMAP_MIN_ADDR=32768

# Randomize position of kernel (requires UEFI RNG or bootloader support for /chosen/kaslr-seed DT property).
CONFIG_RANDOMIZE_BASE=y

# Randomize kernel stack offset on syscall entry (since v5.13).
CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y

# Make sure PAN emulation is enabled.
CONFIG_ARM64_SW_TTBR0_PAN=y

# Enable Kernel Page Table Isolation to remove an entire class of cache timing side-channels.
CONFIG_UNMAP_KERNEL_AT_EL0=y

x86_32

# On 32-bit kernels, require PAE for NX bit support.
# CONFIG_M486 is not set
# CONFIG_HIGHMEM4G is not set
CONFIG_HIGHMEM64G=y
CONFIG_X86_PAE=y

# Disallow allocating the first 64k of memory.
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536

# Disable Model-Specific Register writes.
# CONFIG_X86_MSR is not set

# Randomize position of kernel.
CONFIG_RANDOMIZE_BASE=y

# Randomize kernel stack offset on syscall entry (since v5.13).
CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y

# Enable Kernel Page Table Isolation to remove an entire class of cache timing side-channels.
CONFIG_PAGE_TABLE_ISOLATION=y

# Don't allow for 16-bit program emulation and associated LDT tricks.
# CONFIG_MODIFY_LDT_SYSCALL is not set

arm

# Disallow allocating the first 32k of memory (cannot be 64k due to ARM loader).
CONFIG_DEFAULT_MMAP_MIN_ADDR=32768

# For maximal userspace memory area (and maximum ASLR).
CONFIG_VMSPLIT_3G=y

# If building an old out-of-tree Qualcomm kernel, this is similar to CONFIG_STRICT_KERNEL_RWX.
CONFIG_STRICT_MEMORY_RWX=y

# Make sure PXN/PAN emulation is enabled.
CONFIG_CPU_SW_DOMAIN_PAN=y

# Dangerous; old interfaces and needless additional attack surface.
# CONFIG_OABI_COMPAT is not set

kernel command line options

# Wipe slab and page allocations (Since v5.3; supersedes "slub_debug=P" and "page_poison=1" below)
# See CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y and CONFIG_INIT_ON_FREE_DEFAULT_ON=y above.
init_on_alloc=1
init_on_free=1

# Randomize kernel stack offset on syscall entry (since v5.13).
# See CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT above.
randomize_kstack_offset=on

# Disable slab merging (makes many heap overflow attacks more difficult).
slab_nomerge

# Always enable Kernel Page Table Isolation, even if the CPU claims it is safe from Meltdown.
pti=on

# To prevent against L1TF, at the cost of losing hyper threading (slow).
nosmt

# Enable SLUB redzoning and sanity checking (slow; requires CONFIG_SLUB_DEBUG=y above).
slub_debug=ZF

# (Before v5.3 without "init_on_free=1") Enable slub/slab allocator free poisoning (requires CONFIG_SLUB_DEBUG=y above).
slub_debug=P

# (Before v5.3 without "init_on_free=1") Enable buddy allocator free poisoning (requires CONFIG_PAGE_POISONING=y above).
page_poison=1

x86_64

# Remove vsyscall entirely to avoid it being a fixed-position ROP target of any kind.
# (Same as CONFIG_LEGACY_VSYSCALL_NONE=y above.)
vsyscall=none

sysctls

# Try to keep kernel address exposures out of various /proc files (kallsyms, modules, etc). (There is no CONFIG for the changing the initial value.)
kernel.kptr_restrict = 1

# Avoid kernel memory address exposures via dmesg (this value can also be set by CONFIG_SECURITY_DMESG_RESTRICT).
kernel.dmesg_restrict = 1

# Block non-uid-0 profiling (needs distro patch, otherwise this is the same as "= 2")
kernel.perf_event_paranoid = 3

# Turn off kexec, even if it's built in.
kernel.kexec_load_disabled = 1

# Avoid non-ancestor ptrace access to running processes and their credentials.
kernel.yama.ptrace_scope = 1

# Disable User Namespaces, as it opens up a large attack surface to unprivileged users.
user.max_user_namespaces = 0

# Turn off unprivileged eBPF access.
kernel.unprivileged_bpf_disabled = 1

# Turn on BPF JIT hardening, if the JIT is enabled.
net.core.bpf_jit_harden = 2