📘
linux CPUあれこれ
##CPU変数
z840の場合、Xeon E52697 V4を2個搭載している。
sudo dmesg | grep CPU
smpboot: Allowing 72 CPUs, 0 hotplug CPUs
setup_percpu: NR_CPUS:8192 nr_cpumask_bits:72 nr_cpu_ids:72 nr_node_ids:2
SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=72, Nodes=2
rcu: RCU restricting CPUs from NR_CPUS=8192 to nr_cpu_ids=72.
smpboot: CPU0: Intel(R) Xeon(R) CPU E5-2697 v4 @ 2.30GHz (family: 0x6, model: 0x4f, stepping: 0x1)
NUMAとは複数CPUを搭載したシステムのこと。
CPUとメモリを単位にノードという。
CPUを2個搭載しているのでnr_node_ids:2
nr_cpu_ids setup_nr_cpu_ids()
CPU変数
static int x = 1;のように変数宣言するが、それをCPUごとに変数宣言できるようになっている。
DEFINE_PER_CPU 変数設定
per_cpu 変数取得
CPU変数は.data..percpuセクションに配置される。
#define PER_CPU_BASE_SECTION ".data..percpu"
リンカスクリプトを見ると__per_cpu_startから __per_cpu_endまでに.data..percpuを埋め込んでいる。
include/asm-generic/vmlinux.lds.h
#define PERCPU_INPUT(cacheline)
__per_cpu_start = .;
*(.data..percpu..first)
. = ALIGN(PAGE_SIZE);
*(.data..percpu..page_aligned)
. = ALIGN(cacheline);
*(.data..percpu..read_mostly)
. = ALIGN(cacheline);
*(.data..percpu)
*(.data..percpu..shared_aligned)
PERCPU_DECRYPTED_SECTION
__per_cpu_end = .;
kallsyms 約380個ほどシンボルとして登録されている。
$ sudo cat /proc/kallsyms
0000000000000000 A fixed_percpu_data
0000000000000000 A __per_cpu_start
0000000000001000 A cpu_debug_store
0000000000002000 A irq_stack_backing_store
0000000000006000 A cpu_tss_rw
000000000000b000 A gdt_page
000000000000c000 A exception_stacks
0000000000018000 A entry_stack_storage
0000000000019000 A espfix_waddr
0000000000019008 A espfix_stack
0000000000019010 A cpu_llc_id
0000000000019020 A mce_banks_array
0000000000019420 A mce_num_banks
0000000000019440 A cpu_llc_shared_map
0000000000019448 A cpu_die_map
0000000000019450 A cpu_core_map
0000000000019458 A cpu_sibling_map
0000000000019460 A cpu_info
0000000000019568 A cpu_number
0000000000019570 A this_cpu_off
中略
00000000000320c0 A apf_reason
0000000000033000 A __per_cpu_end
カーネルで決めているのはここら辺
/arch/x86/kernel/cpu/common.c
static void get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
char *p, *q, *s;
if (c->extended_cpuid_level < 0x80000004)
return;
v = (unsigned int *)c->x86_model_id;
cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
c->x86_model_id[48] = 0;
/* Trim whitespace */
p = q = s = &c->x86_model_id[0];
while (*p == ' ')
p++;
while (*p) {
/* Note the last non-whitespace index */
if (!isspace(*p))
s = q;
*q++ = *p++;
}
*(s + 1) = '\0';
}
void detect_num_cpu_cores(struct cpuinfo_x86 *c)
{
unsigned int eax, ebx, ecx, edx;
c->x86_max_cores = 1;
if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
return;
cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
if (eax & 0x1f)
c->x86_max_cores = (eax >> 26) + 1;
}
static void generic_identify(struct cpuinfo_x86 *c)
{
c->extended_cpuid_level = 0;
if (!have_cpuid_p())
identify_cpu_without_cpuid(c);
/* cyrix could have cpuid enabled via c_identify()*/
if (!have_cpuid_p())
return;
cpu_detect(c);
get_cpu_vendor(c);
get_cpu_cap(c);
get_cpu_address_sizes(c);
if (c->cpuid_level >= 0x00000001) {
c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
#ifdef CONFIG_X86_32
# ifdef CONFIG_SMP
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
# else
c->apicid = c->initial_apicid;
# endif
#endif
c->phys_proc_id = c->initial_apicid;
}
get_model_name(c); /* Default name */
/*
* ESPFIX is a strange bug. All real CPUs have it. Paravirt
* systems that run Linux at CPL > 0 may or may not have the
* issue, but, even if they have the issue, there's absolutely
* nothing we can do about it because we can't use the real IRET
* instruction.
*
* NB: For the time being, only 32-bit kernels support
* X86_BUG_ESPFIX as such. 64-bit kernels directly choose
* whether to apply espfix using paravirt hooks. If any
* non-paravirt system ever shows up that does *not* have the
* ESPFIX issue, we can change this.
*/
#ifdef CONFIG_X86_32
set_cpu_bug(c, X86_BUG_ESPFIX);
#endif
}
/*
* Validate that ACPI/mptables have the same information about the
* effective APIC id and update the package map.
*/
static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned int apicid, cpu = smp_processor_id();
apicid = apic->cpu_present_to_apicid(cpu);
if (apicid != c->apicid) {
pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x APIC: %x\n",
cpu, apicid, c->initial_apicid);
}
BUG_ON(topology_update_package_map(c->phys_proc_id, cpu));
BUG_ON(topology_update_die_map(c->cpu_die_id, cpu));
#else
c->logical_proc_id = 0;
#endif
}
/*
* This does the hard work of actually picking apart the CPU stuff...
*/
static void identify_cpu(struct cpuinfo_x86 *c)
{
int i;
c->loops_per_jiffy = loops_per_jiffy;
c->x86_cache_size = 0;
c->x86_vendor = X86_VENDOR_UNKNOWN;
c->x86_model = c->x86_stepping = 0; /* So far unknown... */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
c->x86_max_cores = 1;
c->x86_coreid_bits = 0;
c->cu_id = 0xff;
#ifdef CONFIG_X86_64
c->x86_clflush_size = 64;
c->x86_phys_bits = 36;
c->x86_virt_bits = 48;
#else
c->cpuid_level = -1; /* CPUID not detected */
c->x86_clflush_size = 32;
c->x86_phys_bits = 32;
c->x86_virt_bits = 32;
#endif
c->x86_cache_alignment = c->x86_clflush_size;
memset(&c->x86_capability, 0, sizeof(c->x86_capability));
#ifdef CONFIG_X86_VMX_FEATURE_NAMES
memset(&c->vmx_capability, 0, sizeof(c->vmx_capability));
#endif
generic_identify(c);
if (this_cpu->c_identify)
this_cpu->c_identify(c);
/* Clear/Set all flags overridden by options, after probe */
apply_forced_caps(c);
#ifdef CONFIG_X86_64
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
#endif
/*
* Vendor-specific initialization. In this section we
* canonicalize the feature flags, meaning if there are
* features a certain CPU supports which CPUID doesn't
* tell us, CPUID claiming incorrect flags, or other bugs,
* we handle them here.
*
* At the end of this section, c->x86_capability better
* indicate the features this CPU genuinely supports!
*/
if (this_cpu->c_init)
this_cpu->c_init(c);
/* Disable the PN if appropriate */
squash_the_stupid_serial_number(c);
/* Set up SMEP/SMAP/UMIP */
setup_smep(c);
setup_smap(c);
setup_umip(c);
/* Enable FSGSBASE instructions if available. */
if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
cr4_set_bits(X86_CR4_FSGSBASE);
elf_hwcap2 |= HWCAP2_FSGSBASE;
}
/*
* The vendor-specific functions might have changed features.
* Now we do "generic changes."
*/
/* Filter out anything that depends on CPUID levels we don't have */
filter_cpuid_features(c, true);
/* If the model name is still unset, do table lookup. */
if (!c->x86_model_id[0]) {
const char *p;
p = table_lookup_model(c);
if (p)
strcpy(c->x86_model_id, p);
else
/* Last resort... */
sprintf(c->x86_model_id, "%02x/%02x",
c->x86, c->x86_model);
}
#ifdef CONFIG_X86_64
detect_ht(c);
#endif
x86_init_rdrand(c);
setup_pku(c);
setup_cet(c);
/*
* Clear/Set all flags overridden by options, need do it
* before following smp all cpus cap AND.
*/
apply_forced_caps(c);
/*
* On SMP, boot_cpu_data holds the common feature set between
* all CPUs; so make sure that we indicate which features are
* common between the CPUs. The first time this routine gets
* executed, c == &boot_cpu_data.
*/
if (c != &boot_cpu_data) {
/* AND the already accumulated flags with these */
for (i = 0; i < NCAPINTS; i++)
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
/* OR, i.e. replicate the bug flags */
for (i = NCAPINTS; i < NCAPINTS + NBUGINTS; i++)
c->x86_capability[i] |= boot_cpu_data.x86_capability[i];
}
ppin_init(c);
/* Init Machine Check Exception if available. */
mcheck_cpu_init(c);
select_idle_routine(c);
#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
#endif
}
get_cpu_mask
##CPU Hotplug
CPU_HOTPLUGがenableになっていると動的にCPUを有効無効と切り替えることができる。
# 無効化
$ echo 0 > /sys/devices/system/cpu/cpuX/online
# 有効化
$ echo 1 > /sys/devices/system/cpu/cpuX/online
続く
Discussion