📘

linux CPUあれこれ

2022/10/14に公開

##CPU変数

z840の場合、Xeon E52697 V4を2個搭載している。

sudo dmesg | grep CPU
smpboot: Allowing 72 CPUs, 0 hotplug CPUs
setup_percpu: NR_CPUS:8192 nr_cpumask_bits:72 nr_cpu_ids:72 nr_node_ids:2
SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=72, Nodes=2
rcu: 	RCU restricting CPUs from NR_CPUS=8192 to nr_cpu_ids=72.
smpboot: CPU0: Intel(R) Xeon(R) CPU E5-2697 v4 @ 2.30GHz (family: 0x6, model: 0x4f, stepping: 0x1)

NUMAとは複数CPUを搭載したシステムのこと。
CPUとメモリを単位にノードという。
CPUを2個搭載しているのでnr_node_ids:2

nr_cpu_ids setup_nr_cpu_ids()

CPU変数
static int x = 1;のように変数宣言するが、それをCPUごとに変数宣言できるようになっている。
DEFINE_PER_CPU 変数設定
per_cpu 変数取得

CPU変数は.data..percpuセクションに配置される。
#define PER_CPU_BASE_SECTION ".data..percpu"

リンカスクリプトを見ると__per_cpu_startから __per_cpu_endまでに.data..percpuを埋め込んでいる。

include/asm-generic/vmlinux.lds.h
#define PERCPU_INPUT(cacheline)                                      
   __per_cpu_start = .;                                          
   *(.data..percpu..first)                                         
   . = ALIGN(PAGE_SIZE);                                          
   *(.data..percpu..page_aligned)                                 
   . = ALIGN(cacheline);                                           
   *(.data..percpu..read_mostly)                                  
   . = ALIGN(cacheline);                                           
   *(.data..percpu)                                                
   *(.data..percpu..shared_aligned)                                
   PERCPU_DECRYPTED_SECTION                                        
   __per_cpu_end = .;

kallsyms 約380個ほどシンボルとして登録されている。

$ sudo cat /proc/kallsyms
0000000000000000 A fixed_percpu_data
0000000000000000 A __per_cpu_start
0000000000001000 A cpu_debug_store
0000000000002000 A irq_stack_backing_store
0000000000006000 A cpu_tss_rw
000000000000b000 A gdt_page
000000000000c000 A exception_stacks
0000000000018000 A entry_stack_storage
0000000000019000 A espfix_waddr
0000000000019008 A espfix_stack
0000000000019010 A cpu_llc_id
0000000000019020 A mce_banks_array
0000000000019420 A mce_num_banks
0000000000019440 A cpu_llc_shared_map
0000000000019448 A cpu_die_map
0000000000019450 A cpu_core_map
0000000000019458 A cpu_sibling_map
0000000000019460 A cpu_info
0000000000019568 A cpu_number
0000000000019570 A this_cpu_off
     中略
00000000000320c0 A apf_reason
0000000000033000 A __per_cpu_end

カーネルで決めているのはここら辺

/arch/x86/kernel/cpu/common.c
static void get_model_name(struct cpuinfo_x86 *c)
{
	unsigned int *v;
	char *p, *q, *s;

	if (c->extended_cpuid_level < 0x80000004)
		return;

	v = (unsigned int *)c->x86_model_id;
	cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
	cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
	cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
	c->x86_model_id[48] = 0;

	/* Trim whitespace */
	p = q = s = &c->x86_model_id[0];

	while (*p == ' ')
		p++;

	while (*p) {
		/* Note the last non-whitespace index */
		if (!isspace(*p))
			s = q;

		*q++ = *p++;
	}

	*(s + 1) = '\0';
}

void detect_num_cpu_cores(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx;

	c->x86_max_cores = 1;
	if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
		return;

	cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
	if (eax & 0x1f)
		c->x86_max_cores = (eax >> 26) + 1;
}

static void generic_identify(struct cpuinfo_x86 *c)
{
	c->extended_cpuid_level = 0;

	if (!have_cpuid_p())
		identify_cpu_without_cpuid(c);

	/* cyrix could have cpuid enabled via c_identify()*/
	if (!have_cpuid_p())
		return;

	cpu_detect(c);

	get_cpu_vendor(c);

	get_cpu_cap(c);

	get_cpu_address_sizes(c);

	if (c->cpuid_level >= 0x00000001) {
		c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
#ifdef CONFIG_X86_32
# ifdef CONFIG_SMP
		c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
# else
		c->apicid = c->initial_apicid;
# endif
#endif
		c->phys_proc_id = c->initial_apicid;
	}

	get_model_name(c); /* Default name */

	/*
	 * ESPFIX is a strange bug.  All real CPUs have it.  Paravirt
	 * systems that run Linux at CPL > 0 may or may not have the
	 * issue, but, even if they have the issue, there's absolutely
	 * nothing we can do about it because we can't use the real IRET
	 * instruction.
	 *
	 * NB: For the time being, only 32-bit kernels support
	 * X86_BUG_ESPFIX as such.  64-bit kernels directly choose
	 * whether to apply espfix using paravirt hooks.  If any
	 * non-paravirt system ever shows up that does *not* have the
	 * ESPFIX issue, we can change this.
	 */
#ifdef CONFIG_X86_32
	set_cpu_bug(c, X86_BUG_ESPFIX);
#endif
}

/*
 * Validate that ACPI/mptables have the same information about the
 * effective APIC id and update the package map.
 */
static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
	unsigned int apicid, cpu = smp_processor_id();

	apicid = apic->cpu_present_to_apicid(cpu);

	if (apicid != c->apicid) {
		pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x APIC: %x\n",
		       cpu, apicid, c->initial_apicid);
	}
	BUG_ON(topology_update_package_map(c->phys_proc_id, cpu));
	BUG_ON(topology_update_die_map(c->cpu_die_id, cpu));
#else
	c->logical_proc_id = 0;
#endif
}

/*
 * This does the hard work of actually picking apart the CPU stuff...
 */
static void identify_cpu(struct cpuinfo_x86 *c)
{
	int i;

	c->loops_per_jiffy = loops_per_jiffy;
	c->x86_cache_size = 0;
	c->x86_vendor = X86_VENDOR_UNKNOWN;
	c->x86_model = c->x86_stepping = 0;	/* So far unknown... */
	c->x86_vendor_id[0] = '\0'; /* Unset */
	c->x86_model_id[0] = '\0';  /* Unset */
	c->x86_max_cores = 1;
	c->x86_coreid_bits = 0;
	c->cu_id = 0xff;
#ifdef CONFIG_X86_64
	c->x86_clflush_size = 64;
	c->x86_phys_bits = 36;
	c->x86_virt_bits = 48;
#else
	c->cpuid_level = -1;	/* CPUID not detected */
	c->x86_clflush_size = 32;
	c->x86_phys_bits = 32;
	c->x86_virt_bits = 32;
#endif
	c->x86_cache_alignment = c->x86_clflush_size;
	memset(&c->x86_capability, 0, sizeof(c->x86_capability));
#ifdef CONFIG_X86_VMX_FEATURE_NAMES
	memset(&c->vmx_capability, 0, sizeof(c->vmx_capability));
#endif

	generic_identify(c);

	if (this_cpu->c_identify)
		this_cpu->c_identify(c);

	/* Clear/Set all flags overridden by options, after probe */
	apply_forced_caps(c);

#ifdef CONFIG_X86_64
	c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
#endif

	/*
	 * Vendor-specific initialization.  In this section we
	 * canonicalize the feature flags, meaning if there are
	 * features a certain CPU supports which CPUID doesn't
	 * tell us, CPUID claiming incorrect flags, or other bugs,
	 * we handle them here.
	 *
	 * At the end of this section, c->x86_capability better
	 * indicate the features this CPU genuinely supports!
	 */
	if (this_cpu->c_init)
		this_cpu->c_init(c);

	/* Disable the PN if appropriate */
	squash_the_stupid_serial_number(c);

	/* Set up SMEP/SMAP/UMIP */
	setup_smep(c);
	setup_smap(c);
	setup_umip(c);

	/* Enable FSGSBASE instructions if available. */
	if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
		cr4_set_bits(X86_CR4_FSGSBASE);
		elf_hwcap2 |= HWCAP2_FSGSBASE;
	}

	/*
	 * The vendor-specific functions might have changed features.
	 * Now we do "generic changes."
	 */

	/* Filter out anything that depends on CPUID levels we don't have */
	filter_cpuid_features(c, true);

	/* If the model name is still unset, do table lookup. */
	if (!c->x86_model_id[0]) {
		const char *p;
		p = table_lookup_model(c);
		if (p)
			strcpy(c->x86_model_id, p);
		else
			/* Last resort... */
			sprintf(c->x86_model_id, "%02x/%02x",
				c->x86, c->x86_model);
	}

#ifdef CONFIG_X86_64
	detect_ht(c);
#endif

	x86_init_rdrand(c);
	setup_pku(c);
	setup_cet(c);

	/*
	 * Clear/Set all flags overridden by options, need do it
	 * before following smp all cpus cap AND.
	 */
	apply_forced_caps(c);

	/*
	 * On SMP, boot_cpu_data holds the common feature set between
	 * all CPUs; so make sure that we indicate which features are
	 * common between the CPUs.  The first time this routine gets
	 * executed, c == &boot_cpu_data.
	 */
	if (c != &boot_cpu_data) {
		/* AND the already accumulated flags with these */
		for (i = 0; i < NCAPINTS; i++)
			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];

		/* OR, i.e. replicate the bug flags */
		for (i = NCAPINTS; i < NCAPINTS + NBUGINTS; i++)
			c->x86_capability[i] |= boot_cpu_data.x86_capability[i];
	}

	ppin_init(c);

	/* Init Machine Check Exception if available. */
	mcheck_cpu_init(c);

	select_idle_routine(c);

#ifdef CONFIG_NUMA
	numa_add_cpu(smp_processor_id());
#endif
}

get_cpu_mask

##CPU Hotplug
CPU_HOTPLUGがenableになっていると動的にCPUを有効無効と切り替えることができる。

# 無効化
$ echo 0 > /sys/devices/system/cpu/cpuX/online
# 有効化
$ echo 1 > /sys/devices/system/cpu/cpuX/online

続く

https://wiki.bit-hive.com/north/pg/CPU変数

https://mmi.hatenablog.com/entry/2017/03/30/000627

https://zhuanlan.zhihu.com/p/163850501

Discussion