kernel内存优化调研

kernel内存优化调研

Posted by Jeff on February 4, 2021

kernel内存优化调研,msm8937 32位,kernel3.18

物理内存加载

//start_kernel()->setup_arch()->early_init_dt_scan_nodes()->early_init_dt_scan_memory()
//从dts文件中加载物理内存
early_init_dt_scan_memory()
{
	early_init_dt_add_memory_arch(base=0x80000000, size=0x40000000);
}
void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
{
	memblock_add(base, size);
}
int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
	printk("---->>memblock_add() base=%#lx , size=%d kb\n",(unsigned long)base,size/1024);
	return memblock_add_range(&memblock.memory, base, size,
				   MAX_NUMNODES, 0);
}
//---->>memblock_add() base=0x80000000 , size=1048576 kb
//可以看到物理内存是1G
[ 0.000000] Memory: 714912K/1010688K available (11605K kernel code, 1204K rwdata, 4992K rodata, 474K init, 2316K bss, 295776K reserved, 118784K highmem)

从kernel输出log可以看出,内存大小只有1010688K,和实际物理内存1048576KB 相比少了37M左右,这是因为

在这里插入图片描述

这三块内存是no-map属性的,不会经过内存映射管理,因此实际内存会比物理内存少了这三块,大约就是37M

在这里插入图片描述

加载流程:

asmlinkage __visible void __init start_kernel(void)
{
	.....
	setup_arch(&command_line);
	.....
}

void __init setup_arch(char **cmdline_p)
{
	.....
	sanity_check_meminfo();
	arm_memblock_init(mdesc);
	.....
}

void __init arm_memblock_init(const struct machine_desc *mdesc)
{
	......
	arm_mm_memblock_reserve();
	early_init_fdt_scan_reserved_mem();
	dma_contiguous_reserve(arm_dma_limit);
	......
}


int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
					phys_addr_t size, bool nomap)
{
	//如果是no-map属性的就从memblock中移除
	if (nomap)
		return memblock_remove(base, size);
	//非no-map属性的添加到reserve中
	return memblock_reserve(base, size);
}



memblock中内存分两种:一种是memory类型的,代表可用内存。一种是reserve类型的,代表被使用的内存




******************************************




在这里插入图片描述

开机中前后两次dump memblock,reserved大小不一致:

前一次DUMP:

可以看出前一次dump reserved内存主要来自dts 如下:

reserved-memory {
		#address-cells = <2>;
		#size-cells = <2>;
		ranges;

		other_ext_mem: other_ext_region@0 {
			compatible = "removed-dma-pool";
			no-map;
			reg = <0x0 0x85b00000 0x0 0xd00000>;
		};

		modem_mem: modem_region@0 {
			compatible = "removed-dma-pool";
			no-map-fixup;
			reg = <0x0 0x86800000 0x0 0x5000000>;
		};

		adsp_fw_mem: adsp_fw_region@0 {
			compatible = "removed-dma-pool";
			no-map;
			reg = <0x0 0x8b800000 0x0 0x1100000>;
		};

		wcnss_fw_mem: wcnss_fw_region@0 {
			compatible = "removed-dma-pool";
			no-map;
			reg = <0x0 0x8c900000 0x0 0x700000>;
		};

		venus_mem: venus_region@0 {
			compatible = "shared-dma-pool";
			reusable;
			alloc-ranges = <0x0 0x80000000 0x0 0x10000000>;
			alignment = <0 0x400000>;
			size = <0 0x0800000>;
		};

		secure_mem: secure_region@0 {
			compatible = "shared-dma-pool";
			reusable;
			alignment = <0 0x400000>;
			size = <0 0x7000000>;
		};

		qseecom_mem: qseecom_region@0 {
			compatible = "shared-dma-pool";
			reusable;
			alignment = <0 0x400000>;
			size = <0 0x1000000>;
		};

		adsp_mem: adsp_region@0 {
			compatible = "shared-dma-pool";
			reusable;
			alignment = <0 0x400000>;
			size = <0 0x400000>;
		};

		cont_splash_mem: splash_region@83000000 {
			reg = <0x0 0x90000000 0x0 0x1400000>;
		};
	};

还有_stext、initrd、PDG等reserve 如下:

void __init arm_memblock_init(const struct machine_desc *mdesc)
{
	//大概21953K
	#ifdef CONFIG_XIP_KERNEL
	memblock_reserve(__pa(_sdata), _end - _sdata);
	#else
	memblock_reserve(__pa(_stext), _end - _stext);
	#endif
	//大概1782K
	memblock_reserve(phys_initrd_start, phys_initrd_size);
	
	//大概16K
	//arm_mm_memblock_reserve()->memblock_reserve()
	memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE);
	
	//大概213K
	//start_kernel()->setup_arch()->arm_memblock_init()->early_init_fdt_scan_reserved_mem()->early_init_dt_reserve_memory_arch()
	early_init_dt_reserve_memory_arch(__pa(initial_boot_params),fdt_totalsize(initial_boot_params),0);
}

所以前一次dump reserve = dts reserve + _stext、initrd、PDG等reserve = 286108K


后一次DUMP:

后一次dump reserve比前一次大,是因为中间有很多地方使用memblock分配内存使用,这部分内存都划到reserve中了如下:

MEMORY(1)

大约 8400K


//MEMORY(1)
//start_kernel()->setup_arch()->paging_init()
void __init paging_init(const struct machine_desc *mdesc)
{
	....
	//调用early_alloc_aligned()使用memblock分配内存,这块内存从memory列表移除添加到reserve列表中
	map_lowmem();
	
	//调用early_alloc_aligned()使用memblock分配内存,这块内存从memory列表移除添加到reserve列表中
	dma_contiguous_remap();
	
	//调用create_mapping->alloc_init_pud()->alloc_init_pmd->alloc_init_pte()->early_pte_alloc()->early_alloc()->early_alloc_aligned()->memblock_alloc()分配内存
	early_ioremap_reset();
	
	//调用early_alloc(PAGE_SIZE * 2); + create_mapping(&map);分配内存
	devicemaps_init();
	
	//early_pte_alloc()分配pte
	kmap_init();
	{
		#ifdef CONFIG_HIGHMEM
		pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),PKMAP_BASE, _PAGE_KERNEL_TABLE);
		#endif
		early_pte_alloc(pmd_off_k(FIXADDR_START), FIXADDR_START,_PAGE_KERNEL_TABLE);
	}
	
	//分配0号page
	zero_page = early_alloc(PAGE_SIZE);
	
	//
	bootmem_init();
	{
		zone_sizes_init();
		{
			free_area_init_node();
			{
				//alloc_node_mem_map()->memblock_virt_alloc_node_nopanic()->memblock_virt_alloc_try_nid_nopanic()->memblock_virt_alloc_internal()->memblock_reserve()
				alloc_node_mem_map();
				
				//free_area_init_core()->setup_usemap()->memblock_virt_alloc_node_nopanic()->memblock_virt_alloc_try_nid_nopanic()->memblock_virt_alloc_internal()->memblock_reserve()
				free_area_init_core(pgdat, start_pfn, end_pfn,zones_size, zholes_size);
			}
		}
	}


	....
}


MEMORY(2)

大约 4K


//start_kernel()->setup_arch()->request_standard_resources()
static void __init request_standard_resources(const struct machine_desc *mdesc)
{
	....
	struct resource *res;
	....
	for_each_memblock(memory, region) {
		....
		res = memblock_virt_alloc(sizeof(*res), 0);
		....
		}
	....
}

MEMORY(3)

大约 580K ```c

//start_kernel()->setup_arch()->unflatten_device_tree()->early_init_dt_alloc_memory_arch() void * __init __weak early_init_dt_alloc_memory_arch(u64 size, u64 align) { return __va(memblock_alloc(size, align)); } void __init unflatten_device_tree(void) { //这两个函数都会调用early_init_dt_alloc_memory_arch()分配内存 __unflatten_device_tree(initial_boot_params, &of_root, early_init_dt_alloc_memory_arch);

/* Get pointer to "/chosen" and "/aliases" nodes for use everywhere */
of_alias_scan(early_init_dt_alloc_memory_arch); } ```

MEMORY(4)

大约 1875B = 1.8K

//start_kernel()->setup_command_line()
static void __init setup_command_line(char *command_line)
{
	saved_command_line =
		memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
	initcall_command_line =
		memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
	static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0);
	strcpy(saved_command_line, boot_command_line);
	strcpy(static_command_line, command_line);
}

MEMORY(5)

大约 180504B = 176K ```c //start_kernel()->setup_per_cpu_areas() void __init setup_per_cpu_areas(void) { …. rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, pcpu_dfl_fc_alloc, pcpu_dfl_fc_free); …. }

int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, size_t atom_size, pcpu_fc_cpu_distance_fn_t cpu_distance_fn, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn) { …. pcpu_build_alloc_info(reserved_size, dyn_size, atom_size, cpu_distance_fn); …. memblock_virt_alloc_nopanic(areas_size, 0); …. alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size); }

static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, size_t align) { return memblock_virt_alloc_from_nopanic(size, align, __pa(MAX_DMA_ADDRESS)); }


## MEMORY(6)
>大约 16384 = 16K
>PID hash table entries: 4096 (order: 2, 16384 bytes) 分配了16K的内存

```c
//start_kernel()->pidhash_init()
void __init pidhash_init(void)
{
	pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
					   HASH_EARLY | HASH_SMALL,
					   &pidhash_shift, NULL,
					   0, 4096);
}

void *__init alloc_large_system_hash(const char *tablename,
				     unsigned long bucketsize,
				     unsigned long numentries,
				     int scale,
				     int flags,
				     unsigned int *_hash_shift,
				     unsigned int *_hash_mask,
				     unsigned long low_limit,
				     unsigned long high_limit)
{
	.......
	do {
		size = bucketsize << log2qty;
		if (flags & HASH_EARLY)
			table = memblock_virt_alloc_nopanic(size, 0);
		else if (hashdist)
			table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL);
		else {
			/*
			 * If bucketsize is not a power-of-two, we may free
			 * some pages at the end of hash table which
			 * alloc_pages_exact() automatically does
			 */
			if (get_order(size) < MAX_ORDER) {
				table = alloc_pages_exact(size, GFP_ATOMIC);
				kmemleak_alloc(table, size, 1, GFP_ATOMIC);
			}
		}
	} while (!table && size > PAGE_SIZE && --log2qty);
	......
}

MEMORY(7)

大约 786432B = 768K

//start_kernel()->vfs_caches_init_early()
void __init vfs_caches_init_early(void)
{
	dcache_init_early();
	inode_init_early();
}
//类似于PID
//Dentry cache hash table entries: 131072 (order: 7, 524288 bytes)
static void __init dcache_init_early(void)
{
	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_bl_head),
					dhash_entries,
					13,
					HASH_EARLY,
					&d_hash_shift,
					&d_hash_mask,
					0,
					0);
}

//Inode-cache hash table entries: 65536 (order: 6, 262144 bytes)
void __init inode_init_early(void)
{
	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					HASH_EARLY,
					&i_hash_shift,
					&i_hash_mask,
					0,
					0);
}

所以 [dts reserve] + [_stext、initrd、PDG等reserve] + [MEMORY1~7] 就等于现在dump时reserved的大小

从现在开始总内存就是:1010688K ,可使用内存:1010688K - 295776K(reserve内存) = 714912K






内存的释放


CMA内存的释放

加载过程

加载dts文件时,会把一部分内存保留为cma内存,大约156M,这份内存是可以使用的,所以要释放给可用内存
[    0.000000] Reserved memory: allocated memory for 'venus_region@0' node: base 0x8f800000, size 8 MiB
[    0.000000] Reserved memory: created CMA memory pool at 0x8f800000, size 8 MiB
[    0.000000] Reserved memory: initialized node venus_region@0, compatible id shared-dma-pool

[    0.000000] Reserved memory: allocated memory for 'secure_region@0' node: base 0xb9000000, size 112 MiB
[    0.000000] Reserved memory: created CMA memory pool at 0xb9000000, size 112 MiB
[    0.000000] Reserved memory: initialized node secure_region@0, compatible id shared-dma-pool

[    0.000000] Reserved memory: allocated memory for 'qseecom_region@0' node: base 0xb8000000, size 16 MiB
[    0.000000] Reserved memory: created CMA memory pool at 0xb8000000, size 16 MiB
[    0.000000] Reserved memory: initialized node qseecom_region@0, compatible id shared-dma-pool

[    0.000000] Reserved memory: allocated memory for 'adsp_region@0' node: base 0xb7c00000, size 4 MiB
[    0.000000] Reserved memory: created CMA memory pool at 0xb7c00000, size 4 MiB
[    0.000000] Reserved memory: initialized node adsp_region@0, compatible id shared-dma-pool

[    0.000000] cma: Reserved 16 MiB at 0xb6c00000

释放过程

cma_init_reserved_areas()->cma_activate_area()->init_cma_reserved_pageblock()->adjust_managed_page_count()

core_initcall(cma_init_reserved_areas);
static int __init cma_init_reserved_areas(void)
{
	for (i = 0; i < cma_area_count; i++) {
		int ret = cma_activate_area(&cma_areas[i]);
	}
}




static int __init cma_activate_area(struct cma *cma)
{
	init_cma_reserved_pageblock(pfn_to_page(base_pfn));
}


void __init init_cma_reserved_pageblock(struct page *page)
{
	....
	set_pageblock_migratetype(page, MIGRATE_CMA);
	....
	adjust_managed_page_count(page, pageblock_nr_pages);
	....
}


void adjust_managed_page_count(struct page *page, long count)
{
	spin_lock(&managed_page_count_lock);
	page_zone(page)->managed_pages += count;
	totalram_pages += count;
#ifdef CONFIG_HIGHMEM
	if (PageHighMem(page))
		totalhigh_pages += count;
#endif
	spin_unlock(&managed_page_count_lock);

}


此时总内存=1010688K , 可用内存 = 714912K + 156*1024 = 874656 K



initrd区域内存释放

initrd是initial ram disk的缩写。它是由 bootloader 初始化的内存盘。在 linux 启动之前,bootloader  会将它(通常是 initrd.img-xxx...xxx 文件)加载到内存中。
内核启动的时候会将这个文件解开,并作为根文件系 统使用。而启动阶段的驱动模块(如jbd)放在这些文件系统上,内核是无法读取文件系统的,从而只能通
过Linux initrd启动的虚拟文件系统来装载这些模块。
initrd文件的功能主要有两个:
	1、提供开机必需的但kernel文件(即vmlinuz)没有提供的驱动模块(modules)
	2、负责加载硬盘上的根文件系统并执行其中的/sbin/init程序进而将开机过程持续下去

释放流程

populate_rootfs()->free_initrd()->free_initrd_mem()->free_reserved_area()

rootfs_initcall(populate_rootfs);
static int __init populate_rootfs(void)
{
	err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
	if (initrd_start) {
#ifdef CONFIG_BLK_DEV_RAM
		
		err = unpack_to_rootfs((char *)initrd_start,
			initrd_end - initrd_start);
		if (!err) {
			free_initrd();
			goto done;
		} else {
			clean_rootfs();
			unpack_to_rootfs(__initramfs_start, __initramfs_size);
		}
		fd = sys_open("/initrd.image", O_WRONLY|O_CREAT, 0700);
		if (fd >= 0) {
			sys_close(fd);
			free_initrd();
		}
	done:
#else

		err = unpack_to_rootfs((char *)initrd_start,initrd_end - initrd_start);

		free_initrd();
#endif
}

static void __init free_initrd(void)
{
	free_initrd_mem(initrd_start, initrd_end);
}

void free_initrd_mem(unsigned long start, unsigned long end)
{
	free_reserved_area((void *)start, (void *)end, -1, "initrd");
}

unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
{
	start = (void *)PAGE_ALIGN((unsigned long)start);
	end = (void *)((unsigned long)end & PAGE_MASK);
	for (pos = start; pos < end; pos += PAGE_SIZE, pages++) {
		if ((unsigned int)poison <= 0xFF)
			memset(pos, poison, PAGE_SIZE);
		free_reserved_page(virt_to_page(pos));
	}

	if (pages && s)
		pr_info("Freeing %s memory: %ldK (%p - %p)\n",
			s, pages << (PAGE_SHIFT - 10), start, end);
}

Kernel Log输出

Freeing initrd memory: 1784K (c3600000 - c37be000)



此时总内存=1010688K , 可用内存 = 874656K + 1784K = 876440 K



释放一些不再使用的kernel 内存

主要是kernel init占的内存

释放流程

start_kernel()->rest_init()->kernel_thread(kernel_init, NULL, CLONE_FS)->kernel_init()->free_initmem()->free_initmem_default()

static inline unsigned long free_initmem_default(int poison)
{
	extern char __init_begin[], __init_end[];
	return free_reserved_area(&__init_begin, &__init_end,poison, "unused kernel");
}

Kernel Log输出

Freeing unused kernel memory: 472K (c1200000 - c1276000)



此时总内存=1010688K , 可用内存 = 876440K + 472K = 876912 K



高通平台相关内存


处理和释放 dts中no-map-fixup属性的内存,主要是高通modem占用的内存

在这里插入图片描述

处理释放过程:

static void removed_region_fixup(struct removed_region *dma_mem, int index)
{
	//把modem这块内存释放出来从reserve变成memory[0M , 80M]
	memblock_free(dma_mem->base, dma_mem->nr_pages * PAGE_SIZE);
	//把modem这块内存[0M , 75M]从内存管理移除
	memblock_remove(dma_mem->base, index * PAGE_SIZE);

	//释放掉modem使用的page内存
	free_memmap(base_pfn, base_pfn + index);
	//释放掉modem的[75M , 80M]这5M内存
	free_bootmem_late(dma_mem->base + index * PAGE_SIZE, fixup_size);
	
}

Kernel Log输出

memblock_free: [0x00000086800000-0x0000008b7fffff] removed_alloc+0x114/0x460 memblock_remove: [0x00000086800000-0x0000008b2fffff] removed_alloc+0x128/0x460 free_bootmem_late() addr = 0xaf09d000 , size = 576 K free_bootmem_late() addr = 0x8b300000 , size = 5120 K



此时总内存=1010688K , 可用内存 = 876912 K + 576 K + 5120 K = 882608 K



释放splash占用的内存

高通平台主要被开机logo使用

dts中的声明:

在这里插入图片描述

释放流程

int mdss_mdp_splash_cleanup(struct msm_fb_data_type *mfd,
					bool use_borderfill)
{
	memblock_free(mdp5_data->splash_mem_addr,mdp5_data->splash_mem_size);
}

这块内存大概20M 所以:



此时总内存=1010688K , 可用内存 = 882608 K + 20480 K = 903088 K



cat /proc/meminfo 结果:

MemTotal:         903088 kB
MemFree:          110328 kB
MemAvailable:     424740 kB
Buffers:           12888 kB
Cached:           326552 kB
SwapCached:         4340 kB
Active:           299168 kB
Inactive:         303332 kB
Active(anon):     130016 kB
Inactive(anon):   135228 kB
Active(file):     169152 kB
Inactive(file):   168104 kB
Unevictable:         256 kB
Mlocked:             256 kB
HighTotal:        270336 kB
HighFree:          84516 kB
LowTotal:         632752 kB
LowFree:           25812 kB
SwapTotal:        991224 kB
SwapFree:         944296 kB
Dirty:                16 kB
Writeback:             0 kB
AnonPages:        261948 kB
Mapped:           149820 kB
Shmem:              2180 kB
Slab:              50952 kB
SReclaimable:      16424 kB
SUnreclaim:        34528 kB
KernelStack:       11952 kB
PageTables:        23180 kB
NFS_Unstable:          0 kB
Bounce:                0 kB
WritebackTmp:          0 kB
CommitLimit:     1442768 kB
Committed_AS:   32504332 kB
VmallocTotal:     245760 kB
VmallocUsed:      102044 kB
VmallocChunk:      42836 kB

图示释放过程

在这里插入图片描述

释放之后要想精简增大内存可优化地方比较少