tests/benchmarks: add dynamic memory allocation measurement

Add a test to the latency_measure test case to measure
the average time for dynamic memory allocation and release.

Signed-off-by: Chen Peng1 <peng1.chen@intel.com>
diff --git a/tests/benchmarks/latency_measure/README.rst b/tests/benchmarks/latency_measure/README.rst
index 423f696..160dc18 100644
--- a/tests/benchmarks/latency_measure/README.rst
+++ b/tests/benchmarks/latency_measure/README.rst
@@ -15,27 +15,30 @@
 * Time it takes to resume a suspended thread
 * Time it takes to create a new thread (without starting it)
 * Time it takes to start a newly created thread
+* Average time it takes to allocate memory from the heap and then free it
 
 
 Sample output of the benchmark::
 
-        *** Booting Zephyr OS build zephyr-v2.3.0-2257-g0f420483db07  ***
+        *** Booting Zephyr OS build zephyr-v2.6.0-1119-g378a1e082ac5  ***
         START - Time Measurement
-        Timing results: Clock frequency: 120 MHz
-        Average thread context switch using yield                   :     420 cycles ,     3502 ns
-        Average context switch time between threads (coop)          :     429 cycles ,     3583 ns
-        Switch from ISR back to interrupted thread                  :     670 cycles ,     5583 ns
-        Time from ISR to executing a different thread               :     570 cycles ,     4750 ns
-        Time to create a thread (without start)                     :     360 cycles ,     3000 ns
-        Time to start a thread                                      :     545 cycles ,     4541 ns
-        Time to suspend a thread                                    :     605 cycles ,     5041 ns
-        Time to resume a thread                                     :     660 cycles ,     5500 ns
-        Time to abort a thread (not running)                        :     495 cycles ,     4125 ns
-        Average semaphore signal time                               :     195 cycles ,     1626 ns
-        Average semaphore test time                                 :      62 cycles ,      518 ns
-        Semaphore take time (context switch)                        :     695 cycles ,     5791 ns
-        Semaphore give time (context switch)                        :     845 cycles ,     7041 ns
-        Average time to lock a mutex                                :      79 cycles ,      659 ns
-        Average time to unlock a mutex                              :     370 cycles ,     3085 ns
+        Timing results: Clock frequency: 1000 MHz
+        Average thread context switch using yield                   :    9060 cycles ,     9060 ns
+        Average context switch time between threads (coop)          :    9503 cycles ,     9503 ns
+        Switch from ISR back to interrupted thread                  :   14208 cycles ,    14208 ns
+        Time from ISR to executing a different thread               :    9664 cycles ,     9664 ns
+        Time to create a thread (without start)                     :    3968 cycles ,     3968 ns
+        Time to start a thread                                      :   12064 cycles ,    12064 ns
+        Time to suspend a thread                                    :   12640 cycles ,    12640 ns
+        Time to resume a thread                                     :   12096 cycles ,    12096 ns
+        Time to abort a thread (not running)                        :    2208 cycles ,     2208 ns
+        Average semaphore signal time                               :    8928 cycles ,     8928 ns
+        Average semaphore test time                                 :    2048 cycles ,     2048 ns
+        Semaphore take time (context switch)                        :   13472 cycles ,    13472 ns
+        Semaphore give time (context switch)                        :   18400 cycles ,    18400 ns
+        Average time to lock a mutex                                :    3072 cycles ,     3072 ns
+        Average time to unlock a mutex                              :    9251 cycles ,     9251 ns
+        Average time for heap malloc                                :   13056 cycles ,    13056 ns
+        Average time for heap free                                  :    7776 cycles ,     7776 ns
         ===================================================================
         PROJECT EXECUTION SUCCESSFUL
diff --git a/tests/benchmarks/latency_measure/prj.conf b/tests/benchmarks/latency_measure/prj.conf
index 67b633e..abea793 100644
--- a/tests/benchmarks/latency_measure/prj.conf
+++ b/tests/benchmarks/latency_measure/prj.conf
@@ -22,3 +22,5 @@
 # Can only run under 1 CPU
 CONFIG_MP_NUM_CPUS=1
 CONFIG_TIMING_FUNCTIONS=y
+
+CONFIG_HEAP_MEM_POOL_SIZE=2048
diff --git a/tests/benchmarks/latency_measure/src/heap_malloc_free.c b/tests/benchmarks/latency_measure/src/heap_malloc_free.c
new file mode 100644
index 0000000..bd73bbc
--- /dev/null
+++ b/tests/benchmarks/latency_measure/src/heap_malloc_free.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2021 Intel Corporation
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <zephyr.h>
+#include <timing/timing.h>
+#include "utils.h"
+
+#define TEST_COUNT 100
+#define TEST_SIZE 10
+
+/**
+ * @brief Measure average time for k_malloc() and k_free()
+ *
+ * Repeatedly allocates TEST_SIZE bytes from the system heap and frees
+ * them again, accumulating the cycles spent inside each call, then
+ * prints the average cost per call over the completed iterations.
+ */
+void heap_malloc_free(void)
+{
+	timing_t heap_malloc_start_time = 0U;
+	timing_t heap_malloc_end_time = 0U;
+
+	timing_t heap_free_start_time = 0U;
+	timing_t heap_free_end_time = 0U;
+
+	uint32_t count = 0U;
+	uint32_t sum_malloc = 0U;
+	uint32_t sum_free = 0U;
+
+	timing_start();
+
+	while (count != TEST_COUNT) {
+		heap_malloc_start_time = timing_counter_get();
+		void *allocated_mem = k_malloc(TEST_SIZE);
+
+		heap_malloc_end_time = timing_counter_get();
+		if (allocated_mem == NULL) {
+			/* %u: count is uint32_t, not int */
+			printk("Failed to alloc memory from heap "
+					"at count %u\n", count);
+			break;
+		}
+
+		heap_free_start_time = timing_counter_get();
+		k_free(allocated_mem);
+		heap_free_end_time = timing_counter_get();
+
+		sum_malloc += timing_cycles_get(&heap_malloc_start_time,
+				&heap_malloc_end_time);
+		sum_free += timing_cycles_get(&heap_free_start_time,
+				&heap_free_end_time);
+		count++;
+	}
+
+	/* Guard against division by zero inside PRINT_STATS_AVG when the
+	 * very first allocation fails and no samples were collected.
+	 */
+	if (count > 0U) {
+		PRINT_STATS_AVG("Average time for heap malloc", sum_malloc, count);
+		PRINT_STATS_AVG("Average time for heap free", sum_free, count);
+	}
+
+	timing_stop();
+}
diff --git a/tests/benchmarks/latency_measure/src/main.c b/tests/benchmarks/latency_measure/src/main.c
index 18b0d8e..d12477f 100644
--- a/tests/benchmarks/latency_measure/src/main.c
+++ b/tests/benchmarks/latency_measure/src/main.c
@@ -27,6 +27,7 @@
 extern int sema_test(void);
 extern int sema_context_switch(void);
 extern int suspend_resume(void);
+extern void heap_malloc_free(void);
 
 void test_thread(void *arg1, void *arg2, void *arg3)
 {
@@ -57,6 +58,8 @@
 
 	mutex_lock_unlock();
 
+	heap_malloc_free();
+
 	TC_END_REPORT(error_count);
 }