arm64: cache: refine arch_dcache_range()

Move all assembly code to C code. Fix arch_dcache_line_size_get()
to derive the dcache line size as "4 << dminline" and no longer
consider CWG, following the sample code in the Cortex-A Series
Programmer's Guide for ARMv8-A.

Signed-off-by: Jiafei Pan <Jiafei.Pan@nxp.com>
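
As a quick illustration (not part of the patch), here is a minimal
standalone sketch of the line-size derivation described above:
CTR_EL0.DminLine (bits [19:16]) encodes log2 of the smallest data
cache line size in words, so the size in bytes is 4 << DminLine.
The helper name is hypothetical, and the register is read with raw
inline assembly rather than Zephyr's read_sysreg() helper used in
the patch:

    #include <stdint.h>
    #include <stddef.h>

    static size_t dcache_line_size_bytes(void)
    {
            uint64_t ctr_el0;
            uint32_t dminline;

            /* CTR_EL0 is readable from EL0; DminLine is bits [19:16] */
            __asm__ volatile ("mrs %0, ctr_el0" : "=r" (ctr_el0));
            dminline = (ctr_el0 >> 16) & 0xf;

            /* DminLine is log2 of the line size in words, so bytes = 4 << DminLine */
            return (size_t)4 << dminline;
    }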
diff --git a/arch/arm64/core/CMakeLists.txt b/arch/arm64/core/CMakeLists.txt
index f8f077a..cc573d8 100644
--- a/arch/arm64/core/CMakeLists.txt
+++ b/arch/arm64/core/CMakeLists.txt
@@ -27,7 +27,6 @@
 zephyr_library_sources_ifdef(CONFIG_THREAD_LOCAL_STORAGE tls.c)
 zephyr_library_sources_ifdef(CONFIG_HAS_ARM_SMCCC smccc-call.S)
 zephyr_library_sources_ifdef(CONFIG_AARCH64_IMAGE_HEADER header.S)
-zephyr_library_sources_ifdef(CONFIG_CACHE_MANAGEMENT cache.S)
 zephyr_library_sources_ifdef(CONFIG_CACHE_MANAGEMENT cache.c)
 if ((CONFIG_MP_NUM_CPUS GREATER 1) OR (CONFIG_SMP))
   zephyr_library_sources(smp.c)
diff --git a/arch/arm64/core/cache.S b/arch/arm64/core/cache.S
deleted file mode 100644
index 3fe7e53..0000000
--- a/arch/arm64/core/cache.S
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright 2020 NXP
- *
- * SPDX-License-Identifier: Apache-2.0
- */
-
-/*
- * ARM64 Cortex-A Cache management
- */
-
-#include <toolchain.h>
-#include <linker/sections.h>
-#include <arch/cpu.h>
-
-_ASM_FILE_PROLOGUE
-
-.macro	dcache_range_op, dcache op
-	/* Calculate dcache line size */
-	mrs	x3, ctr_el0
-	mov	x2, #4
-	ubfm	x3, x3, #16, #19
-	lsl	x2, x2, x3
-
-	/*
-	 * x2 = cacheline_size
-	 * x1 = start + end
-	 * x3 = cacheline_size - 1
-	 * x0 = x0 & ~(x3)
-	 */
-	add	x1, x0, x1
-	sub	x3, x2, #1
-	bic	x0, x0, x3
-1:
-	\dcache \op, x0
-	add	x0, x0, x2
-	cmp	x0, x1
-	blo	1b
-	dsb	sy
-.endm
-
-GTEXT(arch_dcache_flush)
-SECTION_FUNC(TEXT, arch_dcache_flush)
-	dcache_range_op dc civac
-	ret
-
-GTEXT(arch_dcache_invd)
-SECTION_FUNC(TEXT, arch_dcache_invd)
-	dcache_range_op dc ivac
-	ret
diff --git a/arch/arm64/core/cache.c b/arch/arm64/core/cache.c
index fa6d820..4e1d24d 100644
--- a/arch/arm64/core/cache.c
+++ b/arch/arm64/core/cache.c
@@ -1,7 +1,7 @@
 /* cache.c - d-cache support for AARCH64 CPUs */
 
 /*
- * Copyright 2020 NXP
+ * Copyright 2020-2021 NXP
  *
  * SPDX-License-Identifier: Apache-2.0
  */
@@ -15,10 +15,10 @@
 
 #include <cache.h>
 
-#define	CTR_EL0_DMINLINE_SHIFT	16
-#define	CTR_EL0_DMINLINE_MASK	GENMASK(19, 16)
-#define	CTR_EL0_CWG_SHIFT	24
-#define	CTR_EL0_CWG_MASK	GENMASK(27, 24)
+#define	CTR_EL0_DMINLINE_SHIFT		16
+#define	CTR_EL0_DMINLINE_MASK		BIT_MASK(4)
+#define	CTR_EL0_CWG_SHIFT		24
+#define	CTR_EL0_CWG_MASK		BIT_MASK(4)
 
 /* clidr_el1 */
 #define CLIDR_EL1_LOC_SHIFT		24
@@ -39,28 +39,11 @@
 	__asm__ volatile ("dc " op ", %0" :: "r" (val) : "memory");	\
 })
 
-int arch_dcache_flush(void *addr, size_t size);
-int arch_dcache_invd(void *addr, size_t size);
-
 static size_t dcache_line_size;
 
-int arch_dcache_range(void *addr, size_t size, int op)
-{
-	if (op == K_CACHE_INVD) {
-		arch_dcache_invd(addr, size);
-	} else if (op == K_CACHE_WB_INVD) {
-		arch_dcache_flush(addr, size);
-	} else {
-		return -ENOTSUP;
-	}
-
-	return 0;
-}
-
 size_t arch_dcache_line_size_get(void)
 {
 	uint64_t ctr_el0;
-	uint32_t cwg;
 	uint32_t dminline;
 
 	if (dcache_line_size)
@@ -68,16 +51,51 @@
 
 	ctr_el0 = read_sysreg(CTR_EL0);
 
-	cwg = (ctr_el0 & CTR_EL0_CWG_MASK) >> CTR_EL0_CWG_SHIFT;
-	dminline = (ctr_el0 & CTR_EL0_DMINLINE_MASK) >>
-		CTR_EL0_DMINLINE_SHIFT;
+	dminline = (ctr_el0 >> CTR_EL0_DMINLINE_SHIFT) & CTR_EL0_DMINLINE_MASK;
 
-	dcache_line_size = cwg ? 4 << cwg : 4 << dminline;
+	dcache_line_size = 4 << dminline;
 
 	return dcache_line_size;
 }
 
 /*
+ * operation for data cache by virtual address to PoC
+ * ops:  K_CACHE_INVD: invalidate
+ *	 K_CACHE_WB: clean
+ *	 K_CACHE_WB_INVD: clean and invalidate
+ */
+int arch_dcache_range(void *addr, size_t size, int op)
+{
+	size_t line_size;
+	uintptr_t start_addr = (uintptr_t)addr;
+	uintptr_t end_addr = start_addr + size;
+
+	if (op != K_CACHE_INVD && op != K_CACHE_WB && op != K_CACHE_WB_INVD)
+		return -ENOTSUP;
+
+	line_size = arch_dcache_line_size_get();
+
+	/* Align address to line size */
+	start_addr &= ~(line_size - 1);
+
+	do {
+		if (op == K_CACHE_INVD) {
+			dc_ops("ivac", start_addr);
+		} else if (op == K_CACHE_WB) {
+			dc_ops("cvac", start_addr);
+		} else if (op == K_CACHE_WB_INVD) {
+			dc_ops("civac", start_addr);
+		}
+
+		start_addr += line_size;
+	} while (start_addr < end_addr);
+
+	dsb();
+
+	return 0;
+}
+
+/*
  * operation for all data cache
  * ops:  K_CACHE_INVD: invalidate
  *	 K_CACHE_WB: clean