/*
 * Copyright (c) 2017, Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr.h>
#include <string.h>
#include <sys/printk.h>
#include <xtensa-asm2.h>

#ifdef CONFIG_MULTITHREADING
#error Disable multithreading for this unit test!
#endif

/* Just random numbers intended to whiten the register contents during
 * the spill test and make every bit of every register in every call
 * significant in an attempt to catch any mistakes/swaps/etc...
 */
int white[] = {
	0x5fad484a,
	0xc23e88f7,
	0xfff301fb,
	0xf1189ba7,
	0x88bffad6,
	0xaabb96fa,
	0x629619d5,
	0x246bee82
};

static inline unsigned int ccount(void)
{
	unsigned int cc;

	__asm__ volatile("rsr.ccount %0" : "=r"(cc));
	return cc;
}

/* We call spill_fn() through a pointer so the compiler cannot detect
 * the tail recursion and optimize it away, which forces a real
 * function call using CALLn instructions.
 */
int (*spill_fnp)(int level, int a, int b, int c);

/* WINDOWBASE/WINDOWSTART registers tested before and after the spill */
unsigned int spill_wb0, spill_ws0, spill_wb1, spill_ws1;

/* Test start/end values for CCOUNT */
unsigned int spill_start, spill_end;

/* Validated result for spill_fn() */
int spill_expect;

enum {
	NO_SPILL, HAL_SPILL, ZEPHYR_SPILL, NUM_MODES
} spill_mode;

static int spill_fn(int level, int a, int b, int c)
{
	/* Be very careful when debugging, note that a printk() call
	 * tends to push all the registers out of the windows on its
	 * own, leaving no frames for us to test against!
	 */
	if (level >= ARRAY_SIZE(white)) {
		__asm__ volatile ("rsr.WINDOWBASE %0" : "=r"(spill_wb0));
		__asm__ volatile ("rsr.WINDOWSTART %0" : "=r"(spill_ws0));

		spill_start = ccount();

		if (spill_mode == NO_SPILL) {
			/* Just here to test the cycle count overhead
			 * and get the baseline function result.
			 */
		} else if (spill_mode == ZEPHYR_SPILL) {
			/* FIXME: the a0_save hack should be needless. It
			 * *should* be enough to list "a0" in the clobber list
			 * of the __asm__ statement (and let the compiler
			 * decide on how to save the value), but that's not
			 * working for me...
			 */
			int a0_save;

			__asm__ volatile
				("mov %0, a0" "\n\t"
				 "call0 spill_reg_windows" "\n\t"
				 "mov a0, %0" "\n\t"
				 : "=r"(a0_save));
		} else if (spill_mode == HAL_SPILL) {
			/* Strictly speaking there is an
			 * xthal_window_spill_nw routine that must be
			 * called with special setup (use CALL0, spill
			 * A2/A3, clear WOE) and is supposed to be
			 * faster, but I couldn't make that work.
			 */
			extern void xthal_window_spill(void);
			xthal_window_spill();
		}

		spill_end = ccount();
		__asm__ volatile ("rsr.WINDOWBASE %0" : "=r" (spill_wb1));
		__asm__ volatile ("rsr.WINDOWSTART %0" : "=r" (spill_ws1));

		return ((a + b) | c);
	}

	int val1 = (a - (b & c)) ^ white[level];
	int val2 = ((a | b) + c) ^ white[(level + 1) % ARRAY_SIZE(white)];
	int val3 = (a - (b - c)) ^ white[(level + 2) % ARRAY_SIZE(white)];

	int x = spill_fnp(level+1, val1, val2, val3);

	/* FIXME: as it happens, the compiler seems not to be
	 * optimizing components of this addition before the function
	 * call, which is what we want: the desire is that the
	 * individual values be held in registers across the call so
	 * that they can be checked to have been spilled/filled
	 * properly as we return up the stack.  But the compiler
	 * certainly COULD reorder this addition (it would actually be
	 * a good optimization: you could reduce the number of
	 * registers used before the tail return and use a smaller
	 * call frame).  For now, I'm happy enough simply having read
	 * the generated code, but long term this should be a more
	 * robust test if possible.  Maybe write the values to some
	 * extern volatile spots; see the sketch after this function.
	 */
	return x + val1 + val2 + val3 + a + b + c;
}
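
/* One possible shape for the "more robust test" suggested in the FIXME
 * inside spill_fn() above: mirror the values that must stay live across
 * the recursive call into volatile storage and compare on the way back
 * up, so the check no longer depends on how the compiler schedules the
 * final addition.  Untested sketch kept out of the build; spill_check
 * and spill_fn_checked() are names invented here, not part of the test,
 * and spill_fnp would have to point at this variant.
 */
#if 0
volatile int spill_check[ARRAY_SIZE(white)][3];

static int spill_fn_checked(int level, int a, int b, int c)
{
	if (level >= ARRAY_SIZE(white)) {
		/* Base case simplified; the real test would keep the
		 * spill-mode machinery from spill_fn() here.
		 */
		return (a + b) | c;
	}

	int val1 = (a - (b & c)) ^ white[level];
	int val2 = ((a | b) + c) ^ white[(level + 1) % ARRAY_SIZE(white)];
	int val3 = (a - (b - c)) ^ white[(level + 2) % ARRAY_SIZE(white)];

	/* Mirror the values that must survive the call, one slot per
	 * recursion level so deeper calls don't overwrite ours.
	 */
	spill_check[level][0] = val1;
	spill_check[level][1] = val2;
	spill_check[level][2] = val3;

	int x = spill_fnp(level + 1, val1, val2, val3);

	if (val1 != spill_check[level][0] || val2 != spill_check[level][1] ||
	    val3 != spill_check[level][2]) {
		printk("Live value corrupted across call at level %d\n",
		       level);
	}

	return x + val1 + val2 + val3 + a + b + c;
}
#endif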

int test_reg_spill(void)
{
	spill_fnp = spill_fn;

	int ok = 1;

	for (spill_mode = 0; spill_mode < NUM_MODES; spill_mode++) {
		printk("Testing %s\n",
		       spill_mode == NO_SPILL ? "NO_SPILL"
		       : (spill_mode == HAL_SPILL ? "HAL_SPILL"
			  : "ZEPHYR_SPILL"));

		int result = spill_fnp(0, 1, 2, 3);

		printk(" WINDOWBASE %d -> %d, WINDOWSTART 0x%x -> 0x%x (%d cycles)\n",
		       spill_wb0, spill_wb1, spill_ws0, spill_ws1,
		       spill_end - spill_start);

		if (spill_mode == NO_SPILL) {
			spill_expect = result;
			continue;
		}

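		/* After a complete spill, the only live register window
		 * should be the one this function is executing in, i.e.
		 * WINDOWSTART has exactly the bit at index WINDOWBASE
		 * set (for example WINDOWBASE == 3 reads back as
		 * WINDOWSTART == 0x8).
		 */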
		if (spill_ws1 != 1 << spill_wb1) {
			printk("WINDOWSTART should show exactly one frame at WINDOWBASE\n");
			ok = 0;
		}

		if (result != spill_expect) {
			printk("Unexpected spill_fn(0, 1, 2, 3) result, got %d want %d\n",
			       result, spill_expect);
			ok = 0;
		}
	}

	return ok;
}

int *test_highreg_handle;

/* Simple save locations for some context needed by the test assembly */
void *_test_highreg_sp_save;
void *_test_highreg_a0_save;

int test_highreg_stack[64];

int *test_highreg_sp_top = &test_highreg_stack[ARRAY_SIZE(test_highreg_stack)];

/* External function, defined in assembly: makes enough nested calls to
 * occupy every physical register window, then invokes fn from the
 * deepest frame.
 */
void fill_window(void (*fn)(void));

/* Test rig for fill_window() itself; really a metatest that could be
 * removed eventually.
 */
int testfw_wb, testfw_ws;
void testfw(void);

/* Assembly-defined leaf functions for fill_window which poke the
 * specified number of high GPRs before calling xtensa_save_high_regs
 * to spill them into the test_highreg_stack area for inspection.
 */
void test_highreg_0(void);
void test_highreg_4(void);
void test_highreg_8(void);
void test_highreg_12(void);

typedef void (*test_fn_t)(void);
test_fn_t highreg_tests[] = {
	test_highreg_0,
	test_highreg_4,
	test_highreg_8,
	test_highreg_12,
};

int test_highreg_save(void)
{
	int ok = 1;

	fill_window(testfw);
	printk("testfw wb %d ws 0x%x\n", testfw_wb, testfw_ws);
	ok = ok && (testfw_ws == ((1 << (XCHAL_NUM_AREGS / 4)) - 1));

	for (int i = 0; i < ARRAY_SIZE(highreg_tests); i++) {
		printk("\nHighreg test %d\n", i);

		fill_window(highreg_tests[i]);

		ok = ok && (*test_highreg_handle == (int)test_highreg_sp_top);

		int spilled_words = test_highreg_sp_top - test_highreg_handle;

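		/* Layout of the save area that the checks below assume:
		 * the word at the returned handle holds the original
		 * stack top pointer, and the high registers are stored
		 * in quads growing down from the top (A4..A7 in the
		 * quad just below the top, then A8..A11, ...), each
		 * slot holding its own register number as written by
		 * the test_highreg_N helpers.
		 */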
		for (int quad = 0; ok && quad < (spilled_words - 1)/4; quad++) {
			int *qbase = test_highreg_sp_top - (quad + 1) * 4;

			for (int ri = 0; ri < 4; ri++) {
				int reg = 4 + quad * 4 + ri;

				ok = ok && (qbase[ri] == reg);
				printk(" q %d reg %d qb[%d] %d\n",
				       quad, reg, ri, qbase[ri]);
			}
		}
	}

	return ok;
}

void *switch_handle0, *switch_handle1;

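/* Defined in assembly: saves the current context, stores its handle
 * through *old_handle, and resumes the context identified by handle.
 * (At least, that is how the tests below use it.)
 */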
void xtensa_switch(void *handle, void **old_handle);

void test_switch_bounce(void);
__asm__("test_switch_bounce:" "\n\t"
	"call4 test_switch_top" "\n\t");

volatile int switch_count;

/* Sits in a loop switching back to handle0 (which is the main thread) */
void test_switch_top(void)
{
	int n = 1;

	while (1) {
		switch_count = n++;
		xtensa_switch(switch_handle0, &switch_handle1);
	}
}

int test_switch(void)
{
	static int stack2[512];

	printk("%s\n", __func__);

	(void)memset(stack2, 0, sizeof(stack2));

	int *sp = xtensa_init_stack(k_current_get(),
				    &stack2[ARRAY_SIZE(stack2)],
				    (void *)test_switch_bounce,
				    0, 0, 0);

#if 0
	/* DEBUG: dump the stack contents for manual inspection */
	for (int i = 0; i < 64; i++) {
		int idx = ARRAY_SIZE(stack2) - (i+1);
		int off = (i+1) * -4;
		int *addr = &stack2[idx];

		if (addr < sp) {
			break;
		}
		printk("%p (%d): 0x%x\n", addr, off, stack2[idx]);
	}
	printk("sp: %p\n", sp);
#endif

	switch_handle1 = sp;

	const int n_switch = 10;

	for (int i = 0; i < n_switch; i++) {
		xtensa_switch(switch_handle1, &switch_handle0);
		/* printk("switch %d count %d\n", i, switch_count); */
	}

	return switch_count == n_switch;
}

void rfi_jump(void);
void rfi_jump_c(void)
{
	int ps;

	__asm__ volatile ("rsr.PS %0" : "=r"(ps));
	printk("%s, PS = %xh\n", __func__, ps);
}

int xstack_ok;

#define XSTACK_SIZE 1024
#define XSTACK_CANARY 0x5a5aa5a5
static int xstack_stack2[XSTACK_SIZE + 1];

void do_xstack_call(void *new_stack); /* in asmhelp.S */

void xstack_bottom(void)
{
	xstack_ok = 1;
}

void xstack_top(void)
{
	int on_my_stack;

	printk("%s oms %p\n", __func__, &on_my_stack);

	/* Do this via fill_window() to be absolutely sure the whole
	 * call stack across both physical stacks got spilled and
	 * filled properly.
	 */
	fill_window(xstack_bottom);
}

int test_xstack(void)
{
	/* The stack array is one word bigger than the stack proper;
	 * put a canary in that extra word, just above the initial
	 * stack pointer, to check that nothing underflows.
	 */

	int *new_stack = &xstack_stack2[XSTACK_SIZE];
	*new_stack = XSTACK_CANARY;

	printk("%s new_stack = %p\n", __func__, new_stack);

	do_xstack_call(new_stack);

	printk("xstack_ok %d stack2[%d] 0x%x\n",
	       xstack_ok, XSTACK_SIZE, xstack_stack2[XSTACK_SIZE]);

	return xstack_ok && xstack_stack2[XSTACK_SIZE] == XSTACK_CANARY;
}

#ifdef CONFIG_SOC_ESP32
#define TIMER_INT 16
#else
#define TIMER_INT 13
#endif

volatile int timer2_fired;

int excint_stack[8192];
void *excint_stack_top = &excint_stack[ARRAY_SIZE(excint_stack)];

static struct { int nest; void *stack_top; } excint_cpu;

volatile int int5_result;

void disable_timer(void)
{
	int ie;

	__asm__ volatile("rsr.intenable %0" : "=r"(ie));
	ie &= ~(1<<TIMER_INT);
	__asm__ volatile("wsr.intenable %0; rsync" : : "r"(ie));
}

void enable_timer(void)
{
	int ie;

	__asm__ volatile("rsr.intenable %0" : "=r"(ie));
	ie |= (1<<TIMER_INT);
	__asm__ volatile("wsr.intenable %0; rsync" : : "r"(ie));
}

void *handle_int5_c(void *handle)
{
	int5_result = spill_fnp(0, 3, 2, 1);

	/* Writing CCOMPARE2 clears the pending timer interrupt; park
	 * it just behind CCOUNT so it won't fire again until rearmed.
	 */
	int ccompare2_val = ccount() - 1;

	__asm__ volatile("wsr.ccompare2 %0; rsync" : : "r"(ccompare2_val));

	disable_timer();

	timer2_fired = 1;

	return handle;
}

int interrupt_test(void)
{
	int ok = 1;

	excint_cpu.nest = 0;
	excint_cpu.stack_top = &excint_stack[ARRAY_SIZE(excint_stack)];

	void *cpuptr = &excint_cpu;

	/* The interrupt entry code locates this per-CPU record (nest
	 * count and interrupt stack top) through the MISC0 special
	 * register.
	 */
	__asm__ volatile("wsr.MISC0 %0" : : "r"(cpuptr));

	/* We reuse the "spill_fn" logic from above to get a
	 * stack-sensitive, deeply-recursive computation going that
	 * will be sensitive to interrupt bugs
	 */
	spill_mode = NO_SPILL;

	unsigned int start = ccount();
	int expect = spill_fnp(0, 3, 2, 1);
	unsigned int spill_time = ccount() - start;

	/* Ten thousand iterations is still pretty quick */
	for (int i = 0; i < 10000; i++) {
		int nest = i & 1;

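		/* Alternate between nest == 0, where the interrupt
		 * entry code is expected to switch onto the dedicated
		 * interrupt stack, and nest == 1, where it should
		 * believe it is already nested and stay on the
		 * interrupted stack (assuming the entry logic consults
		 * this count via the MISC0 record above).
		 */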
		excint_cpu.nest = nest;
		timer2_fired = 0;

		/* Vaguely random delay between 2-8 iterations of
		 * spill_fn().  Maybe improve with a real PRNG.
		 */
		const int max_reps = 8;
		int wh = white[i % ARRAY_SIZE(white)];
		int delay = spill_time * 2U
			+ ((wh * (i+1)) % (spill_time * (max_reps - 2)));

		int alarm = ccount() + delay;

		__asm__ volatile("wsr.ccompare2 %0; rsync" : : "r"(alarm));

		enable_timer();

#if 0
		/* This is what I want to test: run the spill_fn test
		 * repeatedly in the main thread so that it can be
		 * interrupted and restored, and validate that it
		 * returns the same result every time.  But this can't
		 * work, even in principle: the timer interrupt we are
		 * using is "high priority", which means that it can
		 * interrupt the window exceptions being thrown in the
		 * main thread.  And by design, Xtensa window
		 * exceptions CANNOT be made reentrant (they don't
		 * save the interrupted state, so can be interrupted
		 * again before they can mask off exceptions, which
		 * will then lose/clobber the OWB field in PS when the
		 * interrupt handler throws another window exception).
		 * So this doesn't work, in fact it fails every 2-10
		 * iterations as spill_fn spends a lot of its time
		 * spill/filling stack frames (by design, of course).
		 *
		 * This could be made to work if we could repurpose
		 * the existing medium priority timer interrupt (which
		 * is hard in a unit test: that's an important
		 * interrupt!) or use the low priority timer which
		 * delivers to the global exception handler (basically
		 * impossible in a unit test).  Frustrating.
		 */
		int reps = 0;

		while (!timer2_fired && reps < (max_reps+2)) {
			int result = spill_fnp(0, 3, 2, 1);

			reps++;
			if (result != expect) {
				ok = 0;
			}
		}
		if (reps >= max_reps+2) {
			printk("Interrupt didn't arrive\n");
			ok = 0;
		}
		if (int5_result != expect) {
			printk("Unexpected int spill_fn() result\n");
			ok = 0;
		}
		printk("INT test delay %d nest %d reps %d\n",
		       delay, nest, reps);
#else
		/* So this is what we do instead: just spin in the
		 * main thread calling functions that don't involve
		 * exceptions.  By experiment, calling spill_fn with a
		 * first (depth) argument of 6 or 7 results in a
		 * shallow call tree that won't throw exceptions.  At
		 * least we're executing real code which depends on
		 * its register state and validating that interrupts
		 * don't hurt.
		 */
		volatile int dummy = 1;

		while (!timer2_fired) {
			dummy = spill_fnp(6, dummy, 2, 3);
		}
		if (int5_result != expect) {
			printk("Unexpected int spill_fn() result\n");
			ok = 0;
		}
#endif
	}
	return ok;
}

void main(void)
{
	/* Turn off interrupts and leave them disabled, otherwise the
	 * "userspace" context switching tests might not be reliable:
	 * stack pointers can be in indeterminate states here.  (Note
	 * that the interrupt test below uses a high priority
	 * interrupt which is not masked by irq_lock(), so it doesn't
	 * care.)
	 */
	int key = irq_lock();

	/* Strictly not a "test", we just want to know that the jump
	 * worked.  If the rest of the code runs, this must have
	 * "passed".
	 */
	rfi_jump();

	int ok = 1;

	ok = ok && test_reg_spill();

	ok = ok && test_highreg_save();

	ok = ok && test_switch();

	ok = ok && test_xstack();

	ok = ok && interrupt_test();

	irq_unlock(key);

	printk("%s\n", ok ? "OK" : "Failed");
}