kernel/arch: consolidate tTCS and tNANO definitions

There was a lot of duplication between architectures for the definition
of threads and the "nanokernel" guts. These have been consolidated.

Now, a common file kernel/unified/include/kernel_structs.h holds the
common definitions. Architectures provide two files to complement it:
kernel_arch_data.h and kernel_arch_func.h. The first one contains at
least the struct _thread_arch and struct _kernel_arch data structures,
as well as the struct _callee_saved and struct _caller_saved register
layouts. The second file contains anything that depends on the common
definitions in kernel_structs.h. Those two files are only meant to be
included by kernel_structs.h, at very specific locations.
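
For illustration, the include layout of the new kernel_structs.h (shown
in full in the diff below) looks like this, simplified:

    /* kernel_structs.h, simplified sketch */
    #include <kernel_arch_data.h>   /* arch: _thread_arch, _kernel_arch,
                                     * _callee_saved, _caller_saved */

    struct _thread_base { /* ... */ };
    struct k_thread { /* ... */ struct _thread_arch arch; };
    struct _kernel { /* ... */ struct _kernel_arch arch; };

    #include <kernel_arch_func.h>   /* arch code needing the structs above */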

The thread data structure has been separated into three major parts:
common struct _thread_base and struct k_thread, and arch-specific struct
_thread_arch. The first and third ones are included in the second.
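
In practice, this means common thread fields are now accessed through
the 'base' member, for example:

    thread->prio          becomes   thread->base.prio
    thread->flags         becomes   thread->base.flags
    _current->swap_data   becomes   _current->base.swap_data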

The struct s_NANO data structure has been split into two: common struct
_kernel and arch-specific struct _kernel_arch. The latter is included in
the former.
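
There is a single instance of struct _kernel, and convenience macros in
kernel_structs.h replace most direct uses of the old _nanokernel
variable:

    extern struct _kernel _kernel;

    #define _current   _kernel.current
    #define _ready_q   _kernel.ready_q
    #define _timeout_q _kernel.timeout_q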

Offsets files have also changed: nano_offsets.h has been renamed
kernel_offsets.h and is still included by the arch-specific offsets.c.
Also, since the thread and kernel data structures are now made of
sub-structures, the offsets of nested fields must be summed to make up
the full offset. Some of these sums have been consolidated into shorter
symbols, available from kernel/unified/include/offsets_short.h, which
includes an arch-specific offsets_short_arch.h. Most of the code now
includes offsets_short.h instead of offsets.h.
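
For example, the full offset to a thread's priority is the offset of
'base' within struct k_thread plus the offset of 'prio' within struct
_thread_base, pre-computed as one symbol:

    /* from offsets_short.h */
    #define _thread_offset_to_prio \
            (___thread_t_base_OFFSET + ___thread_base_t_prio_OFFSET)

Assembly code can then use _thread_offset_to_prio directly instead of
summing the two offsets at each use site.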

Change-Id: I084645cb7e6db8db69aeaaf162963fe157045d5a
Signed-off-by: Benjamin Walsh <benjamin.walsh@windriver.com>
diff --git a/kernel/unified/alert.c b/kernel/unified/alert.c
index 61b818b..03aaea6 100644
--- a/kernel/unified/alert.c
+++ b/kernel/unified/alert.c
@@ -20,7 +20,7 @@
 */
 
 #include <kernel.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <misc/debug/object_tracing_common.h>
 #include <atomic.h>
 #include <init.h>
diff --git a/kernel/unified/compiler_stack_protect.c b/kernel/unified/compiler_stack_protect.c
index 506d505..d47b7af 100644
--- a/kernel/unified/compiler_stack_protect.c
+++ b/kernel/unified/compiler_stack_protect.c
@@ -28,7 +28,7 @@
 
 #include <toolchain.h> /* compiler specific configurations */
 
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <toolchain.h>
 #include <sections.h>
 
diff --git a/kernel/unified/errno.c b/kernel/unified/errno.c
index 08f27a5..9305fab 100644
--- a/kernel/unified/errno.c
+++ b/kernel/unified/errno.c
@@ -22,7 +22,7 @@
  * context switching.
  */
 
-#include <nano_private.h>
+#include <kernel_structs.h>
 
 /*
  * Define _k_neg_eagain for use in assembly files as errno.h is
@@ -34,6 +34,6 @@
 #ifdef CONFIG_ERRNO
 int *_get_errno(void)
 {
-	return &_nanokernel.current->errno_var;
+	return &_current->errno_var;
 }
 #endif
diff --git a/kernel/unified/fifo.c b/kernel/unified/fifo.c
index 8afa595..a3e8b95 100644
--- a/kernel/unified/fifo.c
+++ b/kernel/unified/fifo.c
@@ -22,7 +22,7 @@
 
 
 #include <kernel.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <misc/debug/object_tracing_common.h>
 #include <toolchain.h>
 #include <sections.h>
@@ -156,5 +156,5 @@
 
 	_pend_current_thread(&fifo->wait_q, timeout);
 
-	return _Swap(key) ? NULL : _current->swap_data;
+	return _Swap(key) ? NULL : _current->base.swap_data;
 }
diff --git a/kernel/unified/idle.c b/kernel/unified/idle.c
index f388b66..b6e36a3 100644
--- a/kernel/unified/idle.c
+++ b/kernel/unified/idle.c
@@ -15,7 +15,7 @@
  */
 
 #include <nanokernel.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <toolchain.h>
 #include <sections.h>
 #include <drivers/system_timer.h>
@@ -58,7 +58,7 @@
  */
 static void set_kernel_idle_time_in_ticks(int32_t ticks)
 {
-	_nanokernel.idle = ticks;
+	_kernel.idle = ticks;
 }
 #else
 #define set_kernel_idle_time_in_ticks(x) do { } while (0)
diff --git a/kernel/unified/include/gen_offset.h b/kernel/unified/include/gen_offset.h
index c44d554..e195999 100644
--- a/kernel/unified/include/gen_offset.h
+++ b/kernel/unified/include/gen_offset.h
@@ -55,7 +55,7 @@
  * /@ include struct definitions for which offsets symbols are to be
  * generated @/
  *
- * #include <nano_private.h>
+ * #include <kernel_structs.h>
  * GEN_ABS_SYM_BEGIN (_OffsetAbsSyms)	/@ the name parameter is arbitrary @/
  * /@ tNANO structure member offsets @/
  *
@@ -63,7 +63,7 @@
  * GEN_OFFSET_SYM (tNANO, task);
  * GEN_OFFSET_SYM (tNANO, current);
  * GEN_OFFSET_SYM (tNANO, nested);
- * GEN_OFFSET_SYM (tNANO, common_isp);
+ * GEN_OFFSET_SYM (tNANO, irq_stack);
  *
  * GEN_ABSOLUTE_SYM (__tNANO_SIZEOF, sizeof(tNANO));
  *
@@ -73,7 +73,7 @@
  * Compiling the sample offsets.c results in the following symbols in offsets.o:
  *
  * $ nm offsets.o
- * 00000010 A __tNANO_common_isp_OFFSET
+ * 00000010 A __tNANO_irq_stack_OFFSET
  * 00000008 A __tNANO_current_OFFSET
  * 0000000c A __tNANO_nested_OFFSET
  * 00000000 A __tNANO_fiber_OFFSET
diff --git a/kernel/unified/include/kernel_offsets.h b/kernel/unified/include/kernel_offsets.h
new file mode 100644
index 0000000..ad61f8d
--- /dev/null
+++ b/kernel/unified/include/kernel_offsets.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2013-2014 Wind River Systems, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <device.h>
+
+#ifndef _kernel_offsets__h_
+#define _kernel_offsets__h_
+
+/*
+ * The final link step uses the symbol _OffsetAbsSyms to force the linkage of
+ * offsets.o into the ELF image.
+ */
+
+GEN_ABS_SYM_BEGIN(_OffsetAbsSyms)
+
+/* arch-agnostic tNANO structure member offsets */
+
+GEN_OFFSET_SYM(_kernel_t, current);
+
+#if defined(CONFIG_THREAD_MONITOR)
+GEN_OFFSET_SYM(_kernel_t, threads);
+#endif
+
+GEN_OFFSET_SYM(_kernel_t, nested);
+GEN_OFFSET_SYM(_kernel_t, irq_stack);
+#ifdef CONFIG_SYS_POWER_MANAGEMENT
+GEN_OFFSET_SYM(_kernel_t, idle);
+#endif
+
+#ifdef CONFIG_FP_SHARING
+GEN_OFFSET_SYM(_kernel_t, current_fp);
+#endif
+
+GEN_ABSOLUTE_SYM(_STRUCT_KERNEL_SIZE, sizeof(struct _kernel));
+
+GEN_OFFSET_SYM(_thread_base_t, flags);
+GEN_OFFSET_SYM(_thread_base_t, prio);
+GEN_OFFSET_SYM(_thread_base_t, sched_locked);
+GEN_OFFSET_SYM(_thread_base_t, swap_data);
+
+GEN_OFFSET_SYM(_thread_t, base);
+GEN_OFFSET_SYM(_thread_t, caller_saved);
+GEN_OFFSET_SYM(_thread_t, callee_saved);
+GEN_OFFSET_SYM(_thread_t, arch);
+
+#if defined(CONFIG_THREAD_MONITOR)
+GEN_OFFSET_SYM(_thread_t, next_thread);
+#endif
+
+#ifdef CONFIG_THREAD_CUSTOM_DATA
+GEN_OFFSET_SYM(_thread_t, custom_data);
+#endif
+
+GEN_ABSOLUTE_SYM(K_THREAD_SIZEOF, sizeof(struct k_thread));
+
+/* size of the device structure. Used by linker scripts */
+GEN_ABSOLUTE_SYM(_DEVICE_STRUCT_SIZE, sizeof(struct device));
+
+#endif /* _kernel_offsets__h_ */
diff --git a/kernel/unified/include/kernel_structs.h b/kernel/unified/include/kernel_structs.h
new file mode 100644
index 0000000..786f0dd
--- /dev/null
+++ b/kernel/unified/include/kernel_structs.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2016 Wind River Systems, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _kernel_structs__h_
+#define _kernel_structs__h_
+
+#if !defined(_ASMLANGUAGE)
+#include <kernel.h>
+#include <atomic.h>
+#include <misc/dlist.h>
+#endif
+
+#include <kernel_arch_data.h>
+
+#if !defined(_ASMLANGUAGE)
+
+#ifdef CONFIG_THREAD_MONITOR
+struct __thread_entry {
+	_thread_entry_t pEntry;
+	void *parameter1;
+	void *parameter2;
+	void *parameter3;
+};
+#endif
+
+/* can be used for creating 'dummy' threads, e.g. for pending on objects */
+struct _thread_base {
+
+	/* this thread's entry in a ready/wait queue */
+	sys_dnode_t k_q_node;
+
+	/* execution flags */
+	int flags;
+
+	/* thread priority used to sort linked list */
+	int prio;
+
+	/* scheduler lock count */
+	atomic_t sched_locked;
+
+	/* data returned by APIs */
+	void *swap_data;
+
+#ifdef CONFIG_NANO_TIMEOUTS
+	/* this thread's entry in a timeout queue */
+	struct _timeout timeout;
+#endif
+
+};
+
+typedef struct _thread_base _thread_base_t;
+
+struct k_thread {
+
+	struct _thread_base base;
+
+	/* defined by the architecture, but all archs need these */
+	struct _caller_saved caller_saved;
+	struct _callee_saved callee_saved;
+
+	/* static thread init data */
+	void *init_data;
+
+	/* abort function */
+	void (*fn_abort)(void);
+
+#if defined(CONFIG_THREAD_MONITOR)
+	/* thread entry and parameters description */
+	struct __thread_entry *entry;
+
+	/* next item in list of all threads */
+	struct k_thread *next_thread;
+#endif
+
+#ifdef CONFIG_THREAD_CUSTOM_DATA
+	/* crude thread-local storage */
+	void *custom_data;
+#endif
+
+#ifdef CONFIG_ERRNO
+	/* per-thread errno variable */
+	int errno_var;
+#endif
+
+	/* arch-specifics: must always be at the end */
+	struct _thread_arch arch;
+};
+
+typedef struct k_thread _thread_t;
+
+struct _ready_q {
+
+	/* next thread to run if known, NULL otherwise */
+	struct k_thread *cache;
+
+	/* bitmap of priorities that contain at least one ready thread */
+	uint32_t prio_bmap[1];
+
+	/* ready queues, one per priority */
+	sys_dlist_t q[K_NUM_PRIORITIES];
+};
+
+struct _kernel {
+
+	/* nested interrupt count */
+	uint32_t nested;
+
+	/* interrupt stack pointer base */
+	char *irq_stack;
+
+	/* currently scheduled thread */
+	struct k_thread *current;
+
+#ifdef CONFIG_SYS_CLOCK_EXISTS
+	/* queue of timeouts */
+	sys_dlist_t timeout_q;
+#endif
+
+#ifdef CONFIG_SYS_POWER_MANAGEMENT
+	int32_t idle; /* Number of ticks for kernel idling */
+#endif
+
+	/*
+	 * ready queue: can be big, keep after small fields, since some
+	 * assembly (e.g. ARC) is limited in the encoding of the offset
+	 */
+	struct _ready_q ready_q;
+
+#ifdef CONFIG_FP_SHARING
+	/*
+	 * A 'current_sse' field does not exist in addition to the 'current_fp'
+	 * field since it's not possible to divide the IA-32 non-integer
+	 * registers into 2 distinct blocks owned by differing threads.  In
+	 * other words, given that the 'fxnsave/fxrstor' instructions
+	 * save/restore both the X87 FPU and XMM registers, it's not possible
+	 * for a thread to only "own" the XMM registers.
+	 */
+
+	/* thread (fiber or task) that owns the FP regs */
+	struct k_thread *current_fp;
+#endif
+
+#if defined(CONFIG_THREAD_MONITOR)
+	struct k_thread *threads; /* singly linked list of ALL fiber+tasks */
+#endif
+
+	/* arch-specific part of _kernel */
+	struct _kernel_arch arch;
+};
+
+typedef struct _kernel _kernel_t;
+
+extern struct _kernel _kernel;
+
+#define _current _kernel.current
+#define _ready_q _kernel.ready_q
+#define _timeout_q _kernel.timeout_q
+#define _threads _kernel.threads
+
+#include <kernel_arch_func.h>
+
+static ALWAYS_INLINE void
+_set_thread_return_value_with_data(struct k_thread *thread,
+				   unsigned int value,
+				   void *data)
+{
+	_set_thread_return_value(thread, value);
+	thread->base.swap_data = data;
+}
+
+#endif /* _ASMLANGUAGE */
+
+#endif /* _kernel_structs__h_ */
diff --git a/kernel/unified/include/ksched.h b/kernel/unified/include/ksched.h
index 7162bcb..e016b02 100644
--- a/kernel/unified/include/ksched.h
+++ b/kernel/unified/include/ksched.h
@@ -17,10 +17,7 @@
 #ifndef _ksched__h_
 #define _ksched__h_
 
-#include <kernel.h>
-#include <nano_private.h>
-#include <atomic.h>
-#include <misc/dlist.h>
+#include <kernel_structs.h>
 
 extern k_tid_t const _main_thread;
 extern k_tid_t const _idle_thread;
@@ -111,30 +108,30 @@
 static inline int _is_t1_higher_prio_than_t2(struct k_thread *t1,
 					     struct k_thread *t2)
 {
-	return _is_prio1_higher_than_prio2(t1->prio, t2->prio);
+	return _is_prio1_higher_than_prio2(t1->base.prio, t2->base.prio);
 }
 
 static inline int _is_higher_prio_than_current(struct k_thread *thread)
 {
-	return _is_t1_higher_prio_than_t2(thread, _nanokernel.current);
+	return _is_t1_higher_prio_than_t2(thread, _current);
 }
 
 /* is thread currently cooperative ? */
 static inline int _is_coop(struct k_thread *thread)
 {
-	return thread->prio < 0;
+	return thread->base.prio < 0;
 }
 
 /* is thread currently preemptible ? */
 static inline int _is_preempt(struct k_thread *thread)
 {
-	return !_is_coop(thread) && !atomic_get(&thread->sched_locked);
+	return !_is_coop(thread) && !atomic_get(&thread->base.sched_locked);
 }
 
 /* is current thread preemptible and we are not running in ISR context */
 static inline int _is_current_execution_context_preemptible(void)
 {
-	return !_is_in_isr() && _is_preempt(_nanokernel.current);
+	return !_is_in_isr() && _is_preempt(_current);
 }
 
 /* find out if priority is under priority inheritance ceiling */
@@ -178,7 +175,7 @@
 /* interrupts must be locked */
 static inline int _get_highest_ready_prio(void)
 {
-	uint32_t ready = _nanokernel.ready_q.prio_bmap[0];
+	uint32_t ready = _ready_q.prio_bmap[0];
 
 	return find_lsb_set(ready) - 1 - CONFIG_NUM_COOP_PRIORITIES;
 }
@@ -204,7 +201,7 @@
 {
 	__ASSERT(!_is_in_isr(), "");
 
-	atomic_inc(&_nanokernel.current->sched_locked);
+	atomic_inc(&_current->base.sched_locked);
 
 	K_DEBUG("scheduler locked (%p:%d)\n",
 		_current, _current->sched_locked);
@@ -220,77 +217,77 @@
 {
 	__ASSERT(!_is_in_isr(), "");
 
-	atomic_dec(&_nanokernel.current->sched_locked);
+	atomic_dec(&_current->base.sched_locked);
 }
 
 static inline void _set_thread_states(struct k_thread *thread, uint32_t states)
 {
-	thread->flags |= states;
+	thread->base.flags |= states;
 }
 
 static inline void _reset_thread_states(struct k_thread *thread,
 					uint32_t states)
 {
-	thread->flags &= ~states;
+	thread->base.flags &= ~states;
 }
 
 /* mark a thread as being suspended */
 static inline void _mark_thread_as_suspended(struct k_thread *thread)
 {
-	thread->flags |= K_SUSPENDED;
+	thread->base.flags |= K_SUSPENDED;
 }
 
 /* mark a thread as not being suspended */
 static inline void _mark_thread_as_not_suspended(struct k_thread *thread)
 {
-	thread->flags &= ~K_SUSPENDED;
+	thread->base.flags &= ~K_SUSPENDED;
 }
 
 /* mark a thread as being in the timer queue */
 static inline void _mark_thread_as_timing(struct k_thread *thread)
 {
-	thread->flags |= K_TIMING;
+	thread->base.flags |= K_TIMING;
 }
 
 /* mark a thread as not being in the timer queue */
 static inline void _mark_thread_as_not_timing(struct k_thread *thread)
 {
-	thread->flags &= ~K_TIMING;
+	thread->base.flags &= ~K_TIMING;
 }
 
 /* check if a thread is on the timer queue */
 static inline int _is_thread_timing(struct k_thread *thread)
 {
-	return !!(thread->flags & K_TIMING);
+	return !!(thread->base.flags & K_TIMING);
 }
 
 static inline int _has_thread_started(struct k_thread *thread)
 {
-	return !(thread->flags & K_PRESTART);
+	return !(thread->base.flags & K_PRESTART);
 }
 
 /* check if a thread is ready */
 static inline int _is_thread_ready(struct k_thread *thread)
 {
-	return (thread->flags & K_EXECUTION_MASK) == K_READY;
+	return (thread->base.flags & K_EXECUTION_MASK) == K_READY;
 }
 
 /* mark a thread as pending in its TCS */
 static inline void _mark_thread_as_pending(struct k_thread *thread)
 {
-	thread->flags |= K_PENDING;
+	thread->base.flags |= K_PENDING;
 }
 
 /* mark a thread as not pending in its TCS */
 static inline void _mark_thread_as_not_pending(struct k_thread *thread)
 {
-	thread->flags &= ~K_PENDING;
+	thread->base.flags &= ~K_PENDING;
 }
 
 /* check if a thread is pending */
 static inline int _is_thread_pending(struct k_thread *thread)
 {
-	return !!(thread->flags & K_PENDING);
+	return !!(thread->base.flags & K_PENDING);
 }
 
 /*
@@ -300,17 +297,17 @@
 /* must be called with interrupts locked */
 static inline void _ready_thread(struct k_thread *thread)
 {
-	__ASSERT(_is_prio_higher(thread->prio, K_LOWEST_THREAD_PRIO) ||
-		 ((thread->prio == K_LOWEST_THREAD_PRIO) &&
+	__ASSERT(_is_prio_higher(thread->base.prio, K_LOWEST_THREAD_PRIO) ||
+		 ((thread->base.prio == K_LOWEST_THREAD_PRIO) &&
 		  (thread == _idle_thread)),
 		 "thread %p prio too low (is %d, cannot be lower than %d)",
-		 thread, thread->prio,
+		 thread, thread->base.prio,
 		 thread == _idle_thread ? K_LOWEST_THREAD_PRIO :
 					  K_LOWEST_APPLICATION_THREAD_PRIO);
 
-	__ASSERT(!_is_prio_higher(thread->prio, K_HIGHEST_THREAD_PRIO),
+	__ASSERT(!_is_prio_higher(thread->base.prio, K_HIGHEST_THREAD_PRIO),
 		 "thread %p prio too high (id %d, cannot be higher than %d)",
-		 thread, thread->prio, K_HIGHEST_THREAD_PRIO);
+		 thread, thread->base.prio, K_HIGHEST_THREAD_PRIO);
 
 	/* K_PRESTART is needed to handle the start-with-delay case */
 	_reset_thread_states(thread, K_TIMING|K_PRESTART);
@@ -327,7 +324,7 @@
  */
 static inline void _mark_thread_as_started(struct k_thread *thread)
 {
-	thread->flags &= ~K_PRESTART;
+	thread->base.flags &= ~K_PRESTART;
 }
 
 /**
@@ -337,7 +334,7 @@
  */
 static inline void _mark_thread_as_dead(struct k_thread *thread)
 {
-	thread->flags |= K_DEAD;
+	thread->base.flags |= K_DEAD;
 }
 
 /*
@@ -349,10 +346,10 @@
 {
 	if (_is_thread_ready(thread)) {
 		_remove_thread_from_ready_q(thread);
-		thread->prio = prio;
+		thread->base.prio = prio;
 		_add_thread_to_ready_q(thread);
 	} else {
-		thread->prio = prio;
+		thread->base.prio = prio;
 	}
 }
 
@@ -378,9 +375,9 @@
 /* must be called with interrupts locked */
 static inline void _unpend_thread(struct k_thread *thread)
 {
-	__ASSERT(thread->flags & K_PENDING, "");
+	__ASSERT(thread->base.flags & K_PENDING, "");
 
-	sys_dlist_remove(&thread->k_q_node);
+	sys_dlist_remove(&thread->base.k_q_node);
 	_mark_thread_as_not_pending(thread);
 }
 
diff --git a/kernel/unified/include/nano_internal.h b/kernel/unified/include/nano_internal.h
index 2161216..344f085 100644
--- a/kernel/unified/include/nano_internal.h
+++ b/kernel/unified/include/nano_internal.h
@@ -49,7 +49,6 @@
 
 /* helper type alias for thread control structure */
 
-typedef struct tcs tTCS;
 typedef void (*_thread_entry_t)(void *, void *, void *);
 
 extern void _thread_entry(void (*)(void *, void *, void *),
@@ -73,9 +72,9 @@
 /* clean up when a thread is aborted */
 
 #if defined(CONFIG_THREAD_MONITOR)
-extern void _thread_monitor_exit(struct tcs *tcs);
+extern void _thread_monitor_exit(struct k_thread *thread);
 #else
-#define _thread_monitor_exit(tcs) \
+#define _thread_monitor_exit(thread) \
 	do {/* nothing */    \
 	} while (0)
 #endif /* CONFIG_THREAD_MONITOR */
diff --git a/kernel/unified/include/nano_offsets.h b/kernel/unified/include/nano_offsets.h
deleted file mode 100644
index ed4e22c..0000000
--- a/kernel/unified/include/nano_offsets.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/* nano_offsets.h - nanokernel structure member offset definitions */
-
-/*
- * Copyright (c) 2013-2014 Wind River Systems, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <device.h>
-
-#ifndef _NANO_OFFSETS__H_
-#define _NANO_OFFSETS__H_
-
-/*
- * The final link step uses the symbol _OffsetAbsSyms to force the linkage of
- * offsets.o into the ELF image.
- */
-
-GEN_ABS_SYM_BEGIN(_OffsetAbsSyms)
-
-/* arch-agnostic tNANO structure member offsets */
-
-GEN_OFFSET_SYM(tNANO, current);
-
-#if defined(CONFIG_THREAD_MONITOR)
-GEN_OFFSET_SYM(tNANO, threads);
-#endif
-
-#ifdef CONFIG_FP_SHARING
-GEN_OFFSET_SYM(tNANO, current_fp);
-#endif
-
-/* size of the entire tNANO structure */
-
-GEN_ABSOLUTE_SYM(__tNANO_SIZEOF, sizeof(tNANO));
-
-/* arch-agnostic struct tcs structure member offsets */
-
-GEN_OFFSET_SYM(tTCS, prio);
-GEN_OFFSET_SYM(tTCS, flags);
-GEN_OFFSET_SYM(tTCS, coopReg);   /* start of coop register set */
-GEN_OFFSET_SYM(tTCS, preempReg); /* start of prempt register set */
-
-#if defined(CONFIG_THREAD_MONITOR)
-GEN_OFFSET_SYM(tTCS, next_thread);
-#endif
-
-GEN_OFFSET_SYM(tTCS, sched_locked);
-
-/* size of the entire struct tcs structure */
-
-GEN_ABSOLUTE_SYM(__tTCS_SIZEOF, sizeof(tTCS));
-
-/* size of the device structure. Used by linker scripts */
-GEN_ABSOLUTE_SYM(__DEVICE_STR_SIZEOF, sizeof(struct device));
-
-#endif /* _NANO_OFFSETS__H_ */
diff --git a/kernel/unified/include/offsets_short.h b/kernel/unified/include/offsets_short.h
new file mode 100644
index 0000000..dbfb1f5a
--- /dev/null
+++ b/kernel/unified/include/offsets_short.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2016 Wind River Systems, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _offsets_short__h_
+#define _offsets_short__h_
+
+#include <offsets.h>
+#include <offsets_short_arch.h>
+
+/* kernel */
+
+/* main */
+
+#define _kernel_offset_to_nested \
+	(___kernel_t_nested_OFFSET)
+
+#define _kernel_offset_to_irq_stack \
+	(___kernel_t_irq_stack_OFFSET)
+
+#define _kernel_offset_to_current \
+	(___kernel_t_current_OFFSET)
+
+#define _kernel_offset_to_idle \
+	(___kernel_t_idle_OFFSET)
+
+#define _kernel_offset_to_current_fp \
+	(___kernel_t_current_fp_OFFSET)
+
+/* end - kernel */
+
+/* threads */
+
+/* main */
+
+#define _thread_offset_to_callee_saved \
+	(___thread_t_callee_saved_OFFSET)
+
+/* base */
+
+#define _thread_offset_to_flags \
+	(___thread_t_base_OFFSET + ___thread_base_t_flags_OFFSET)
+
+#define _thread_offset_to_prio \
+	(___thread_t_base_OFFSET + ___thread_base_t_prio_OFFSET)
+
+#define _thread_offset_to_sched_locked \
+	(___thread_t_base_OFFSET + ___thread_base_t_sched_locked_OFFSET)
+
+#define _thread_offset_to_esf \
+	(___thread_t_arch_OFFSET + ___thread_arch_t_esf_OFFSET)
+
+
+/* end - threads */
+
+#endif /* _offsets_short__h_ */
diff --git a/kernel/unified/include/timeout_q.h b/kernel/unified/include/timeout_q.h
index 31642f0..66f7a49 100644
--- a/kernel/unified/include/timeout_q.h
+++ b/kernel/unified/include/timeout_q.h
@@ -67,16 +67,16 @@
 
 static inline void _init_thread_timeout(struct k_thread *thread)
 {
-	_init_timeout(&thread->timeout, NULL);
+	_init_timeout(&thread->base.timeout, NULL);
 }
 
 /*
  * XXX - backwards compatibility until the arch part is updated to call
  * _init_thread_timeout()
  */
-static inline void _nano_timeout_tcs_init(struct tcs *tcs)
+static inline void _nano_timeout_thread_init(struct k_thread *thread)
 {
-	_init_thread_timeout(tcs);
+	_init_thread_timeout(thread);
 }
 
 /* remove a thread timing out from kernel object's wait queue */
@@ -86,7 +86,7 @@
 {
 	if (timeout_obj->wait_q) {
 		_unpend_thread(thread);
-		thread->timeout.wait_q = NULL;
+		thread->base.timeout.wait_q = NULL;
 	}
 }
 
@@ -132,7 +132,7 @@
 
 static inline void _handle_timeouts(void)
 {
-	sys_dlist_t *timeout_q = &_nanokernel.timeout_q;
+	sys_dlist_t *timeout_q = &_timeout_q;
 	struct _timeout *next;
 
 	next = (struct _timeout *)sys_dlist_peek_head(timeout_q);
@@ -145,7 +145,7 @@
 
 static inline int _abort_timeout(struct _timeout *t)
 {
-	sys_dlist_t *timeout_q = &_nanokernel.timeout_q;
+	sys_dlist_t *timeout_q = &_timeout_q;
 
 	if (-1 == t->delta_ticks_from_prev) {
 		return -1;
@@ -165,7 +165,7 @@
 
 static inline int _abort_thread_timeout(struct k_thread *thread)
 {
-	return _abort_timeout(&thread->timeout);
+	return _abort_timeout(&thread->base.timeout);
 }
 
 /*
@@ -210,12 +210,12 @@
 	K_DEBUG("thread %p on wait_q %p, for timeout: %d\n",
 		thread, wait_q, timeout);
 
-	sys_dlist_t *timeout_q = &_nanokernel.timeout_q;
+	sys_dlist_t *timeout_q = &_timeout_q;
 
 	K_DEBUG("timeout_q %p before: head: %p, tail: %p\n",
-		&_nanokernel.timeout_q,
-		sys_dlist_peek_head(&_nanokernel.timeout_q),
-		_nanokernel.timeout_q.tail);
+		&_timeout_q,
+		sys_dlist_peek_head(&_timeout_q),
+		_timeout_q.tail);
 
 	K_DEBUG("timeout   %p before: next: %p, prev: %p\n",
 		timeout_obj, timeout_obj->node.next, timeout_obj->node.prev);
@@ -228,9 +228,9 @@
 			    &timeout_obj->delta_ticks_from_prev);
 
 	K_DEBUG("timeout_q %p after:  head: %p, tail: %p\n",
-		&_nanokernel.timeout_q,
-		sys_dlist_peek_head(&_nanokernel.timeout_q),
-		_nanokernel.timeout_q.tail);
+		&_timeout_q,
+		sys_dlist_peek_head(&_timeout_q),
+		_timeout_q.tail);
 
 	K_DEBUG("timeout   %p after:  next: %p, prev: %p\n",
 		timeout_obj, timeout_obj->node.next, timeout_obj->node.prev);
@@ -245,7 +245,7 @@
 static inline void _add_thread_timeout(struct k_thread *thread,
 				       _wait_q_t *wait_q, int32_t timeout)
 {
-	_add_timeout(thread, &thread->timeout, wait_q, timeout);
+	_add_timeout(thread, &thread->base.timeout, wait_q, timeout);
 }
 
 /* find the closest deadline in the timeout queue */
diff --git a/kernel/unified/include/wait_q.h b/kernel/unified/include/wait_q.h
index bfa1108..d499871 100644
--- a/kernel/unified/include/wait_q.h
+++ b/kernel/unified/include/wait_q.h
@@ -19,7 +19,7 @@
 #ifndef _kernel_nanokernel_include_wait_q__h_
 #define _kernel_nanokernel_include_wait_q__h_
 
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <misc/dlist.h>
 #include <ksched.h>
 
@@ -31,7 +31,7 @@
 #include <timeout_q.h>
 #else
 #define _init_thread_timeout(thread) do { } while ((0))
-#define _nano_timeout_tcs_init(thread) _init_thread_timeout(thread)
+#define _nano_timeout_thread_init(thread) _init_thread_timeout(thread)
 #define _add_thread_timeout(thread, wait_q, timeout) do { } while (0)
 static inline int _abort_thread_timeout(struct k_thread *thread) { return 0; }
 #define _get_next_timeout_expiry() (K_FOREVER)
diff --git a/kernel/unified/init.c b/kernel/unified/init.c
index 29591da..781d625 100644
--- a/kernel/unified/init.c
+++ b/kernel/unified/init.c
@@ -22,13 +22,13 @@
  */
 
 #include <zephyr.h>
-#include <offsets.h>
+#include <offsets_short.h>
 #include <kernel.h>
 #include <misc/printk.h>
 #include <drivers/rand32.h>
 #include <sections.h>
 #include <toolchain.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <device.h>
 #include <init.h>
 #include <linker-defs.h>
@@ -116,7 +116,7 @@
 #ifdef CONFIG_SYS_CLOCK_EXISTS
 	#include <misc/dlist.h>
 	#define initialize_timeouts() do { \
-		sys_dlist_init(&_nanokernel.timeout_q); \
+		sys_dlist_init(&_timeout_q); \
 	} while ((0))
 #else
 	#define initialize_timeouts() do { } while ((0))
@@ -219,7 +219,7 @@
 	main();
 
 	/* Terminate thread normally since it has no more work to do */
-	_main_thread->flags &= ~K_ESSENTIAL;
+	_main_thread->base.flags &= ~K_ESSENTIAL;
 }
 
 void __weak main(void)
@@ -234,7 +234,7 @@
  * This routine initializes various nanokernel data structures, including
  * the background (or idle) task and any architecture-specific initialization.
  *
- * Note that all fields of "_nanokernel" are set to zero on entry, which may
+ * Note that all fields of "_kernel" are set to zero on entry, which may
  * be all the initialization many of them require.
  *
  * @return N/A
@@ -255,10 +255,10 @@
 	 * Do not insert dummy execution context in the list of fibers, so
 	 * that it does not get scheduled back in once context-switched out.
 	 */
-	dummy_thread->flags = K_ESSENTIAL;
-	dummy_thread->prio = K_PRIO_COOP(0);
+	dummy_thread->base.flags = K_ESSENTIAL;
+	dummy_thread->base.prio = K_PRIO_COOP(0);
 
-	/* _nanokernel.ready_q is all zeroes */
+	/* _kernel.ready_q is all zeroes */
 
 
 	/*
@@ -274,7 +274,7 @@
 	/* ready the init/main and idle threads */
 
 	for (int ii = 0; ii < K_NUM_PRIORITIES; ii++) {
-		sys_dlist_init(&_nanokernel.ready_q.q[ii]);
+		sys_dlist_init(&_ready_q.q[ii]);
 	}
 
 	_new_thread(main_stack, MAIN_STACK_SIZE, NULL,
@@ -359,7 +359,7 @@
 {
 	/* floating point operations are NOT performed during nanokernel init */
 
-	char __stack dummy_thread[__tTCS_NOFLOAT_SIZEOF];
+	char __stack dummy_thread[_K_THREAD_NO_FLOAT_SIZEOF];
 
 	/*
 	 * Initialize nanokernel data structures. This step includes
diff --git a/kernel/unified/kernel_event_logger.c b/kernel/unified/kernel_event_logger.c
index 981d2f9..18b3930 100644
--- a/kernel/unified/kernel_event_logger.c
+++ b/kernel/unified/kernel_event_logger.c
@@ -23,7 +23,7 @@
 #include <misc/kernel_event_logger.h>
 #include <misc/util.h>
 #include <init.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <kernel_event_logger_arch.h>
 #include <misc/__assert.h>
 
@@ -81,7 +81,7 @@
 #ifdef CONFIG_KERNEL_EVENT_LOGGER_CONTEXT_SWITCH
 void _sys_k_event_logger_context_switch(void)
 {
-	extern tNANO _nanokernel;
+	extern struct _kernel _kernel;
 	uint32_t data[2];
 
 	extern void _sys_event_logger_put_non_preemptible(
@@ -101,12 +101,12 @@
 		return;
 	}
 
-	if (_collector_coop_thread == _nanokernel.current) {
+	if (_collector_coop_thread == _kernel.current) {
 		return;
 	}
 
 	data[0] = _sys_k_get_time();
-	data[1] = (uint32_t)_nanokernel.current;
+	data[1] = (uint32_t)_kernel.current;
 
 	/*
 	 * The mechanism we use to log the kernel events uses a sync semaphore
@@ -137,7 +137,7 @@
 {
 	ASSERT_CURRENT_IS_COOP_THREAD();
 
-	_collector_coop_thread = _nanokernel.current;
+	_collector_coop_thread = _kernel.current;
 }
 #endif /* CONFIG_KERNEL_EVENT_LOGGER_CONTEXT_SWITCH */
 
diff --git a/kernel/unified/legacy_offload.c b/kernel/unified/legacy_offload.c
index f652994..007c1d4 100644
--- a/kernel/unified/legacy_offload.c
+++ b/kernel/unified/legacy_offload.c
@@ -22,7 +22,7 @@
  */
 
 #include <kernel.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <ksched.h>
 #include <init.h>
 
@@ -46,7 +46,7 @@
 	int result = (offload->offload_func)(offload->offload_args);
 	unsigned int key = irq_lock();
 
-	offload->thread->swap_data = (void *)result;
+	offload->thread->base.swap_data = (void *)result;
 	irq_unlock(key);
 }
 
@@ -68,7 +68,7 @@
 
 	offload.thread = _current;
 	k_work_submit_to_queue(&offload_work_q, &offload.work_item);
-	return (int)_current->swap_data;
+	return (int)_current->base.swap_data;
 }
 
 static char __stack offload_work_q_stack[CONFIG_OFFLOAD_WORKQUEUE_STACK_SIZE];
diff --git a/kernel/unified/lifo.c b/kernel/unified/lifo.c
index bd662dc..03d5897 100644
--- a/kernel/unified/lifo.c
+++ b/kernel/unified/lifo.c
@@ -20,7 +20,7 @@
  */
 
 #include <kernel.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <misc/debug/object_tracing_common.h>
 #include <toolchain.h>
 #include <sections.h>
@@ -111,5 +111,5 @@
 
 	_pend_current_thread(&lifo->wait_q, timeout);
 
-	return _Swap(key) ? NULL : _current->swap_data;
+	return _Swap(key) ? NULL : _current->base.swap_data;
 }
diff --git a/kernel/unified/mailbox.c b/kernel/unified/mailbox.c
index 5fd7b14..7eb255f 100644
--- a/kernel/unified/mailbox.c
+++ b/kernel/unified/mailbox.c
@@ -19,7 +19,7 @@
  */
 
 #include <kernel.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <misc/debug/object_tracing_common.h>
 #include <toolchain.h>
 #include <sections.h>
@@ -33,7 +33,7 @@
 
 /* asynchronous message descriptor type */
 struct k_mbox_async {
-	struct tcs_base thread;		/* dummy thread object */
+	struct _thread_base thread;		/* dummy thread object */
 	struct k_mbox_msg tx_msg;	/* transmit message descriptor */
 };
 
@@ -201,7 +201,7 @@
 	/* recover sender info */
 	sending_thread = rx_msg->_syncing_thread;
 	rx_msg->_syncing_thread = NULL;
-	tx_msg = (struct k_mbox_msg *)sending_thread->swap_data;
+	tx_msg = (struct k_mbox_msg *)sending_thread->base.swap_data;
 
 	/* update data size field for sender */
 	tx_msg->size = rx_msg->size;
@@ -211,7 +211,7 @@
 	 * asynchronous send: free asynchronous message descriptor +
 	 * dummy thread pair, then give semaphore (if needed)
 	 */
-	if (sending_thread->flags & K_DUMMY) {
+	if (sending_thread->base.flags & K_DUMMY) {
 		struct k_sem *async_sem = tx_msg->_async_sem;
 
 		_mbox_async_free((struct k_mbox_async *)sending_thread);
@@ -258,14 +258,14 @@
 
 	/* finish readying sending thread (actual or dummy) for send */
 	sending_thread = tx_msg->_syncing_thread;
-	sending_thread->swap_data = tx_msg;
+	sending_thread->base.swap_data = tx_msg;
 
 	/* search mailbox's rx queue for a compatible receiver */
 	key = irq_lock();
 
 	SYS_DLIST_FOR_EACH_NODE(&mbox->rx_msg_queue, wait_q_item) {
 		receiving_thread = (struct k_thread *)wait_q_item;
-		rx_msg = (struct k_mbox_msg *)receiving_thread->swap_data;
+		rx_msg = (struct k_mbox_msg *)receiving_thread->base.swap_data;
 
 		if (_mbox_message_match(tx_msg, rx_msg) == 0) {
 			/* take receiver out of rx queue */
@@ -284,7 +284,7 @@
 			 * note: dummy sending thread sits (unqueued)
 			 * until the receiver consumes the message
 			 */
-			if (sending_thread->flags & K_DUMMY) {
+			if (sending_thread->base.flags & K_DUMMY) {
 				_reschedule_threads(key);
 				return 0;
 			}
@@ -308,7 +308,7 @@
 
 #if (CONFIG_NUM_MBOX_ASYNC_MSGS > 0)
 	/* asynchronous send: dummy thread waits on tx queue for receiver */
-	if (sending_thread->flags & K_DUMMY) {
+	if (sending_thread->base.flags & K_DUMMY) {
 		_pend_thread(sending_thread, &mbox->tx_msg_queue, K_FOREVER);
 		irq_unlock(key);
 		return 0;
@@ -340,7 +340,7 @@
 	 */
 	_mbox_async_alloc(&async);
 
-	async->thread.prio = _current->prio;
+	async->thread.prio = _current->base.prio;
 
 	async->tx_msg = *tx_msg;
 	async->tx_msg._syncing_thread = (struct k_thread *)&async->thread;
@@ -448,7 +448,7 @@
 
 	SYS_DLIST_FOR_EACH_NODE(&mbox->tx_msg_queue, wait_q_item) {
 		sending_thread = (struct k_thread *)wait_q_item;
-		tx_msg = (struct k_mbox_msg *)sending_thread->swap_data;
+		tx_msg = (struct k_mbox_msg *)sending_thread->base.swap_data;
 
 		if (_mbox_message_match(tx_msg, rx_msg) == 0) {
 			/* take sender out of mailbox's tx queue */
@@ -472,7 +472,7 @@
 
 	/* wait until a matching sender appears or a timeout occurs */
 	_pend_current_thread(&mbox->rx_msg_queue, timeout);
-	_current->swap_data = rx_msg;
+	_current->base.swap_data = rx_msg;
 	result = _Swap(key);
 
 	/* consume message data immediately, if needed */
@@ -499,7 +499,7 @@
 	}
 
 	/* handle sending message of current thread priority */
-	curr_prio = _current->prio;
+	curr_prio = _current->base.prio;
 	if (prio == curr_prio) {
 		return _error_to_rc(k_mbox_put(mbox, tx_msg,
 					       _ticks_to_ms(timeout)));
@@ -527,7 +527,7 @@
 	unsigned int key;
 
 	/* handle sending message of current thread priority */
-	curr_prio = _current->prio;
+	curr_prio = _current->base.prio;
 	if (prio == curr_prio) {
 		k_mbox_async_put(mbox, tx_msg, sema);
 		return;
diff --git a/kernel/unified/mem_pool.c b/kernel/unified/mem_pool.c
index 930c67d..a509f35 100644
--- a/kernel/unified/mem_pool.c
+++ b/kernel/unified/mem_pool.c
@@ -19,7 +19,7 @@
  */
 
 #include <kernel.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <misc/debug/object_tracing_common.h>
 #include <ksched.h>
 #include <wait_q.h>
@@ -434,7 +434,7 @@
 
 	/* loop all waiters */
 	while (waiter != NULL) {
-		uint32_t req_size = (uint32_t)(waiter->swap_data);
+		uint32_t req_size = (uint32_t)(waiter->base.swap_data);
 
 		/* locate block set to try allocating from */
 		offset = compute_block_set_index(pool, req_size);
@@ -443,7 +443,7 @@
 		found_block = get_block_recursive(pool, offset, offset);
 
 		next_waiter = (struct k_thread *)sys_dlist_peek_next(
-			&pool->wait_q, &waiter->k_q_node);
+			&pool->wait_q, &waiter->base.k_q_node);
 
 		/* if success : remove task from list and reschedule */
 		if (found_block != NULL) {
@@ -509,13 +509,13 @@
 		unsigned int key = irq_lock();
 		_sched_unlock_no_reschedule();
 
-		_current->swap_data = (void *)size;
+		_current->base.swap_data = (void *)size;
 		_pend_current_thread(&pool->wait_q, timeout);
 		result = _Swap(key);
 		if (result == 0) {
 			block->pool_id = pool;
-			block->addr_in_pool = _current->swap_data;
-			block->data = _current->swap_data;
+			block->addr_in_pool = _current->base.swap_data;
+			block->data = _current->base.swap_data;
 			block->req_size = size;
 		}
 		return result;
diff --git a/kernel/unified/mem_slab.c b/kernel/unified/mem_slab.c
index 3402303..6bff6ea 100644
--- a/kernel/unified/mem_slab.c
+++ b/kernel/unified/mem_slab.c
@@ -15,7 +15,7 @@
  */
 
 #include <kernel.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <misc/debug/object_tracing_common.h>
 #include <toolchain.h>
 #include <sections.h>
@@ -109,7 +109,7 @@
 		_pend_current_thread(&slab->wait_q, timeout);
 		result = _Swap(key);
 		if (result == 0) {
-			*mem = _current->swap_data;
+			*mem = _current->base.swap_data;
 		}
 		return result;
 	}
diff --git a/kernel/unified/msg_q.c b/kernel/unified/msg_q.c
index 374a8fa..652ec6f 100644
--- a/kernel/unified/msg_q.c
+++ b/kernel/unified/msg_q.c
@@ -21,7 +21,7 @@
 
 
 #include <kernel.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <misc/debug/object_tracing_common.h>
 #include <toolchain.h>
 #include <sections.h>
@@ -83,7 +83,8 @@
 		pending_thread = _unpend_first_thread(&q->wait_q);
 		if (pending_thread) {
 			/* give message to waiting thread */
-			memcpy(pending_thread->swap_data, data, q->msg_size);
+			memcpy(pending_thread->base.swap_data, data,
+			       q->msg_size);
 			/* wake up waiting thread */
 			_set_thread_return_value(pending_thread, 0);
 			_abort_thread_timeout(pending_thread);
@@ -108,7 +109,7 @@
 	} else {
 		/* wait for put message success, failure, or timeout */
 		_pend_current_thread(&q->wait_q, timeout);
-		_current->swap_data = data;
+		_current->base.swap_data = data;
 		return _Swap(key);
 	}
 
@@ -138,7 +139,7 @@
 		pending_thread = _unpend_first_thread(&q->wait_q);
 		if (pending_thread) {
 			/* add thread's message to queue */
-			memcpy(q->write_ptr, pending_thread->swap_data,
+			memcpy(q->write_ptr, pending_thread->base.swap_data,
 			       q->msg_size);
 			q->write_ptr += q->msg_size;
 			if (q->write_ptr == q->buffer_end) {
@@ -162,7 +163,7 @@
 	} else {
 		/* wait for get message success or timeout */
 		_pend_current_thread(&q->wait_q, timeout);
-		_current->swap_data = data;
+		_current->base.swap_data = data;
 		return _Swap(key);
 	}
 
diff --git a/kernel/unified/mutex.c b/kernel/unified/mutex.c
index ee51c67..be0640a 100644
--- a/kernel/unified/mutex.c
+++ b/kernel/unified/mutex.c
@@ -37,7 +37,7 @@
  */
 
 #include <kernel.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <toolchain.h>
 #include <sections.h>
 #include <wait_q.h>
@@ -116,7 +116,7 @@
 
 static void adjust_owner_prio(struct k_mutex *mutex, int new_prio)
 {
-	if (mutex->owner->prio != new_prio) {
+	if (mutex->owner->base.prio != new_prio) {
 
 		K_DEBUG("%p (ready (y/n): %c) prio changed to %d (was %d)\n",
 			mutex->owner, _is_thread_ready(mutex->owner) ?
@@ -138,7 +138,7 @@
 		RECORD_STATE_CHANGE();
 
 		mutex->owner_orig_prio = mutex->lock_count == 0 ?
-					_current->prio :
+					_current->base.prio :
 					mutex->owner_orig_prio;
 
 		mutex->lock_count++;
@@ -166,7 +166,8 @@
 	}
 	new_prio = _get_new_prio_with_ceiling(new_prio);
 #endif
-	new_prio = new_prio_for_inheritance(_current->prio, mutex->owner->prio);
+	new_prio = new_prio_for_inheritance(_current->base.prio,
+					    mutex->owner->base.prio);
 
 	key = irq_lock();
 
@@ -196,8 +197,8 @@
 		(struct k_thread *)sys_dlist_peek_head(&mutex->wait_q);
 
 	new_prio = mutex->owner_orig_prio;
-	new_prio = waiter ? new_prio_for_inheritance(waiter->prio, new_prio) :
-			    new_prio;
+	new_prio = waiter ? new_prio_for_inheritance(waiter->base.prio,
+						     new_prio) : new_prio;
 
 	K_DEBUG("adjusting prio down on mutex %p\n", mutex);
 
@@ -254,7 +255,7 @@
 		 */
 		mutex->owner = new_owner;
 		mutex->lock_count++;
-		mutex->owner_orig_prio = new_owner->prio;
+		mutex->owner_orig_prio = new_owner->base.prio;
 	} else {
 		irq_unlock(key);
 		mutex->owner = NULL;
diff --git a/kernel/unified/pipes.c b/kernel/unified/pipes.c
index 9708344..b9ac73e 100644
--- a/kernel/unified/pipes.c
+++ b/kernel/unified/pipes.c
@@ -21,7 +21,7 @@
  */
 
 #include <kernel.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <misc/debug/object_tracing_common.h>
 #include <toolchain.h>
 #include <sections.h>
@@ -40,7 +40,7 @@
 };
 
 struct k_pipe_async {
-	struct tcs_base     thread;   /* Dummy thread object */
+	struct _thread_base thread;   /* Dummy thread object */
 	struct k_pipe_desc  desc;     /* Pipe message descriptor */
 };
 
@@ -286,7 +286,7 @@
 		for (node = sys_dlist_peek_head(wait_q); node != NULL;
 		     node = sys_dlist_peek_next(wait_q, node)) {
 			thread = (struct k_thread *)node;
-			desc = (struct k_pipe_desc *)thread->swap_data;
+			desc = (struct k_pipe_desc *)thread->base.swap_data;
 
 			num_bytes += desc->bytes_to_xfer;
 
@@ -309,7 +309,7 @@
 	num_bytes = 0;
 
 	while ((thread = (struct k_thread *) sys_dlist_peek_head(wait_q))) {
-		desc = (struct k_pipe_desc *)thread->swap_data;
+		desc = (struct k_pipe_desc *)thread->base.swap_data;
 		num_bytes += desc->bytes_to_xfer;
 
 		if (num_bytes > bytes_to_xfer) {
@@ -330,7 +330,7 @@
 		 */
 		_unpend_thread(thread);
 		_abort_thread_timeout(thread);
-		sys_dlist_append(xfer_list, &thread->k_q_node);
+		sys_dlist_append(xfer_list, &thread->base.k_q_node);
 	}
 
 	*waiter = (num_bytes > bytes_to_xfer) ? thread : NULL;
@@ -377,7 +377,7 @@
 	unsigned int  key;
 
 #if (CONFIG_NUM_PIPE_ASYNC_MSGS > 0)
-	if (thread->flags & K_DUMMY) {
+	if (thread->base.flags & K_DUMMY) {
 		_pipe_async_finish((struct k_pipe_async *)thread);
 		return;
 	}
@@ -440,7 +440,7 @@
 	struct k_thread *thread = (struct k_thread *)
 				  sys_dlist_get(&xfer_list);
 	while (thread) {
-		desc = (struct k_pipe_desc *)thread->swap_data;
+		desc = (struct k_pipe_desc *)thread->base.swap_data;
 		bytes_copied = _pipe_xfer(desc->buffer, desc->bytes_to_xfer,
 					  data + num_bytes_written,
 					  bytes_to_write - num_bytes_written);
@@ -462,7 +462,7 @@
 	 * It is possible no data will be copied.
 	 */
 	if (reader) {
-		desc = (struct k_pipe_desc *)reader->swap_data;
+		desc = (struct k_pipe_desc *)reader->base.swap_data;
 		bytes_copied = _pipe_xfer(desc->buffer, desc->bytes_to_xfer,
 					  data + num_bytes_written,
 					  bytes_to_write - num_bytes_written);
@@ -515,7 +515,7 @@
 	pipe_desc.bytes_to_xfer  = bytes_to_write - num_bytes_written;
 
 	if (timeout != K_NO_WAIT) {
-		_current->swap_data = &pipe_desc;
+		_current->base.swap_data = &pipe_desc;
 		/*
 		 * Lock interrupts and unlock the scheduler before
 		 * manipulating the writers wait_q.
@@ -584,7 +584,7 @@
 	struct k_thread *thread = (struct k_thread *)
 				  sys_dlist_get(&xfer_list);
 	while (thread && (num_bytes_read < bytes_to_read)) {
-		desc = (struct k_pipe_desc *)thread->swap_data;
+		desc = (struct k_pipe_desc *)thread->base.swap_data;
 		bytes_copied = _pipe_xfer(data + num_bytes_read,
 					  bytes_to_read - num_bytes_read,
 					  desc->buffer, desc->bytes_to_xfer);
@@ -608,7 +608,7 @@
 	}
 
 	if (writer && (num_bytes_read < bytes_to_read)) {
-		desc = (struct k_pipe_desc *)writer->swap_data;
+		desc = (struct k_pipe_desc *)writer->base.swap_data;
 		bytes_copied = _pipe_xfer(data + num_bytes_read,
 					  bytes_to_read - num_bytes_read,
 					  desc->buffer, desc->bytes_to_xfer);
@@ -624,7 +624,7 @@
 	 */
 
 	while (thread) {
-		desc = (struct k_pipe_desc *)thread->swap_data;
+		desc = (struct k_pipe_desc *)thread->base.swap_data;
 		bytes_copied = _pipe_buffer_put(pipe, desc->buffer,
 						desc->bytes_to_xfer);
 
@@ -638,7 +638,7 @@
 	}
 
 	if (writer) {
-		desc = (struct k_pipe_desc *)writer->swap_data;
+		desc = (struct k_pipe_desc *)writer->base.swap_data;
 		bytes_copied = _pipe_buffer_put(pipe, desc->buffer,
 						desc->bytes_to_xfer);
 
@@ -662,7 +662,7 @@
 	pipe_desc.bytes_to_xfer = bytes_to_read - num_bytes_read;
 
 	if (timeout != K_NO_WAIT) {
-		_current->swap_data = &pipe_desc;
+		_current->base.swap_data = &pipe_desc;
 		key = irq_lock();
 		_sched_unlock_no_reschedule();
 		_pend_current_thread(&pipe->wait_q.readers, timeout);
diff --git a/kernel/unified/sched.c b/kernel/unified/sched.c
index b25d62a..a877da5 100644
--- a/kernel/unified/sched.c
+++ b/kernel/unified/sched.c
@@ -15,16 +15,19 @@
  */
 
 #include <kernel.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <atomic.h>
 #include <ksched.h>
 #include <wait_q.h>
 
+/* the only struct _kernel instance */
+struct _kernel _kernel = {0};
+
 /* set the bit corresponding to prio in ready q bitmap */
 static void _set_ready_q_prio_bit(int prio)
 {
 	int bmap_index = _get_ready_q_prio_bmap_index(prio);
-	uint32_t *bmap = &_nanokernel.ready_q.prio_bmap[bmap_index];
+	uint32_t *bmap = &_ready_q.prio_bmap[bmap_index];
 
 	*bmap |= _get_ready_q_prio_bit(prio);
 }
@@ -33,7 +36,7 @@
 static void _clear_ready_q_prio_bit(int prio)
 {
 	int bmap_index = _get_ready_q_prio_bmap_index(prio);
-	uint32_t *bmap = &_nanokernel.ready_q.prio_bmap[bmap_index];
+	uint32_t *bmap = &_ready_q.prio_bmap[bmap_index];
 
 	*bmap &= ~_get_ready_q_prio_bit(prio);
 }
@@ -50,15 +53,16 @@
 
 void _add_thread_to_ready_q(struct k_thread *thread)
 {
-	int q_index = _get_ready_q_q_index(thread->prio);
-	sys_dlist_t *q = &_nanokernel.ready_q.q[q_index];
+	int q_index = _get_ready_q_q_index(thread->base.prio);
+	sys_dlist_t *q = &_ready_q.q[q_index];
 
-	_set_ready_q_prio_bit(thread->prio);
-	sys_dlist_append(q, &thread->k_q_node);
+	_set_ready_q_prio_bit(thread->base.prio);
+	sys_dlist_append(q, &thread->base.k_q_node);
 
-	struct k_thread **cache = &_nanokernel.ready_q.cache;
+	struct k_thread **cache = &_ready_q.cache;
 
-	*cache = *cache && _is_prio_higher(thread->prio, (*cache)->prio) ?
+	*cache = *cache && _is_prio_higher(thread->base.prio,
+					   (*cache)->base.prio) ?
 		 thread : *cache;
 }
 
@@ -71,15 +75,15 @@
 
 void _remove_thread_from_ready_q(struct k_thread *thread)
 {
-	int q_index = _get_ready_q_q_index(thread->prio);
-	sys_dlist_t *q = &_nanokernel.ready_q.q[q_index];
+	int q_index = _get_ready_q_q_index(thread->base.prio);
+	sys_dlist_t *q = &_ready_q.q[q_index];
 
-	sys_dlist_remove(&thread->k_q_node);
+	sys_dlist_remove(&thread->base.k_q_node);
 	if (sys_dlist_is_empty(q)) {
-		_clear_ready_q_prio_bit(thread->prio);
+		_clear_ready_q_prio_bit(thread->base.prio);
 	}
 
-	struct k_thread **cache = &_nanokernel.ready_q.cache;
+	struct k_thread **cache = &_ready_q.cache;
 
 	*cache = *cache == thread ? NULL : *cache;
 }
@@ -103,20 +107,20 @@
 {
 	__ASSERT(!_is_in_isr(), "");
 
-	atomic_inc(&_nanokernel.current->sched_locked);
+	atomic_inc(&_current->base.sched_locked);
 
 	K_DEBUG("scheduler locked (%p:%d)\n",
-		_current, _current->sched_locked);
+		_current, _current->base.sched_locked);
 }
 
 void k_sched_unlock(void)
 {
-	__ASSERT(_nanokernel.current->sched_locked > 0, "");
+	__ASSERT(_current->base.sched_locked > 0, "");
 	__ASSERT(!_is_in_isr(), "");
 
 	int key = irq_lock();
 
-	atomic_dec(&_nanokernel.current->sched_locked);
+	atomic_dec(&_current->base.sched_locked);
 
 	K_DEBUG("scheduler unlocked (%p:%d)\n",
 		_current, _current->sched_locked);
@@ -128,12 +132,15 @@
  * Callback for sys_dlist_insert_at() to find the correct insert point in a
  * wait queue (priority-based).
  */
-static int _is_wait_q_insert_point(sys_dnode_t *dnode_info, void *insert_prio)
+static int _is_wait_q_insert_point(sys_dnode_t *node, void *insert_prio)
 {
 	struct k_thread *waitq_node =
-		CONTAINER_OF(dnode_info, struct k_thread, k_q_node);
+		CONTAINER_OF(
+			CONTAINER_OF(node, struct _thread_base, k_q_node),
+			struct k_thread,
+			base);
 
-	return _is_prio_higher((int)insert_prio, waitq_node->prio);
+	return _is_prio_higher((int)insert_prio, waitq_node->base.prio);
 }
 
 /* convert milliseconds to ticks */
@@ -154,8 +161,9 @@
 {
 	sys_dlist_t *dlist = (sys_dlist_t *)wait_q;
 
-	sys_dlist_insert_at(dlist, &thread->k_q_node,
-			    _is_wait_q_insert_point, (void *)thread->prio);
+	sys_dlist_insert_at(dlist, &thread->base.k_q_node,
+			    _is_wait_q_insert_point,
+			    (void *)thread->base.prio);
 
 	_mark_thread_as_pending(thread);
 
@@ -182,7 +190,7 @@
 {
 	int prio = _get_highest_ready_prio();
 	int q_index = _get_ready_q_q_index(prio);
-	sys_dlist_t *list = &_nanokernel.ready_q.q[q_index];
+	sys_dlist_t *list = &_ready_q.q[q_index];
 
 	__ASSERT(!sys_dlist_is_empty(list),
 		 "no thread to run (prio: %d, queue index: %u)!\n",
@@ -191,7 +199,7 @@
 	struct k_thread *thread =
 		(struct k_thread *)sys_dlist_peek_head_not_empty(list);
 
-	_nanokernel.ready_q.cache = thread;
+	_ready_q.cache = thread;
 
 	return thread;
 }
@@ -200,7 +208,7 @@
 /* must be called with interrupts locked */
 struct k_thread *_get_next_ready_thread(void)
 {
-	struct k_thread *cache = _nanokernel.ready_q.cache;
+	struct k_thread *cache = _ready_q.cache;
 
 	return cache ? cache : __get_next_ready_thread();
 }
@@ -217,7 +225,7 @@
 	extern void _dump_ready_q(void);
 	_dump_ready_q();
 
-	return _is_prio_higher(_get_highest_ready_prio(), _current->prio);
+	return _is_prio_higher(_get_highest_ready_prio(), _current->base.prio);
 }
 
 int _is_next_thread_current(void)
@@ -227,7 +235,7 @@
 
 int  k_thread_priority_get(k_tid_t thread)
 {
-	return thread->prio;
+	return thread->base.prio;
 }
 
 void k_thread_priority_set(k_tid_t tid, int prio)
@@ -255,17 +263,17 @@
  */
 void _move_thread_to_end_of_prio_q(struct k_thread *thread)
 {
-	int q_index = _get_ready_q_q_index(thread->prio);
-	sys_dlist_t *q = &_nanokernel.ready_q.q[q_index];
+	int q_index = _get_ready_q_q_index(thread->base.prio);
+	sys_dlist_t *q = &_ready_q.q[q_index];
 
-	if (sys_dlist_is_tail(q, &thread->k_q_node)) {
+	if (sys_dlist_is_tail(q, &thread->base.k_q_node)) {
 		return;
 	}
 
-	sys_dlist_remove(&thread->k_q_node);
-	sys_dlist_append(q, &thread->k_q_node);
+	sys_dlist_remove(&thread->base.k_q_node);
+	sys_dlist_append(q, &thread->base.k_q_node);
 
-	struct k_thread **cache = &_nanokernel.ready_q.cache;
+	struct k_thread **cache = &_ready_q.cache;
 
 	*cache = *cache == thread ? NULL : *cache;
 }
diff --git a/kernel/unified/sem.c b/kernel/unified/sem.c
index c3baaa5..f16b25c 100644
--- a/kernel/unified/sem.c
+++ b/kernel/unified/sem.c
@@ -27,7 +27,7 @@
  */
 
 #include <kernel.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <misc/debug/object_tracing_common.h>
 #include <toolchain.h>
 #include <sections.h>
@@ -44,7 +44,7 @@
 };
 
 struct _sem_thread {
-	struct tcs_base    dummy;
+	struct _thread_base dummy;
 	struct _sem_desc   desc;
 };
 #endif
@@ -120,7 +120,7 @@
 	sys_dlist_t   list;
 
 	sys_dlist_init(&list);
-	_current->swap_data = &list;
+	_current->base.swap_data = &list;
 
 	for (int i = 0; i < num; i++) {
 		wait_objects[i].dummy.flags = K_DUMMY;
@@ -172,7 +172,7 @@
 	sys_dnode_t  *node;
 	sys_dnode_t  *next;
 
-	if (!(thread->flags & K_DUMMY)) {
+	if (!(thread->base.flags & K_DUMMY)) {
 		/*
 		 * The awakened thread is a real thread and thus was not
 		 * involved in a semaphore group operation.
@@ -185,7 +185,7 @@
 	 * in a semaphore group operation.
 	 */
 
-	list = (sys_dlist_t *)dummy->desc.thread->swap_data;
+	list = (sys_dlist_t *)dummy->desc.thread->base.swap_data;
 	node = sys_dlist_peek_head(list);
 
 	__ASSERT(node != NULL, "");
diff --git a/kernel/unified/stack.c b/kernel/unified/stack.c
index c5729dc..366d0e3 100644
--- a/kernel/unified/stack.c
+++ b/kernel/unified/stack.c
@@ -19,7 +19,7 @@
  */
 
 #include <kernel.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <misc/debug/object_tracing_common.h>
 #include <toolchain.h>
 #include <sections.h>
@@ -116,7 +116,7 @@
 
 	result = _Swap(key);
 	if (result == 0) {
-		*data = (uint32_t)_current->swap_data;
+		*data = (uint32_t)_current->base.swap_data;
 	}
 	return result;
 }
diff --git a/kernel/unified/sys_clock.c b/kernel/unified/sys_clock.c
index d460293..d2934da 100644
--- a/kernel/unified/sys_clock.c
+++ b/kernel/unified/sys_clock.c
@@ -17,7 +17,7 @@
  */
 
 
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <toolchain.h>
 #include <sections.h>
 #include <wait_q.h>
@@ -206,7 +206,7 @@
 		return;
 	}
 
-	if (_is_prio_higher(_current->prio, _time_slice_prio_ceiling)) {
+	if (_is_prio_higher(_current->base.prio, _time_slice_prio_ceiling)) {
 		return;
 	}
 
diff --git a/kernel/unified/thread.c b/kernel/unified/thread.c
index b1e37ca..9a68165 100644
--- a/kernel/unified/thread.c
+++ b/kernel/unified/thread.c
@@ -26,7 +26,7 @@
 #include <toolchain.h>
 #include <sections.h>
 
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <misc/printk.h>
 #include <sys_clock.h>
 #include <drivers/system_timer.h>
@@ -69,7 +69,7 @@
 	if (k_is_in_isr())
 		return NANO_CTX_ISR;
 
-	if (_current->prio < 0)
+	if (_current->base.prio < 0)
 		return NANO_CTX_FIBER;
 
 	return NANO_CTX_TASK;
@@ -86,7 +86,7 @@
  */
 void _thread_essential_set(void)
 {
-	_current->flags |= K_ESSENTIAL;
+	_current->base.flags |= K_ESSENTIAL;
 }
 
 /*
@@ -96,7 +96,7 @@
  */
 void _thread_essential_clear(void)
 {
-	_current->flags &= ~K_ESSENTIAL;
+	_current->base.flags &= ~K_ESSENTIAL;
 }
 
 /*
@@ -106,7 +106,7 @@
  */
 int _is_thread_essential(void)
 {
-	return _current->flags & K_ESSENTIAL;
+	return _current->base.flags & K_ESSENTIAL;
 }
 
 void k_busy_wait(uint32_t usec_to_wait)
@@ -151,12 +151,12 @@
 {
 	unsigned int key = irq_lock();
 
-	if (thread == _nanokernel.threads) {
-		_nanokernel.threads = _nanokernel.threads->next_thread;
+	if (thread == _kernel.threads) {
+		_kernel.threads = _kernel.threads->next_thread;
 	} else {
 		struct k_thread *prev_thread;
 
-		prev_thread = _nanokernel.threads;
+		prev_thread = _kernel.threads;
 		while (thread != prev_thread->next_thread) {
 			prev_thread = prev_thread->next_thread;
 		}
diff --git a/kernel/unified/thread_abort.c b/kernel/unified/thread_abort.c
index deceafc..5ec34f2 100644
--- a/kernel/unified/thread_abort.c
+++ b/kernel/unified/thread_abort.c
@@ -21,7 +21,7 @@
  */
 
 #include <kernel.h>
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <nano_internal.h>
 #include <string.h>
 #include <toolchain.h>
diff --git a/kernel/unified/timer.c b/kernel/unified/timer.c
index 988a600..4067c77 100644
--- a/kernel/unified/timer.c
+++ b/kernel/unified/timer.c
@@ -201,7 +201,6 @@
 {
 	unsigned int key = irq_lock();
 	int32_t remaining_ticks;
-	sys_dlist_t *timeout_q = &_nanokernel.timeout_q;
 
 	if (timer->timeout.delta_ticks_from_prev == -1) {
 		remaining_ticks = 0;
@@ -211,11 +210,11 @@
 		 * and summing up the various tick deltas involved
 		 */
 		struct _timeout *t =
-			(struct _timeout *)sys_dlist_peek_head(timeout_q);
+			(struct _timeout *)sys_dlist_peek_head(&_timeout_q);
 
 		remaining_ticks = t->delta_ticks_from_prev;
 		while (t != &timer->timeout) {
-			t = (struct _timeout *)sys_dlist_peek_next(timeout_q,
+			t = (struct _timeout *)sys_dlist_peek_next(&_timeout_q,
 								   &t->node);
 			remaining_ticks += t->delta_ticks_from_prev;
 		}
diff --git a/kernel/unified/work_q.c b/kernel/unified/work_q.c
index 97e119c..a15535f 100644
--- a/kernel/unified/work_q.c
+++ b/kernel/unified/work_q.c
@@ -21,7 +21,7 @@
  * Workqueue support functions
  */
 
-#include <nano_private.h>
+#include <kernel_structs.h>
 #include <wait_q.h>
 #include <errno.h>