userspace: Support for split 64 bit arguments

System call arguments, at the arch layer, are single words.  So
passing wider values requires splitting them into two registers at
call time.  This gets even more complicated for values (e.g.
k_timeout_t) that may have different sizes depending on configuration.
This patch adds a feature to gen_syscalls.py to detect functions with
wide arguments and automatically generate code to split/unsplit them.

Unfortunately the current scheme of Z_SYSCALL_DECLARE_* macros won't
work with functions like this, because for N arguments (our current
maximum N is 10) there are 2^N possible configurations of argument
widths.  So this generates the complete functions for each handler and
wrapper, effectively doing in python what was originally done in the
preprocessor.

Another complexity is that traditionally the z_hdlr_*() function for a
system call has taken the raw list of word arguments, which does not
work when some of those arguments must be 64 bit types.  So instead of
using a single Z_SYSCALL_HANDLER macro, this splits the job of
z_hdlr_*() into two steps: An automatically-generated unmarshalling
function, z_mrsh_*(), which then calls a user-supplied verification
function z_vrfy_*().  The verification function is typesafe, and is a
simple C function with exactly the same argument and return signature
as the syscall impl function.  It is also not responsible for
validating the pointers to the extra parameter array or a wide return
value; that code gets automatically generated.

This commit includes new vrfy/mrsh handling for all syscalls invoked
during CI runs.  Future commits will port the less testable code.

Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
diff --git a/scripts/gen_syscalls.py b/scripts/gen_syscalls.py
index ada81ac..7d8bff5 100755
--- a/scripts/gen_syscalls.py
+++ b/scripts/gen_syscalls.py
@@ -29,6 +29,20 @@
 import os
 import json
 
+types64 = ["s64_t", "u64_t"]
+
+# The kernel linkage is complicated.  These functions from
+# userspace_handlers.c are present in the kernel .a library after
+# userspace.c, which contains the weak fallbacks defined here.  So the
+# linker finds the weak one first and stops searching, and thus won't
+# see the real implementation which should override.  Yet changing the
+# order runs afoul of a comment in CMakeLists.txt that the order is
+# critical.  These are core syscalls that won't ever be unconfigured,
+# so just disable the fallback mechanism as a simple workaround.
+noweak = set(["z_mrsh_k_object_release",
+              "z_mrsh_k_object_access_grant",
+              "z_mrsh_k_object_alloc"])
+
 table_template = """/* auto-generated by gen_syscalls.py, don't edit */
 
 /* Weak handler functions that get replaced by the real ones unless a system
@@ -52,16 +66,6 @@
 
 #include <zephyr/types.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-%s
-
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _ASMLANGUAGE */
 
 #endif /* ZEPHYR_SYSCALL_LIST_H */
@@ -69,12 +73,16 @@
 
 syscall_template = """
 /* auto-generated by gen_syscalls.py, don't edit */
+%s
 
 #ifndef _ASMLANGUAGE
 
 #include <syscall_list.h>
 #include <syscall_macros.h>
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -85,11 +93,14 @@
 }
 #endif
 
+#pragma GCC diagnostic pop
+
 #endif
+#endif /* include guard */
 """
 
 handler_template = """
-extern u32_t %s(u32_t arg1, u32_t arg2, u32_t arg3,
+extern u32_t z_hdlr_%s(u32_t arg1, u32_t arg2, u32_t arg3,
                 u32_t arg4, u32_t arg5, u32_t arg6, void *ssf);
 """
 
@@ -124,6 +135,164 @@
     m = mo.groups()
     return (m[0].strip(), m[1])
 
+def need_split(argtype):
+    return argtype in types64
+
+# Note: "lo" and "hi" are named in little endian conventions,
+# but it doesn't matter as long as they are consistently
+# generated.
+def union_decl(type):
+    return "union { struct { u32_t lo, hi; } split; %s val; }" % type
+
+def wrapper_defs(func_name, func_type, args):
+    ret64 = func_type in types64
+    mrsh_args = [] # List of rvalue expressions for the marshalled invocation
+    split_args = []
+    nsplit = 0
+    for i, argrec in enumerate(args):
+        (argtype, argname) = argrec
+        if need_split(argtype):
+            split_args.append((argtype, argname))
+            mrsh_args.append("parm%d.split.lo" % nsplit)
+            mrsh_args.append("parm%d.split.hi" % nsplit)
+            nsplit += 1
+        else:
+            mrsh_args.append("*(u32_t *)&" + argname)
+
+    if ret64:
+        mrsh_args.append("(u32_t)&ret64")
+
+    decl_arglist = ", ".join([" ".join(argrec) for argrec in args])
+
+    wrap = "extern %s z_impl_%s(%s);\n" % (func_type, func_name, decl_arglist)
+    wrap += "static inline %s %s(%s)\n" % (func_type, func_name, decl_arglist)
+    wrap += "{\n"
+    wrap += "#ifdef CONFIG_USERSPACE\n"
+    wrap += ("\t" + "u64_t ret64;\n") if ret64 else ""
+    wrap += "\t" + "if (z_syscall_trap()) {\n"
+
+    for parmnum, rec in enumerate(split_args):
+        (argtype, argname) = rec
+        wrap += "\t\t%s parm%d;\n" % (union_decl(argtype), parmnum)
+        wrap += "\t\t" + "parm%d.val = %s;\n" % (parmnum, argname)
+
+    if len(mrsh_args) > 6:
+        wrap += "\t\t" + "u32_t more[] = {\n"
+        wrap += "\t\t\t" + (",\n\t\t\t".join(mrsh_args[5:])) + "\n"
+        wrap += "\t\t" + "};\n"
+        mrsh_args[5:] = ["(u32_t) &more"]
+
+    syscall_id = "K_SYSCALL_" + func_name.upper()
+    invoke = ("z_arch_syscall_invoke%d(%s)"
+              % (len(mrsh_args),
+                 ", ".join(mrsh_args + [syscall_id])))
+
+    if ret64:
+        wrap += "\t\t" + "(void)%s;\n" % invoke
+        wrap += "\t\t" + "return (%s)ret64;\n" % func_type
+    elif func_type == "void":
+        wrap += "\t\t" + "%s;\n" % invoke
+        wrap += "\t\t" + "return;\n";
+    else:
+        wrap += "\t\t" + "return (%s) %s;\n" % (func_type, invoke)
+
+    wrap += "\t" + "}\n"
+    wrap += "#endif\n"
+
+    # Otherwise fall through to direct invocation of the impl func.
+    # Note the compiler barrier: that is required to prevent code from
+    # the impl call from being hoisted above the check for user
+    # context.
+    impl_arglist = ", ".join([argrec[1] for argrec in args])
+    impl_call = "z_impl_%s(%s)" % (func_name, impl_arglist)
+    wrap += "\t" + "compiler_barrier();\n"
+    wrap += "\t" + "%s%s;\n" % ("return " if func_type != "void" else "",
+                               impl_call)
+
+    wrap += "}\n"
+
+    return wrap
+
+# Returns an expression for the specified (zero-indexed!) marshalled
+# parameter to a syscall, with handling for a final "more" parameter.
+def mrsh_rval(mrsh_num, total):
+    if mrsh_num < 5 or total <= 6:
+        return "arg%d" % mrsh_num
+    else:
+        return "(((u32_t *)more)[%d])" % (mrsh_num - 5)
+
+def marshall_defs(func_name, func_type, args):
+    mrsh_name = "z_mrsh_" + func_name
+
+    nmrsh = 0        # number of marshalled u32_t parameter
+    vrfy_parms = []  # list of (arg_num, mrsh_or_parm_num, bool_is_split)
+    split_parms = [] # list of a (arg_num, mrsh_num) for each split
+    for i, argrec in enumerate(args):
+        (argtype, argname) = argrec
+        if need_split(argtype):
+            vrfy_parms.append((i, len(split_parms), True))
+            split_parms.append((i, nmrsh))
+            nmrsh += 2
+        else:
+            vrfy_parms.append((i, nmrsh, False))
+            nmrsh += 1
+
+    # Final argument for a 64 bit return value?
+    if func_type in types64:
+        nmrsh += 1
+
+    decl_arglist = ", ".join([" ".join(argrec) for argrec in args])
+    mrsh = "extern %s z_vrfy_%s(%s);\n" % (func_type, func_name, decl_arglist)
+
+    mrsh += "u32_t %s(u32_t arg0, u32_t arg1, u32_t arg2,\n" % mrsh_name
+    if nmrsh <= 6:
+        mrsh += "\t\t" + "u32_t arg3, u32_t arg4, u32_t arg5, void *ssf)\n";
+    else:
+        mrsh += "\t\t" + "u32_t arg3, u32_t arg4, void *more, void *ssf)\n";
+    mrsh += "{\n"
+    mrsh += "\t" + "_current_cpu->syscall_frame = ssf;\n";
+
+    for unused_arg in range(nmrsh, 6):
+        mrsh += "\t(void) arg%d;\t/* unused */\n" % unused_arg
+
+    if nmrsh > 6:
+        mrsh += ("\tZ_OOPS(Z_SYSCALL_MEMORY_READ(more, "
+                 + str(nmrsh - 6) + " * sizeof(u32_t)));\n")
+
+    for i, split_rec in enumerate(split_parms):
+        arg_num, mrsh_num = split_rec
+        arg_type = args[arg_num][0];
+        mrsh += "\t%s parm%d;\n" % (union_decl(arg_type), i);
+        mrsh += "\t" + "parm%d.split.lo = %s;\n" % (i, mrsh_rval(mrsh_num,
+                                                                 nmrsh))
+        mrsh += "\t" + "parm%d.split.hi = %s;\n" % (i, mrsh_rval(mrsh_num + 1,
+                                                                 nmrsh))
+    # Finally, invoke the verify function
+    out_args = []
+    for i, argn, is_split in vrfy_parms:
+        if is_split:
+            out_args.append("parm%d.val" % argn)
+        else:
+            out_args.append("*(%s*)&%s" % (args[i][0], mrsh_rval(argn, nmrsh)))
+
+    vrfy_call = "z_vrfy_%s(%s)\n" % (func_name, ", ".join(out_args))
+
+    if func_type == "void":
+        mrsh += "\t" + "%s;\n" % vrfy_call
+        mrsh += "\t" + "return 0;\n"
+    else:
+        mrsh += "\t" + "%s ret = %s;\n" % (func_type, vrfy_call)
+        if func_type in types64:
+            ptr = "((u64_t *)%s)" % mrsh_rval(nmrsh - 1, nmrsh)
+            mrsh += "\t" + "Z_OOPS(Z_SYSCALL_MEMORY_WRITE(%s, 8));\n" % ptr
+            mrsh += "\t" + "*%s = ret;\n" % ptr
+            mrsh += "\t" + "return 0;\n"
+        else:
+            mrsh += "\t" + "return (u32_t) ret;\n"
+
+    mrsh += "}\n"
+
+    return mrsh, mrsh_name
 
 def analyze_fn(match_group):
     func, args = match_group
@@ -141,39 +310,14 @@
 
     sys_id = "K_SYSCALL_" + func_name.upper()
 
-    if func_type == "void":
-        suffix = "_VOID"
-        is_void = True
-    else:
-        is_void = False
-        if func_type in ["s64_t", "u64_t"]:
-            suffix = "_RET64"
-        else:
-            suffix = ""
-
-    is_void = (func_type == "void")
-
-    # Get the proper system call macro invocation, which depends on the
-    # number of arguments, the return type, and whether the implementation
-    # is an inline function
-    macro = "K_SYSCALL_DECLARE%d%s" % (len(args), suffix)
-
-    # Flatten the argument lists and generate a comma separated list
-    # of t0, p0, t1, p1, ... tN, pN as expected by the macros
-    flat_args = [i for sublist in args for i in sublist]
-    if not is_void:
-        flat_args = [func_type] + flat_args
-    flat_args = [sys_id, func_name] + flat_args
-    argslist = ", ".join(flat_args)
-
-    invocation = "%s(%s)" % (macro, argslist)
-
-    handler = "z_hdlr_" + func_name
+    marshaller = None
+    marshaller, handler = marshall_defs(func_name, func_type, args)
+    invocation = wrapper_defs(func_name, func_type, args)
 
     # Entry in _k_syscall_table
     table_entry = "[%s] = %s" % (sys_id, handler)
 
-    return (handler, invocation, sys_id, table_entry)
+    return (handler, invocation, marshaller, sys_id, table_entry)
 
 def parse_args():
     global args
@@ -189,22 +333,30 @@
                         help="output C system call list header")
     parser.add_argument("-o", "--base-output", required=True,
                         help="Base output directory for syscall macro headers")
+    parser.add_argument("-s", "--split-type", action="append",
+                        help="A long type that must be split/marshalled")
     args = parser.parse_args()
 
 
 def main():
     parse_args()
 
+    if args.split_type != None:
+        for t in args.split_type:
+            types64.append(t)
+
     with open(args.json_file, 'r') as fd:
         syscalls = json.load(fd)
 
     invocations = {}
+    mrsh_defs = {}
+    mrsh_includes = {}
     ids = []
     table_entries = []
     handlers = []
 
     for match_group, fn in syscalls:
-        handler, inv, sys_id, entry = analyze_fn(match_group)
+        handler, inv, mrsh, sys_id, entry = analyze_fn(match_group)
 
         if fn not in invocations:
             invocations[fn] = []
@@ -214,12 +366,24 @@
         table_entries.append(entry)
         handlers.append(handler)
 
+        if mrsh:
+            syscall = typename_split(match_group[0])[1]
+            mrsh_defs[syscall] = mrsh
+            mrsh_includes[syscall] = "#include <syscalls/%s>" % fn
+
     with open(args.syscall_dispatch, "w") as fp:
         table_entries.append("[K_SYSCALL_BAD] = handler_bad_syscall")
 
-        weak_defines = "".join([weak_template % name for name in handlers])
+        weak_defines = "".join([weak_template % name
+                                for name in handlers
+                                if not name in noweak])
 
-        fp.write(table_template % (weak_defines, ",\n\t".join(table_entries)))
+        # The "noweak" ones just get a regular declaration
+        weak_defines += "\n".join(["extern u32_t %s(u32_t arg1, u32_t arg2, u32_t arg3, u32_t arg4, u32_t arg5, u32_t arg6, void *ssf);"
+                                   % s for s in noweak])
+
+        fp.write(table_template % (weak_defines,
+                                   ",\n\t".join(table_entries)))
 
     # Listing header emitted to stdout
     ids.sort()
@@ -229,19 +393,32 @@
     for i, item in enumerate(ids):
         ids_as_defines += "#define {} {}\n".format(item, i)
 
-    handler_defines = "".join([handler_template % name for name in handlers])
     with open(args.syscall_list, "w") as fp:
-        fp.write(list_template % (ids_as_defines, handler_defines))
+        fp.write(list_template % ids_as_defines)
 
     os.makedirs(args.base_output, exist_ok=True)
     for fn, invo_list in invocations.items():
         out_fn = os.path.join(args.base_output, fn)
 
-        header = syscall_template % "\n\n".join(invo_list)
+        ig = re.sub("[^a-zA-Z0-9]", "_", "Z_INCLUDE_SYSCALLS_" + fn).upper()
+        include_guard = "#ifndef %s\n#define %s\n" % (ig, ig)
+        header = syscall_template % (include_guard, "\n\n".join(invo_list))
 
         with open(out_fn, "w") as fp:
             fp.write(header)
 
+    # Likewise emit _mrsh.c files for syscall inclusion
+    for fn in mrsh_defs:
+        mrsh_fn = os.path.join(args.base_output, fn + "_mrsh.c")
+
+        with open(mrsh_fn, "w") as fp:
+            fp.write("/* auto-generated by gen_syscalls.py, don't edit */\n")
+            fp.write("#pragma GCC diagnostic push\n")
+            fp.write("#pragma GCC diagnostic ignored \"-Wstrict-aliasing\"\n")
+            fp.write(mrsh_includes[fn] + "\n")
+            fp.write("\n")
+            fp.write(mrsh_defs[fn] + "\n")
+            fp.write("#pragma GCC diagnostic pop\n")
 
 if __name__ == "__main__":
     main()