Implement the `$next` keyword for convenient packed structures.
This change adds a new keyword, `$next`, which may be used in the offset
expression of a physical field, and which translates to "the offset of
the first byte after the end of the syntactically previous physical
field."
Essentially, it allows `struct`s or `bits` like:
    struct Foo:
      0     [+4]  UInt  a
      $next [+2]  UInt  b
      $next [+1]  UInt  c
      $next [+4]  UInt  d
... where each field is packed directly after the previous one. See the
updated documentation in `doc/language-reference.md` for details.
This also contains a few incidental changes:
* A new function in `traverse_ir`, for scanning subtrees of the IR. Note
that a similar function used to exist; this is really a resurrection
of that functionality.
* `synthetics.synthesize_fields` has been renamed to
`synthetics.desugar`, since it now does more than just synthesize
fields.
* The "how to use the Emboss compiler" sections of `guide.md` have been
expanded.
* The comment in `compiler/front_end/docs_are_up_to_date_test.py` has
been updated with the correct paths for the open-source Emboss.
diff --git a/compiler/back_end/cpp/BUILD b/compiler/back_end/cpp/BUILD
index 52840f1..fe532a1 100644
--- a/compiler/back_end/cpp/BUILD
+++ b/compiler/back_end/cpp/BUILD
@@ -273,6 +273,17 @@
)
emboss_cc_test(
+ name = "next_keyword_test",
+ srcs = [
+ "testcode/next_keyword_test.cc",
+ ],
+ deps = [
+ "//testdata:next_keyword_emboss",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+emboss_cc_test(
name = "virtual_field_test",
srcs = [
"testcode/virtual_field_test.cc",
diff --git a/compiler/back_end/cpp/testcode/next_keyword_test.cc b/compiler/back_end/cpp/testcode/next_keyword_test.cc
new file mode 100644
index 0000000..c4990f4
--- /dev/null
+++ b/compiler/back_end/cpp/testcode/next_keyword_test.cc
@@ -0,0 +1,55 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Tests of generated code for structures using the `$next` keyword.
+// Note that `$next` is removed from the IR before it reaches the back end, so
+// this is really testing that the front end desugared correctly.
+#include <stdint.h>
+
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "testdata/next_keyword.emb.h"
+
+namespace emboss {
+namespace test {
+namespace {
+
+// [$default byte_order: "LittleEndian"]
+// struct NextKeyword:
+// 0 [+4] UInt value32
+// $next [+2] UInt value16
+// $next [+1] UInt value8
+// $next+3 [+1] UInt value8_offset
+
+TEST(NextKeyword, FieldsAreCorrectlyLocated) {
+ ::std::array<char, NextKeyword::IntrinsicSizeInBytes()> values = {
+ 1, 0, 0, 0,
+ 2, 0,
+ 3,
+ 5, 6, 7, 4,
+ };
+ const auto view = MakeNextKeywordView(&values);
+ EXPECT_TRUE(view.Ok());
+ EXPECT_EQ(1, view.value32().Read());
+ EXPECT_EQ(2, view.value16().Read());
+ EXPECT_EQ(3, view.value8().Read());
+ EXPECT_EQ(4, view.value8_offset().Read());
+}
+
+} // namespace
+} // namespace test
+} // namespace emboss
diff --git a/compiler/front_end/dependency_checker.py b/compiler/front_end/dependency_checker.py
index 3cc6251..fa2d719 100644
--- a/compiler/front_end/dependency_checker.py
+++ b/compiler/front_end/dependency_checker.py
@@ -20,7 +20,20 @@
from compiler.util import traverse_ir
-def _add_reference_to_dependencies(reference, dependencies, name):
+def _add_reference_to_dependencies(reference, dependencies, name,
+ source_file_name, errors):
+ if reference.canonical_name.object_path[0] in {"$is_statically_sized",
+ "$static_size_in_bits",
+ "$next"}:
+ # This error is a bit opaque, but given that the compiler used to crash on
+ # this case -- for a couple of years -- and no one complained, it seems
+ # safe to assume that this is a rare error.
+ errors.append([
+ error.error(source_file_name, reference.source_location,
+ "Keyword `" + reference.canonical_name.object_path[0] +
+ "` may not be used in this context."),
+ ])
+ return
dependencies[name] |= {ir_util.hashable_form_of_reference(reference)}
@@ -37,6 +50,7 @@
def _find_dependencies(ir):
"""Constructs a dependency graph for the entire IR."""
dependencies = {}
+ errors = []
traverse_ir.fast_traverse_ir_top_down(
ir, [ir_pb2.Reference], _add_reference_to_dependencies,
# TODO(bolms): Add handling for references inside of attributes, once
@@ -49,7 +63,10 @@
ir_pb2.EnumValue: _add_name_to_dependencies,
ir_pb2.RuntimeParameter: _add_name_to_dependencies,
},
- parameters={"dependencies": dependencies})
+ parameters={
+ "dependencies": dependencies,
+ "errors": errors,
+ })
traverse_ir.fast_traverse_ir_top_down(
ir, [ir_pb2.FieldReference], _add_field_reference_to_dependencies,
skip_descendants_of={ir_pb2.Attribute},
@@ -59,7 +76,7 @@
ir_pb2.RuntimeParameter: _add_name_to_dependencies,
},
parameters={"dependencies": dependencies})
- return dependencies
+ return dependencies, errors
def _find_dependency_ordering_for_fields_in_structure(
@@ -207,9 +224,10 @@
def _find_object_dependency_cycles(ir):
"""Finds dependency cycles in types in the ir."""
- dependencies = _find_dependencies(ir)
+ dependencies, errors = _find_dependencies(ir)
+ if errors:
+ return errors
cycles = _find_cycles(dict(dependencies))
- errors = []
for cycle in cycles:
# TODO(bolms): This lists the entire strongly-connected component in a
# fairly arbitrary order. This is simple, and handles components that
diff --git a/compiler/front_end/docs_are_up_to_date_test.py b/compiler/front_end/docs_are_up_to_date_test.py
index d103d17..70fd4b3 100644
--- a/compiler/front_end/docs_are_up_to_date_test.py
+++ b/compiler/front_end/docs_are_up_to_date_test.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-"""Tests that g3doc/grammar.md is up to date."""
+"""Tests that doc/grammar.md is up to date."""
import pkgutil
@@ -28,7 +28,7 @@
correct_md = generate_grammar_md.generate_grammar_md()
# If this fails, run:
#
- # bazel run //front_end:generate_grammar_md > g3doc/grammar.md
+ # bazel run //compiler/front_end:generate_grammar_md > doc/grammar.md
#
# Be sure to check that the results look good before committing!
doc_md_lines = doc_md.splitlines()
diff --git a/compiler/front_end/format_emb.py b/compiler/front_end/format_emb.py
index 3ddd20e..ce5d16b 100644
--- a/compiler/front_end/format_emb.py
+++ b/compiler/front_end/format_emb.py
@@ -719,6 +719,7 @@
@_formats('builtin-field-word -> "$size_in_bytes"')
@_formats('builtin-reference -> builtin-word')
@_formats('builtin-word -> "$is_statically_sized"')
+@_formats('builtin-word -> "$next"')
@_formats('builtin-word -> "$static_size_in_bits"')
@_formats('choice-expression -> logical-expression')
@_formats('Comment? -> Comment')
diff --git a/compiler/front_end/glue.py b/compiler/front_end/glue.py
index 2d96a89..261027d 100644
--- a/compiler/front_end/glue.py
+++ b/compiler/front_end/glue.py
@@ -298,7 +298,7 @@
back end, and errors is a list of compilation errors. If errors is not an
empty list, ir will be None.
"""
- passes = (synthetics.synthesize_fields,
+ passes = (synthetics.desugar,
symbol_resolver.resolve_symbols,
dependency_checker.find_dependency_cycles,
dependency_checker.set_dependency_order,
diff --git a/compiler/front_end/module_ir.py b/compiler/front_end/module_ir.py
index aea005c..15312cf 100644
--- a/compiler/front_end/module_ir.py
+++ b/compiler/front_end/module_ir.py
@@ -1265,6 +1265,7 @@
@_handles('builtin-field-word -> "$min_size_in_bytes"')
@_handles('builtin-word -> "$is_statically_sized"')
@_handles('builtin-word -> "$static_size_in_bits"')
+@_handles('builtin-word -> "$next"')
@_handles('constant-word -> ShoutyWord')
@_handles('and-operator -> "&&"')
@_handles('or-operator -> "||"')
diff --git a/compiler/front_end/module_ir_test.py b/compiler/front_end/module_ir_test.py
index c9249b4..0bbd46c 100644
--- a/compiler/front_end/module_ir_test.py
+++ b/compiler/front_end/module_ir_test.py
@@ -3614,6 +3614,31 @@
}
===
+$next
+---
+struct Foo:
+ $next [+0] UInt x
+---
+{
+ "type": [
+ {
+ "structure": {
+ "field": [
+ {
+ "location": {
+ "start": {
+ "builtin_reference": { "source_name": [ { "text": "$next" } ] }
+ }
+ },
+ "name": { "name": { "text": "x" } }
+ }
+ ]
+ }
+ }
+ ]
+}
+
+===
virtual fields
---
struct Foo:
diff --git a/compiler/front_end/synthetics.py b/compiler/front_end/synthetics.py
index a2296e3..aebc7e5 100644
--- a/compiler/front_end/synthetics.py
+++ b/compiler/front_end/synthetics.py
@@ -15,6 +15,7 @@
"""Adds auto-generated virtual fields to the IR."""
from compiler.front_end import attributes
+from compiler.util import error
from compiler.util import expression_parser
from compiler.util import ir_pb2
from compiler.util import ir_util
@@ -215,23 +216,118 @@
structure.field.extend([size_field])
+# The replacement for the "$next" keyword is a simple "start + size" expression.
+# 'x' and 'y' are placeholders, to be replaced.
+_NEXT_KEYWORD_REPLACEMENT_EXPRESSION = expression_parser.parse("x + y")
+
+
+def _maybe_replace_next_keyword_in_expression(expression, last_location,
+ source_file_name, errors):
+ if not expression.HasField("builtin_reference"):
+ return
+ if expression.builtin_reference.canonical_name.object_path[0] != "$next":
+ return
+ if not last_location:
+ errors.append([
+ error.error(source_file_name, expression.source_location,
+ "`$next` may not be used in the first physical field of a " +
+ "structure; perhaps you meant `0`?")
+ ])
+ return
+ original_location = expression.source_location
+ expression.CopyFrom(_NEXT_KEYWORD_REPLACEMENT_EXPRESSION)
+ expression.function.args[0].CopyFrom(last_location.start)
+ expression.function.args[1].CopyFrom(last_location.size)
+ expression.source_location.CopyFrom(original_location)
+  # TODO(bolms): Error messages for circular references?
+ _mark_as_synthetic(expression.function)
+
+
+def _check_for_bad_next_keyword_in_size(expression, source_file_name, errors):
+ if not expression.HasField("builtin_reference"):
+ return
+ if expression.builtin_reference.canonical_name.object_path[0] != "$next":
+ return
+ errors.append([
+ error.error(source_file_name, expression.source_location,
+ "`$next` may only be used in the start expression of a " +
+ "physical field.")
+ ])
+
+
+def _replace_next_keyword(structure, source_file_name, errors):
+ last_physical_field_location = None
+ new_errors = []
+ for field in structure.field:
+ if ir_util.field_is_virtual(field):
+ # TODO(bolms): It could be useful to allow `$next` in a virtual field, in
+ # order to reuse the value (say, to allow overlapping fields in a
+ # mostly-packed structure), but it seems better to add `$end_of(field)`,
+ # `$offset_of(field)`, and `$size_of(field)` constructs of some sort,
+ # instead.
+ continue
+ traverse_ir.fast_traverse_node_top_down(
+ field.location.size, [ir_pb2.Expression],
+ _check_for_bad_next_keyword_in_size,
+ parameters={
+ "errors": new_errors,
+ "source_file_name": source_file_name,
+ })
+ # If `$next` is misused in a field size, it can end up causing a
+ # `RecursionError` in fast_traverse_node_top_down. (When the `$next` node
+ # in the next field is replaced, its replacement gets traversed, but the
+ # replacement also contains a `$next` node, leading to infinite recursion.)
+ #
+ # Technically, we could scan all of the sizes instead of bailing early, but
+ # it seems relatively unlikely that someone will have `$next` in multiple
+ # sizes and not figure out what is going on relatively quickly.
+ if new_errors:
+ errors.extend(new_errors)
+ return
+ traverse_ir.fast_traverse_node_top_down(
+ field.location.start, [ir_pb2.Expression],
+ _maybe_replace_next_keyword_in_expression,
+ parameters={
+ "last_location": last_physical_field_location,
+ "errors": new_errors,
+ "source_file_name": source_file_name,
+ })
+ # The only possible error from _maybe_replace_next_keyword_in_expression is
+ # `$next` occurring in the start expression of the first physical field,
+    # which leads to a similar recursion issue if `$next` is used in the start
+ # expression of the next physical field.
+ if new_errors:
+ errors.extend(new_errors)
+ return
+ last_physical_field_location = field.location
+
+
def _add_virtuals_to_structure(structure, type_definition):
_add_anonymous_aliases(structure, type_definition)
_add_size_virtuals(structure, type_definition)
_add_size_bound_virtuals(structure, type_definition)
-def synthesize_fields(ir):
- """Adds synthetic fields to all structures.
+def desugar(ir):
+ """Translates pure syntactic sugar to its desugared form.
+
+  Replaces `$next` symbols with the start+length of the previous physical
+ field.
Adds aliases for all fields in anonymous `bits` to the enclosing structure.
Arguments:
- ir: The IR to which to add fields.
+ ir: The IR to desugar.
Returns:
A list of errors, or an empty list.
"""
+ errors = []
+ traverse_ir.fast_traverse_ir_top_down(
+ ir, [ir_pb2.Structure], _replace_next_keyword,
+ parameters={"errors": errors})
+ if errors:
+ return errors
traverse_ir.fast_traverse_ir_top_down(
ir, [ir_pb2.Structure], _add_virtuals_to_structure)
return []
diff --git a/compiler/front_end/synthetics_test.py b/compiler/front_end/synthetics_test.py
index 3d3f9a5..904b6cf 100644
--- a/compiler/front_end/synthetics_test.py
+++ b/compiler/front_end/synthetics_test.py
@@ -18,6 +18,7 @@
from compiler.front_end import glue
from compiler.front_end import synthetics
from compiler.front_end import test_util
+from compiler.util import error
from compiler.util import ir_pb2
@@ -36,7 +37,7 @@
ir, unused_debug_info, errors = glue.parse_emboss_file(
"m.emb",
test_util.dict_file_reader({"m.emb": emb_text}),
- stop_before_step="synthesize_fields")
+ stop_before_step="desugar")
assert not errors, errors
return ir
@@ -44,7 +45,7 @@
ir = self._make_ir("struct Foo:\n"
" 0 [+1] UInt x\n"
" 1 [+1] UInt:8[] y\n")
- self.assertEqual([], synthetics.synthesize_fields(ir))
+ self.assertEqual([], synthetics.desugar(ir))
def test_adds_anonymous_bits_fields(self):
ir = self._make_ir("struct Foo:\n"
@@ -57,7 +58,7 @@
" BAR = 0\n"
"bits Bits:\n"
" 0 [+4] UInt uint\n")
- self.assertEqual([], synthetics.synthesize_fields(ir))
+ self.assertEqual([], synthetics.desugar(ir))
structure = ir.module[0].type[0].structure
# The first field should be the anonymous bits structure.
self.assertTrue(structure.field[0].HasField("location"))
@@ -73,7 +74,7 @@
ir = self._make_ir("struct Foo:\n"
" 0 [+1] bits:\n"
" 0 [+4] UInt bar\n")
- self.assertEqual([], synthetics.synthesize_fields(ir))
+ self.assertEqual([], synthetics.desugar(ir))
bits_field = ir.module[0].type[0].structure.field[0]
alias_field = ir.module[0].type[0].structure.field[1]
self.assertEqual("bar", alias_field.name.name.text)
@@ -99,7 +100,7 @@
ir = self._make_ir("struct Foo:\n"
" 0 [+1] bits:\n"
" 0 [+4] UInt bar\n")
- self.assertEqual([], synthetics.synthesize_fields(ir))
+ self.assertEqual([], synthetics.desugar(ir))
bits_field = ir.module[0].type[0].structure.field[0]
alias_field = ir.module[0].type[0].structure.field[1]
self.assertEqual("bar", alias_field.name.name.text)
@@ -115,7 +116,7 @@
" 0 [+1] bits:\n"
" 0 [+4] UInt bar\n"
" 4 [+4] UInt baz (qux)\n")
- self.assertEqual([], synthetics.synthesize_fields(ir))
+ self.assertEqual([], synthetics.desugar(ir))
bar_alias = ir.module[0].type[0].structure.field[1]
baz_alias = ir.module[0].type[0].structure.field[2]
self.assertFalse(bar_alias.HasField("abbreviation"))
@@ -125,7 +126,7 @@
ir = self._make_ir("struct Foo:\n"
" 0 [+1] bits:\n"
" 0 [+4] UInt bar (b)\n")
- self.assertEqual([], synthetics.synthesize_fields(ir))
+ self.assertEqual([], synthetics.desugar(ir))
bits_field = ir.module[0].type[0].subtype[0].structure.field[0]
alias_field = ir.module[0].type[0].structure.field[1]
self.assertFalse(alias_field.name.source_location.is_synthetic)
@@ -148,7 +149,7 @@
ir = self._make_ir("struct Foo:\n"
" 0 [+1] bits:\n"
" 0 [+4] UInt bar (b)\n")
- self.assertEqual([], synthetics.synthesize_fields(ir))
+ self.assertEqual([], synthetics.desugar(ir))
bits_field = ir.module[0].type[0].structure.field[0]
text_output_attribute = self._find_attribute(bits_field, "text_output")
self.assertEqual("Skip", text_output_attribute.value.string_constant.text)
@@ -157,7 +158,7 @@
ir = self._make_ir("struct Foo:\n"
" 0 [+1] bits:\n"
" 0 [+4] UInt bar\n")
- self.assertEqual([], synthetics.synthesize_fields(ir))
+ self.assertEqual([], synthetics.desugar(ir))
bits_field = ir.module[0].type[0].structure.field[0]
attribute = self._find_attribute(bits_field, "text_output")
self.assertTrue(attribute.source_location.is_synthetic)
@@ -170,7 +171,7 @@
ir = self._make_ir("struct Foo:\n"
" 1 [+l] UInt:8[] bytes\n"
" 0 [+1] UInt length (l)\n")
- self.assertEqual([], synthetics.synthesize_fields(ir))
+ self.assertEqual([], synthetics.desugar(ir))
structure = ir.module[0].type[0].structure
size_in_bytes_field = structure.field[2]
max_size_in_bytes_field = structure.field[3]
@@ -193,7 +194,7 @@
ir = self._make_ir("bits Foo:\n"
" 1 [+9] UInt hi\n"
" 0 [+1] Flag lo\n")
- self.assertEqual([], synthetics.synthesize_fields(ir))
+ self.assertEqual([], synthetics.desugar(ir))
structure = ir.module[0].type[0].structure
size_in_bits_field = structure.field[2]
max_size_in_bits_field = structure.field[3]
@@ -216,13 +217,53 @@
ir = self._make_ir("struct Foo:\n"
" 1 [+l] UInt:8[] bytes\n"
" 0 [+1] UInt length (l)\n")
- self.assertEqual([], synthetics.synthesize_fields(ir))
+ self.assertEqual([], synthetics.desugar(ir))
size_in_bytes_field = ir.module[0].type[0].structure.field[2]
self.assertEqual("$size_in_bytes", size_in_bytes_field.name.name.text)
text_output_attribute = self._find_attribute(size_in_bytes_field,
"text_output")
self.assertEqual("Skip", text_output_attribute.value.string_constant.text)
+ def test_replaces_next(self):
+ ir = self._make_ir("struct Foo:\n"
+ " 1 [+2] UInt:8[] a\n"
+ " $next [+4] UInt b\n"
+ " $next [+1] UInt c\n")
+ self.assertEqual([], synthetics.desugar(ir))
+ offset_of_b = ir.module[0].type[0].structure.field[1].location.start
+ self.assertTrue(offset_of_b.HasField("function"))
+ self.assertEqual(offset_of_b.function.function, ir_pb2.Function.ADDITION)
+ self.assertEqual(offset_of_b.function.args[0].constant.value, "1")
+ self.assertEqual(offset_of_b.function.args[1].constant.value, "2")
+ offset_of_c = ir.module[0].type[0].structure.field[2].location.start
+ self.assertEqual(
+ offset_of_c.function.args[0].function.args[0].constant.value, "1")
+ self.assertEqual(
+ offset_of_c.function.args[0].function.args[1].constant.value, "2")
+ self.assertEqual(offset_of_c.function.args[1].constant.value, "4")
+
+ def test_next_in_first_field(self):
+ ir = self._make_ir("struct Foo:\n"
+ " $next [+2] UInt:8[] a\n"
+ " $next [+4] UInt b\n")
+ struct = ir.module[0].type[0].structure
+ self.assertEqual([[
+ error.error("m.emb", struct.field[0].location.start.source_location,
+ "`$next` may not be used in the first physical field of " +
+ "a structure; perhaps you meant `0`?"),
+ ]], synthetics.desugar(ir))
+
+ def test_next_in_size(self):
+ ir = self._make_ir("struct Foo:\n"
+ " 0 [+2] UInt:8[] a\n"
+ " 1 [+$next] UInt b\n")
+ struct = ir.module[0].type[0].structure
+ self.assertEqual([[
+ error.error("m.emb", struct.field[1].location.size.source_location,
+ "`$next` may only be used in the start expression of a " +
+ "physical field."),
+ ]], synthetics.desugar(ir))
+
if __name__ == "__main__":
unittest.main()
diff --git a/compiler/front_end/tokenizer.py b/compiler/front_end/tokenizer.py
index 4262371..752371f 100644
--- a/compiler/front_end/tokenizer.py
+++ b/compiler/front_end/tokenizer.py
@@ -122,7 +122,7 @@
LITERAL_TOKEN_PATTERNS = (
"[ ] ( ) : = + - * . ? == != && || < > <= >= , "
"$static_size_in_bits $is_statically_sized "
- "$max $present $upper_bound $lower_bound "
+ "$max $present $upper_bound $lower_bound $next "
"$size_in_bits $size_in_bytes "
"$max_size_in_bits $max_size_in_bytes $min_size_in_bits $min_size_in_bytes "
"$default struct bits enum external import as if let").split()
diff --git a/compiler/util/traverse_ir.py b/compiler/util/traverse_ir.py
index f0c10d0..ce2c94f 100644
--- a/compiler/util/traverse_ir.py
+++ b/compiler/util/traverse_ir.py
@@ -281,3 +281,31 @@
all_incidental_actions.setdefault(key, []).extend(incidental_action)
_fast_traverse_proto_top_down(ir, all_incidental_actions, pattern,
skip_descendants_of, action, parameters or {})
+
+
+def fast_traverse_node_top_down(node, pattern, action, incidental_actions=None,
+ skip_descendants_of=(), parameters=None):
+ """Traverse a subtree of an IR, executing the given actions.
+
+ fast_traverse_node_top_down is like fast_traverse_ir_top_down, except that:
+
+ It may be called on a subtree, instead of the top of the IR.
+
+ It does not have any built-in incidental actions.
+
+ Arguments:
+    node: An IR node (any subtree of the IR) to walk.
+ pattern: A list of node types to match.
+ action: A callable, which will be called on nodes matching `pattern`.
+ incidental_actions: A dict of node types to callables, which can be used to
+ set new parameters for `action` for part of the IR tree.
+ skip_descendants_of: A list of types whose children should be skipped when
+        traversing `node`.
+    parameters: A dict of top-level parameters.
+
+ Returns:
+ None
+ """
+ _fast_traverse_proto_top_down(node, incidental_actions or {}, pattern,
+ skip_descendants_of or {}, action,
+ parameters or {})
diff --git a/doc/design_docs/field_packing_notation.md b/doc/design_docs/archive/next_keyword.md
similarity index 95%
rename from doc/design_docs/field_packing_notation.md
rename to doc/design_docs/archive/next_keyword.md
index 77ae16e..2f4c554 100644
--- a/doc/design_docs/field_packing_notation.md
+++ b/doc/design_docs/archive/next_keyword.md
@@ -1,5 +1,9 @@
# Design Sketch: Packed Field Notation
+This document is provided for historical interest. This feature is now
+implemented in the form of the `$next` keyword.
+
+
## Motivation
Many structures have many or most fields laid out consecutively, possibly with
diff --git a/doc/grammar.md b/doc/grammar.md
index 7bc99fa..a53abe6 100644
--- a/doc/grammar.md
+++ b/doc/grammar.md
@@ -125,6 +125,7 @@
constant-word -> ShoutyWord
builtin-reference -> builtin-word
builtin-word -> "$is_statically_sized"
+ | "$next"
| "$static_size_in_bits"
boolean-constant -> BooleanConstant
and-expression -> comparison-expression
@@ -349,6 +350,7 @@
`\$present` | `"$present"`
`\$upper_bound` | `"$upper_bound"`
`\$lower_bound` | `"$lower_bound"`
+`\$next` | `"$next"`
`\$size_in_bits` | `"$size_in_bits"`
`\$size_in_bytes` | `"$size_in_bytes"`
`\$max_size_in_bits` | `"$max_size_in_bits"`
diff --git a/doc/guide.md b/doc/guide.md
index d52ebd5..a0b0b7a 100644
--- a/doc/guide.md
+++ b/doc/guide.md
@@ -1,7 +1,5 @@
# Emboss User Guide
-[TOC]
-
## Getting Started
@@ -26,16 +24,41 @@
#### Running the Emboss Compiler
-The Emboss compiler requires Python 3.6 or later.
+The Emboss compiler requires Python 3.6 or later. On a Linux-like system with
+Python 3 installed in the usual place (`/usr/bin/python3`), you can run the
+embossc script at the top level on an `.emb` file to generate C++, like so:
+
+```
+embossc --generate cc --output-path path/to/object/dir path/to/input.emb
+```
+
+If your project is using Bazel, the `build_defs.bzl` file has an
+`emboss_cc_library` rule that you can use from your project.
-#### Using the Generated Code
+#### Using the Generated C++ Code
The code generated by Emboss requires a C++11-compliant compiler, and a
reasonably up-to-date standard library. Emboss has been tested with GCC and
Clang, libc++ and libstd++. In theory, it should work with MSVC, ICC, etc., but
it has not been tested, so there are likely to be bugs.
+The generated C++ code lives entirely in a `.h` file, one per `.emb` file. All
+of the generated code is in C++ templates or (in a very few cases) `inline`
+functions. The generated code is structured this way in order to implement
+"pay-as-you-use" for code size: any functions, methods, or views that are not
+used by your code won't end up in your final binary. This is often important
+for environments like microcontrollers!
+
+There is an Emboss runtime library (under `runtime/cpp`), which is also
+header-only. You will need to add the root of the Emboss source tree to your
+`#include` path.
+
+Note that it is *strongly* recommended that you compile your release code with
+at least some optimizations: `-Os` or `-O2`. The Emboss generated code leans
+fairly heavily on your C++ compiler's inlining and common code elimination to
+produce fast, lean compiled code.
+
#### Contributing to the Compiler
@@ -455,6 +478,6 @@
### Use the `.emb` Autoformatter
You can use the `.emb` autoformatter to avoid manual formatting. For now, it is
-available at `front_end/format.py`.
+available at `compiler/front_end/format.py`.
*TODO(bolms): Package the Emboss tools for easy workstation installation.*
diff --git a/doc/language-reference.md b/doc/language-reference.md
index 3a07bce..6084421 100644
--- a/doc/language-reference.md
+++ b/doc/language-reference.md
@@ -1,7 +1,5 @@
# Emboss Language Reference
-[TOC]
-
## Top Level Structure
An `.emb` file contains four sections: a documentation block, imports, an
@@ -328,6 +326,48 @@
Emboss construct.
+#### `$next`
+
+The keyword `$next` may be used in the offset expression of a physical field:
+
+```
+struct Foo:
+ 0 [+4] UInt x
+ $next [+2] UInt y
+ $next [+1] UInt z
+ $next [+4] UInt q
+```
+
+`$next` translates to a built-in constant meaning "the end of the previous
+physical field." In the example above, `y` will start at offset 4 (0 + 4), `z`
+starts at offset 6 (4 + 2), and `q` at 7 (6 + 1).
+
+`$next` may be used in `bits` as well as `struct`s:
+
+```
+bits Bar:
+ 0 [+4] UInt x
+ $next [+2] UInt y
+ $next [+1] UInt z
+ $next [+4] UInt q
+```
+
+You may use `$next` like a regular variable. For example, if you want to leave
+a two-byte gap between `z` and `q` (so that `q` starts at offset 9):
+
+```
+struct Foo:
+ 0 [+4] UInt x
+ $next [+2] UInt y
+ $next [+1] UInt z
+ $next+2 [+4] UInt q
+```
+
+`$next` is particularly useful if your datasheet defines structures as lists of
+fields without offsets, or if you are translating from a C or C++ packed
+`struct`.
+
+
#### Parameters
`struct`s and `bits` can take runtime parameters:
diff --git a/testdata/BUILD b/testdata/BUILD
index 1791d66..9c68b22 100644
--- a/testdata/BUILD
+++ b/testdata/BUILD
@@ -51,6 +51,7 @@
"int_sizes.emb",
"nested_structure.emb",
"no_cpp_namespace.emb",
+ "next_keyword.emb",
"parameters.emb",
"requires.emb",
"subtypes.emb",
@@ -286,6 +287,13 @@
)
emboss_cc_library(
+ name = "next_keyword_emboss",
+ srcs = [
+ "next_keyword.emb",
+ ],
+)
+
+emboss_cc_library(
name = "complex_offset_emboss",
srcs = [
"complex_offset.emb",
diff --git a/testdata/format/fields_aligned.emb b/testdata/format/fields_aligned.emb
index 2a1ccac..2a82485 100644
--- a/testdata/format/fields_aligned.emb
+++ b/testdata/format/fields_aligned.emb
@@ -18,3 +18,4 @@
0 [+1] UInt short
10 [+1] UInt:8 medium
1_000_000 [+8] UInt:64 long
+ $next [+2] UInt packed
diff --git a/testdata/format/fields_aligned.emb.formatted b/testdata/format/fields_aligned.emb.formatted
index ae2f28e..d907b28 100644
--- a/testdata/format/fields_aligned.emb.formatted
+++ b/testdata/format/fields_aligned.emb.formatted
@@ -19,3 +19,4 @@
0 [+1] UInt short
10 [+1] UInt:8 medium
1_000_000 [+8] UInt:64 long
+ $next [+2] UInt packed
diff --git a/testdata/format/fields_aligned.emb.formatted_indent_4 b/testdata/format/fields_aligned.emb.formatted_indent_4
index fd14ce2..814335f 100644
--- a/testdata/format/fields_aligned.emb.formatted_indent_4
+++ b/testdata/format/fields_aligned.emb.formatted_indent_4
@@ -19,3 +19,4 @@
0 [+1] UInt short
10 [+1] UInt:8 medium
1_000_000 [+8] UInt:64 long
+ $next [+2] UInt packed
diff --git a/testdata/next_keyword.emb b/testdata/next_keyword.emb
new file mode 100644
index 0000000..7c0a91a
--- /dev/null
+++ b/testdata/next_keyword.emb
@@ -0,0 +1,25 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+-- Test structures for the `$next` keyword.
+
+[$default byte_order: "LittleEndian"]
+[(cpp) namespace: "emboss::test"]
+
+
+struct NextKeyword:
+ 0 [+4] UInt value32
+ $next [+2] UInt value16
+ $next [+1] UInt value8
+ $next+3 [+1] UInt value8_offset