blob: ec5a662fd641148464b7a0962e41f02dc13dade6 [file] [log] [blame]
// Copyright 2024 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
// Program hpack_gen generates a C++ file to help parse and emit HPACK.
package main
import (
"fmt"
)
func main() {
var statsBefore, statsAfter Stats
buildTree()
validate(&rootNode)
computeStats(&statsBefore, &rootNode)
optimize(&rootNode)
validate(&rootNode)
computeStats(&statsAfter, &rootNode)
assignTableIndices(&rootNode, 0)
fmt.Println("// Copyright 2024 The Pigweed Authors")
fmt.Println("//")
fmt.Println("// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not")
fmt.Println("// use this file except in compliance with the License. You may obtain a copy of")
fmt.Println("// the License at")
fmt.Println("//")
fmt.Println("// https://www.apache.org/licenses/LICENSE-2.0")
fmt.Println("//")
fmt.Println("// Unless required by applicable law or agreed to in writing, software")
fmt.Println("// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT")
fmt.Println("// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the")
fmt.Println("// License for the specific language governing permissions and limitations under")
fmt.Println("// the License.")
fmt.Println("//")
fmt.Println("// AUTOGENERATED FILE, DO NOT EDIT")
fmt.Println("//")
fmt.Println("// To regenerate, run `go run gen/hpack_gen.go > hpack.autogen.inc`.")
fmt.Println("// This file should be included in exactly one *.cc file.")
fmt.Println("//")
fmt.Println()
fmt.Println("// clang-format off")
printDecoderTable(&statsAfter)
fmt.Println()
fmt.Printf("// Decoder table stats:\n")
fmt.Printf("// before optimization = %+v\n", statsBefore)
fmt.Printf("// after optimization = %+v\n", statsAfter)
fmt.Println()
printStaticResponses()
}
type NodeType int
const (
BranchNode NodeType = iota
OutputNode
UnprintableNode
EOSNode
)
type Node struct {
t NodeType
output int // if t == OutputNode, byte to output
tableIndex int // if t == BranchNode, index in the decoder table
child [2]*Node
}
var rootNode = Node{t: BranchNode}
type Stats struct {
numBranchNodes int
numOutputNodes int
numUnprintableNodes int
numInvalidNodes int
}
func buildTree() {
for out, code := range huffmanTable {
updateTree(out, code)
}
}
func updateTree(out int, code string) {
if len(code) == 0 {
panic(fmt.Sprintf("code for byte %v is empty", out))
}
curr := &rootNode
for i := 0; i < len(code); i++ {
if code[i] != '0' && code[i] != '1' {
panic(fmt.Sprintf("code for byte %v has invalid bit '%v'", out, code[i]))
}
bit := code[i] - '0'
// Middle of the code: create a branch node if needed.
if i < len(code)-1 {
next := curr.child[bit]
if next == nil {
next = &Node{t: BranchNode}
curr.child[bit] = next
} else if next.t != BranchNode {
panic(fmt.Sprintf("code for byte %v found non-branch node at bit number %v", out, i))
}
curr = next
continue
}
// End of the code: create a leaf node.
// Each code should lead to a unique leaf node.
if curr.child[bit] != nil {
panic(fmt.Sprintf("code for byte %v reached a duplicate leaf node", out))
}
switch {
case out == EOS:
curr.child[bit] = &Node{t: EOSNode}
case out < 32 || 127 < out:
// Unprintable characters are allowed by the Huffman code but not in gRPC.
// See: https://github.com/grpc/grpc/blob/v1.60.x/doc/PROTOCOL-HTTP2.md#requests.
curr.child[bit] = &Node{t: UnprintableNode}
default:
curr.child[bit] = &Node{t: OutputNode, output: out}
}
}
}
func validate(node *Node) {
// The binary tree should be complete: there should not be missing leaf nodes.
if node == nil {
panic("unexpected nil in binary tree")
}
if node.t != BranchNode {
if node.child[0] != nil || node.child[1] != nil {
panic(fmt.Sprintf("unexpected child under node %+v", node))
}
return
}
validate(node.child[0])
validate(node.child[1])
}
func optimize(node *Node) {
if node.t != BranchNode {
return
}
optimize(node.child[0])
optimize(node.child[1])
// Collapse each unprintable subtree into a single unprintable node.
if node.child[0].t == UnprintableNode && node.child[1].t == UnprintableNode {
node.t = UnprintableNode
node.child[0] = nil
node.child[1] = nil
}
}
func computeStats(stats *Stats, node *Node) {
if node == nil {
stats.numInvalidNodes++
return
}
switch node.t {
case BranchNode:
stats.numBranchNodes++
computeStats(stats, node.child[0])
computeStats(stats, node.child[1])
case OutputNode:
stats.numOutputNodes++
case UnprintableNode:
stats.numUnprintableNodes++
case EOSNode:
stats.numInvalidNodes++
}
}
func assignTableIndices(node *Node, idx int) int {
if node.t != BranchNode {
return idx
}
node.tableIndex = idx
idx++
// We use a preorder traversal to ensure the root node has index 0.
idx = assignTableIndices(node.child[0], idx)
idx = assignTableIndices(node.child[1], idx)
return idx
}
const decoderTablePrefix = `
// Huffman decoder table. Decoding starts at index=0. For each bit, we inspect
// kHuffmanDecoderTable[index][bit] and take an action based on that value:
//
// * If the value matches 0b0..._...., set index=value
// * If the value matches 0b1xxx_xxxx, output byte 32 + xx_xxxx, set index=0
// * If the value matches 0b1111_1110, fail: unprintable character
// * If the value matches 0b1111_1111, fail: decoder entered an invalid state
//
static constexpr std::array<uint8_t[2], %d> kHuffmanDecoderTable = {{
`
const decoderTableSuffix = `
}};
`
func printDecoderTable(stats *Stats) {
fmt.Printf(decoderTablePrefix, stats.numBranchNodes)
printDecoderTableEntries(&rootNode)
fmt.Print(decoderTableSuffix)
}
func printDecoderTableEntries(node *Node) {
if node.t != BranchNode {
return
}
// Print nodes in preorder, which is the same order the indices were created.
fmt.Printf(" /*%v=*/ {%v, %v},\n",
node.tableIndex,
toDecoderTableEntry(node.child[0]),
toDecoderTableEntry(node.child[1]))
printDecoderTableEntries(node.child[0])
printDecoderTableEntries(node.child[1])
}
func toDecoderTableEntry(node *Node) uint8 {
switch node.t {
case BranchNode:
if node.tableIndex > 127 {
panic(fmt.Sprintf("BranchNode index %d > 127", node.tableIndex))
}
return uint8(node.tableIndex)
case OutputNode:
return uint8(node.output-32) | 0b1000_0000
case UnprintableNode:
return 0b1111_1110
case EOSNode:
// RFC 7541 ยง5.2: "A Huffman-encoded string literal containing the EOS
// symbol MUST be treated as a decoding error."
return 0b1111_1111
}
panic(fmt.Sprintf("unexpected node type %v", node.t))
}
const responseHeaderFieldsPrefix = `
// HPACK-encoded header fields which form grpc Response-Headers.
// These are the same for every response.
static constexpr std::array<uint8_t, %d> kResponseHeaderFields = {
`
const responseHeaderFieldsSuffix = `
};
`
const maxStatusCode = 16
const responseTrailerFieldsPrefix = `
// HPACK-encoded header fields which form grpc Trailers. All response Trailers
// are identical except for the status code.
//
// This is indexed by pw::Status::Code, which happens to be identical to grpc's
// status code.
struct ResponseTrailerPayload {
uint32_t size;
std::array<uint8_t, %d> bytes;
};
static constexpr std::array<ResponseTrailerPayload, %d> kResponseTrailerFields = {{
`
const responseTrailerFieldsSuffix = `
}};
`
func printStaticResponses() {
respHeaderFields := buildResponseHeaderFields()
fmt.Printf(responseHeaderFieldsPrefix, len(respHeaderFields))
fmt.Printf(" %s", fmtBytes(respHeaderFields))
fmt.Printf("%s", responseHeaderFieldsSuffix)
maxLen := len(buildResponseTrailerFields(maxStatusCode))
fmt.Printf(responseTrailerFieldsPrefix, maxLen, maxStatusCode+1)
for code := 0; code <= maxStatusCode; code++ {
payload := buildResponseTrailerFields(code)
fmt.Printf(" {.size=%d, .bytes={%s}},\n", len(payload), fmtBytes(payload))
}
fmt.Printf("%s", responseTrailerFieldsSuffix)
}
func fmtBytes(bytes []byte) string {
var out string
for i, b := range bytes {
if i < len(bytes)-1 {
out += fmt.Sprintf("0x%x, ", b)
} else {
out += fmt.Sprintf("0x%x", b)
}
}
return out
}
// Response-Headers โ†’ ":status 200" "content-type application/grpc"
func buildResponseHeaderFields() []byte {
var out []byte
out = append(out, 0b1000_1000) // RFC 7541 ยง6.1: index 8 is ":status" "200"
out = append(out, 0b0101_1111) // RFC 7541 ยง6.2.1: index 31 is "content-type"
out = append(out, literalEncodeWithLength("application/grpc")...)
return out
}
// Trailers โ†’ "grpc-status <DIGITS>"
func buildResponseTrailerFields(statusCode int) []byte {
var out []byte
out = append(out, 0b0100_0000) // RFC 7541 ยง6.2.1: literal name and value
out = append(out, literalEncodeWithLength("grpc-status")...)
if statusCode >= 10 {
out = append(out, 0b0000_0010) // RFC 7541 ยง6.2.1: length of value w/out huffman
out = append(out, byte(statusCode/10)+'0') // RFC 7541 ยง6.2.1: value
out = append(out, byte(statusCode%10)+'0')
} else {
out = append(out, 0b0000_0001) // RFC 7541 ยง6.2.1: length of value w/out huffman
out = append(out, byte(statusCode)+'0') // RFC 7541 ยง6.2.1: value
}
return out
}
func literalEncodeWithLength(str string) []byte {
// RFC 7541 ยง6.2.1: we are generating this structure with H=0. Since the
// strings are short, there is minimal benefit to using a Huffman encoding,
// at most 2-3 bytes per string.
// +---+---+-----------------------+
// | H | Value Length (7+) |
// +---+---------------------------+
// | Value String (Length octets) |
// +-------------------------------+
//
// The length must be encoded across multiple bytes if 128 or larger. See RFC
// 7541 ยง5.2. We don't write any strings that large.
if len(str) >= 128 {
panic(fmt.Sprintf("cannot encode string of length %v: %#v", len(str), str))
}
return append([]byte{byte(len(str))}, str...)
}
// Special symbol for Huffman EOS.
// See RFC 7541 Appendix B.
const EOS = 256
// Mapping from symbol to Huffman code.
// Taken verbatim from RFC 7541 Appendix B.
var huffmanTable = map[int]string{
0: "1111111111000",
1: "11111111111111111011000",
2: "1111111111111111111111100010",
3: "1111111111111111111111100011",
4: "1111111111111111111111100100",
5: "1111111111111111111111100101",
6: "1111111111111111111111100110",
7: "1111111111111111111111100111",
8: "1111111111111111111111101000",
9: "111111111111111111101010",
10: "111111111111111111111111111100",
11: "1111111111111111111111101001",
12: "1111111111111111111111101010",
13: "111111111111111111111111111101",
14: "1111111111111111111111101011",
15: "1111111111111111111111101100",
16: "1111111111111111111111101101",
17: "1111111111111111111111101110",
18: "1111111111111111111111101111",
19: "1111111111111111111111110000",
20: "1111111111111111111111110001",
21: "1111111111111111111111110010",
22: "111111111111111111111111111110",
23: "1111111111111111111111110011",
24: "1111111111111111111111110100",
25: "1111111111111111111111110101",
26: "1111111111111111111111110110",
27: "1111111111111111111111110111",
28: "1111111111111111111111111000",
29: "1111111111111111111111111001",
30: "1111111111111111111111111010",
31: "1111111111111111111111111011",
32: "010100",
33: "1111111000",
34: "1111111001",
35: "111111111010",
36: "1111111111001",
37: "010101",
38: "11111000",
39: "11111111010",
40: "1111111010",
41: "1111111011",
42: "11111001",
43: "11111111011",
44: "11111010",
45: "010110",
46: "010111",
47: "011000",
48: "00000",
49: "00001",
50: "00010",
51: "011001",
52: "011010",
53: "011011",
54: "011100",
55: "011101",
56: "011110",
57: "011111",
58: "1011100",
59: "11111011",
60: "111111111111100",
61: "100000",
62: "111111111011",
63: "1111111100",
64: "1111111111010",
65: "100001",
66: "1011101",
67: "1011110",
68: "1011111",
69: "1100000",
70: "1100001",
71: "1100010",
72: "1100011",
73: "1100100",
74: "1100101",
75: "1100110",
76: "1100111",
77: "1101000",
78: "1101001",
79: "1101010",
80: "1101011",
81: "1101100",
82: "1101101",
83: "1101110",
84: "1101111",
85: "1110000",
86: "1110001",
87: "1110010",
88: "11111100",
89: "1110011",
90: "11111101",
91: "1111111111011",
92: "1111111111111110000",
93: "1111111111100",
94: "11111111111100",
95: "100010",
96: "111111111111101",
97: "00011",
98: "100011",
99: "00100",
100: "100100",
101: "00101",
102: "100101",
103: "100110",
104: "100111",
105: "00110",
106: "1110100",
107: "1110101",
108: "101000",
109: "101001",
110: "101010",
111: "00111",
112: "101011",
113: "1110110",
114: "101100",
115: "01000",
116: "01001",
117: "101101",
118: "1110111",
119: "1111000",
120: "1111001",
121: "1111010",
122: "1111011",
123: "111111111111110",
124: "11111111100",
125: "11111111111101",
126: "1111111111101",
127: "1111111111111111111111111100",
128: "11111111111111100110",
129: "1111111111111111010010",
130: "11111111111111100111",
131: "11111111111111101000",
132: "1111111111111111010011",
133: "1111111111111111010100",
134: "1111111111111111010101",
135: "11111111111111111011001",
136: "1111111111111111010110",
137: "11111111111111111011010",
138: "11111111111111111011011",
139: "11111111111111111011100",
140: "11111111111111111011101",
141: "11111111111111111011110",
142: "111111111111111111101011",
143: "11111111111111111011111",
144: "111111111111111111101100",
145: "111111111111111111101101",
146: "1111111111111111010111",
147: "11111111111111111100000",
148: "111111111111111111101110",
149: "11111111111111111100001",
150: "11111111111111111100010",
151: "11111111111111111100011",
152: "11111111111111111100100",
153: "111111111111111011100",
154: "1111111111111111011000",
155: "11111111111111111100101",
156: "1111111111111111011001",
157: "11111111111111111100110",
158: "11111111111111111100111",
159: "111111111111111111101111",
160: "1111111111111111011010",
161: "111111111111111011101",
162: "11111111111111101001",
163: "1111111111111111011011",
164: "1111111111111111011100",
165: "11111111111111111101000",
166: "11111111111111111101001",
167: "111111111111111011110",
168: "11111111111111111101010",
169: "1111111111111111011101",
170: "1111111111111111011110",
171: "111111111111111111110000",
172: "111111111111111011111",
173: "1111111111111111011111",
174: "11111111111111111101011",
175: "11111111111111111101100",
176: "111111111111111100000",
177: "111111111111111100001",
178: "1111111111111111100000",
179: "111111111111111100010",
180: "11111111111111111101101",
181: "1111111111111111100001",
182: "11111111111111111101110",
183: "11111111111111111101111",
184: "11111111111111101010",
185: "1111111111111111100010",
186: "1111111111111111100011",
187: "1111111111111111100100",
188: "11111111111111111110000",
189: "1111111111111111100101",
190: "1111111111111111100110",
191: "11111111111111111110001",
192: "11111111111111111111100000",
193: "11111111111111111111100001",
194: "11111111111111101011",
195: "1111111111111110001",
196: "1111111111111111100111",
197: "11111111111111111110010",
198: "1111111111111111101000",
199: "1111111111111111111101100",
200: "11111111111111111111100010",
201: "11111111111111111111100011",
202: "11111111111111111111100100",
203: "111111111111111111111011110",
204: "111111111111111111111011111",
205: "11111111111111111111100101",
206: "111111111111111111110001",
207: "1111111111111111111101101",
208: "1111111111111110010",
209: "111111111111111100011",
210: "11111111111111111111100110",
211: "111111111111111111111100000",
212: "111111111111111111111100001",
213: "11111111111111111111100111",
214: "111111111111111111111100010",
215: "111111111111111111110010",
216: "111111111111111100100",
217: "111111111111111100101",
218: "11111111111111111111101000",
219: "11111111111111111111101001",
220: "1111111111111111111111111101",
221: "111111111111111111111100011",
222: "111111111111111111111100100",
223: "111111111111111111111100101",
224: "11111111111111101100",
225: "111111111111111111110011",
226: "11111111111111101101",
227: "111111111111111100110",
228: "1111111111111111101001",
229: "111111111111111100111",
230: "111111111111111101000",
231: "11111111111111111110011",
232: "1111111111111111101010",
233: "1111111111111111101011",
234: "1111111111111111111101110",
235: "1111111111111111111101111",
236: "111111111111111111110100",
237: "111111111111111111110101",
238: "11111111111111111111101010",
239: "11111111111111111110100",
240: "11111111111111111111101011",
241: "111111111111111111111100110",
242: "11111111111111111111101100",
243: "11111111111111111111101101",
244: "111111111111111111111100111",
245: "111111111111111111111101000",
246: "111111111111111111111101001",
247: "111111111111111111111101010",
248: "111111111111111111111101011",
249: "1111111111111111111111111110",
250: "111111111111111111111101100",
251: "111111111111111111111101101",
252: "111111111111111111111101110",
253: "111111111111111111111101111",
254: "111111111111111111111110000",
255: "11111111111111111111101110",
256: "111111111111111111111111111111",
}