Implement string variable interpolator

This adds a `StringSub` struct, which substitutes variable values into a
format string, and updates download targets' URLs to use it.

Change-Id: Ifc0064415a2e541efbeb349006824001dd54158b
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/qg/+/125550
Commit-Queue: Alexei Frolov <frolv@google.com>
Reviewed-by: Erik Gilling <konkers@google.com>
diff --git a/qg/Cargo.toml b/qg/Cargo.toml
index cb2a8c2..f4b1338 100644
--- a/qg/Cargo.toml
+++ b/qg/Cargo.toml
@@ -17,6 +17,7 @@
 thiserror = "1.0.37"
 tokio = { version = "1.21.2", features = ["full"] }
 toml = "0.5.9"
+nom = "7.1.2"
 
 [dependencies.rustpython]
 git = "https://github.com/RustPython/RustPython"
diff --git a/qg/src/download.rs b/qg/src/download.rs
index 9f0d645..b735785 100644
--- a/qg/src/download.rs
+++ b/qg/src/download.rs
@@ -1,4 +1,4 @@
-// Copyright 2022 The Pigweed Authors
+// Copyright 2023 The Pigweed Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy of
diff --git a/qg/src/lib.rs b/qg/src/lib.rs
index ab1be63..7788e57 100644
--- a/qg/src/lib.rs
+++ b/qg/src/lib.rs
@@ -24,6 +24,7 @@
 pub mod target;
 
 mod download;
+mod util;
 
 #[doc(inline)]
 pub use target::Target;
diff --git a/qg/src/target.rs b/qg/src/target.rs
index af1b105..207f2fe 100644
--- a/qg/src/target.rs
+++ b/qg/src/target.rs
@@ -15,10 +15,8 @@
 use std::collections::{HashMap, HashSet};
 use std::path::{Path, PathBuf};
 
-use once_cell::sync::Lazy;
-use regex::Regex;
-
 use crate::project::manifest;
+use crate::util::StringSub;
 use crate::{download, platform, Error, Result};
 
 /// A source of targets.
@@ -99,7 +97,7 @@
 #[derive(Debug)]
 pub struct Download {
     pub format: download::Format,
-    pub url: String,
+    pub url: StringSub,
     pub url_parameters: HashMap<String, String>,
     pub variants: Vec<DownloadVariant>,
 }
@@ -116,11 +114,6 @@
     arch: Option<platform::Architecture>,
 }
 
-static URL_PARAMETER_NAME_REGEX: Lazy<Regex> =
-    Lazy::new(|| Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").expect("regex is valid"));
-static URL_PARAMETER_SUB_REGEX: Lazy<Regex> =
-    Lazy::new(|| Regex::new(r"\{(.*?)\}").expect("regex is valid"));
-
 impl TryFrom<manifest::DownloadablePackage> for Download {
     type Error = Error;
 
@@ -163,7 +156,7 @@
         // mapping have invalid names.
         let invalid_vars: Vec<_> = all_params
             .iter()
-            .filter(|&&param| !URL_PARAMETER_NAME_REGEX.is_match(param))
+            .filter(|&&param| !StringSub::valid_variable_name(param))
             .collect();
         if !invalid_vars.is_empty() {
             // TODO(frolv): Return an error containing the invalid variables.
@@ -172,30 +165,11 @@
         }
 
         // Next, check the URL string itself. Each parameter substitution
-        // defined within braces should have a valid name and exist within a
-        // provided `url_parameters` mapping.
-        let mut invalid_vars = Vec::new();
-        let mut missing_vars = Vec::new();
+        // defined within braces should exist within a provided `url_parameters`
+        // mapping.
+        let url: StringSub = value.url.parse()?;
 
-        for var in URL_PARAMETER_SUB_REGEX
-            .captures_iter(&value.url)
-            .filter_map(|cap| cap.get(1))
-        {
-            let var = var.as_str();
-            if !URL_PARAMETER_NAME_REGEX.is_match(var) {
-                invalid_vars.push(var);
-            }
-            if !all_params.contains(var) {
-                missing_vars.push(var);
-            }
-        }
-
-        if !invalid_vars.is_empty() {
-            // TODO(frolv): Return an error containing the invalid variables.
-            println!("invalid URL parameters {invalid_vars:?}");
-            return Err(Error::GenericErrorPlaceholder);
-        }
-
+        let missing_vars: Vec<_> = url.vars().filter(|&v| !all_params.contains(v)).collect();
         if !missing_vars.is_empty() {
             // TODO(frolv): Return an error containing the missing variables.
             println!("missing URL parameters {missing_vars:?}");
@@ -211,7 +185,7 @@
 
         Ok(Self {
             format,
-            url: value.url,
+            url,
             url_parameters,
             variants,
         })
diff --git a/qg/src/util.rs b/qg/src/util.rs
new file mode 100644
index 0000000..549363c
--- /dev/null
+++ b/qg/src/util.rs
@@ -0,0 +1,259 @@
+// Copyright 2023 The Pigweed Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+use std::{collections::HashMap, str::FromStr};
+
+use nom::{
+    branch::alt,
+    bytes::complete::{is_a, is_not, tag},
+    character::complete::{alpha1, alphanumeric1},
+    combinator::{map, recognize},
+    multi::{many0_count, many1},
+    sequence::{delimited, pair},
+    IResult,
+};
+use once_cell::sync::Lazy;
+use regex::Regex;
+
+use crate::{Error, Result};
+
+#[derive(Debug)]
+enum StringFragment { // one parsed piece of a `StringSub` format string
+    Literal(String), // brace-free text, copied verbatim into the output
+    Variable(String), // name taken from a `{name}` placeholder
+    OpenBrace, // the `{{` escape; rendered as `{`
+    CloseBrace, // the `}}` escape; rendered as `}`
+}
+
+#[derive(Debug)]
+pub struct StringSub { // a format string pre-parsed for variable substitution
+    fragments: Vec<StringFragment>, // pieces of the string, in source order
+}
+
+static VARIABLE_NAME_REGEX: Lazy<Regex> = // anchored, so it must match the whole name
+    Lazy::new(|| Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_-]*$").expect("regex is valid"));
+
+/// Parses a `StringSub` variable name and returns the matched slice.
+///
+/// A name begins with a letter or `_`; later characters may also be digits or `-`.
+fn identifier(input: &str) -> IResult<&str, &str> {
+    let first = alt((alpha1, tag("_")));
+    let rest = many0_count(alt((alphanumeric1, is_a("_-"))));
+    recognize(pair(first, rest))(input)
+}
+
+/// Parses a `{name}` placeholder, where `name` satisfies `identifier`.
+///
+/// On success, yields a `StringFragment::Variable` owning a copy of the name.
+fn variable_sub(input: &str) -> IResult<&str, StringFragment> {
+    let (rest, name) = delimited(tag("{"), identifier, tag("}"))(input)?;
+    Ok((rest, StringFragment::Variable(name.to_owned())))
+}
+
+/// Parses the longest non-empty run of characters that contains no braces.
+///
+/// On success, yields a `StringFragment::Literal` owning a copy of that run.
+fn string_literal(input: &str) -> IResult<&str, StringFragment> {
+    let to_literal = |text: &str| StringFragment::Literal(text.to_owned());
+    map(is_not("{}"), to_literal)(input)
+}
+
+fn escaped_open_brace(input: &str) -> IResult<&str, StringFragment> { // parses `{{`
+    tag("{{")(input).map(|(rest, _)| (rest, StringFragment::OpenBrace))
+}
+
+fn escaped_close_brace(input: &str) -> IResult<&str, StringFragment> { // parses `}}`
+    tag("}}")(input).map(|(rest, _)| (rest, StringFragment::CloseBrace))
+}
+
+impl StringSub {
+    /// Returns `true` if `var` is a syntactically valid variable name (see `VARIABLE_NAME_REGEX`).
+    pub fn valid_variable_name(var: &str) -> bool {
+        VARIABLE_NAME_REGEX.is_match(var)
+    }
+
+    /// Parses `string`, in which `{name}` marks a variable and `{{` / `}}` are literal braces.
+    /// Returns an error if `string` does not fully parse as such a format string.
+    pub fn new(string: &str) -> Result<Self> {
+        // `many1` fails without at least one fragment, so "" is handled up front.
+        if string.is_empty() {
+            let fragments = Vec::new();
+            return Ok(Self { fragments });
+        }
+
+        let fragment = alt((
+            escaped_open_brace,
+            escaped_close_brace,
+            variable_sub,
+            string_literal,
+        ));
+
+        match many1(fragment)(string) {
+            Ok((rest, fragments)) if rest.is_empty() => Ok(Self { fragments }),
+            // TODO(frolv): Some of the string wasn't parsed.
+            Ok(_) => Err(Error::GenericErrorPlaceholder),
+            // TODO(frolv): Map the nom error to a useful user-facing error.
+            Err(_) => Err(Error::GenericErrorPlaceholder),
+        }
+    }
+
+    /// Iterates over the name of every `{name}` placeholder, in order of appearance.
+    /// A name that is used several times is yielded once per use.
+    pub fn vars(&self) -> impl Iterator<Item = &str> {
+        self.fragments.iter().filter_map(|fragment| {
+            let StringFragment::Variable(name) = fragment else { return None };
+            Some(name.as_str())
+        })
+    }
+
+    /// Renders the string, replacing each variable with its value from `vars`.
+    /// Returns an error if a variable in the string has no entry in `vars`.
+    pub fn substitute<'a>(&self, vars: &HashMap<&'a str, &'a str>) -> Result<String> {
+        self.fragments
+            .iter()
+            .map(|fragment| match fragment {
+                StringFragment::Literal(text) => Ok(text.as_str()),
+                // TODO(frolv): no value provided for `name`.
+                StringFragment::Variable(name) => vars
+                    .get(name.as_str())
+                    .copied()
+                    .ok_or(Error::GenericErrorPlaceholder),
+                StringFragment::OpenBrace => Ok("{"),
+                StringFragment::CloseBrace => Ok("}"),
+            })
+            .collect()
+    }
+}
+
+impl FromStr for StringSub {
+    type Err = Error;
+    /// Parses via [`StringSub::new`], so `"...".parse::<StringSub>()` works.
+    fn from_str(s: &str) -> std::result::Result<Self, Error> {
+        Self::new(s)
+    }
+}
+
+#[cfg(test)]
+mod tests { // unit tests for `StringSub` parsing and substitution
+    use super::*;
+
+    #[test]
+    fn substitute_one_var() {
+        let string = StringSub::new("hello, {foo}!").unwrap();
+        assert_eq!(string.vars().count(), 1);
+        let vars = HashMap::from([("foo", "world")]);
+        assert_eq!(string.substitute(&vars).unwrap(), "hello, world!");
+    }
+
+    #[test]
+    fn substitute_multiple_vars() {
+        let string = StringSub::new("{greeting}, {subject}!").unwrap();
+        assert_eq!(string.vars().count(), 2);
+        let vars = HashMap::from([("greeting", "hello"), ("subject", "world")]);
+        assert_eq!(string.substitute(&vars).unwrap(), "hello, world!");
+    }
+
+    #[test]
+    fn substitute_repeated_var() {
+        let string = StringSub::new("b{v}{v}a").unwrap();
+        assert_eq!(string.vars().count(), 2); // each occurrence of `v` is counted
+        let vars = HashMap::from([("v", "an")]);
+        assert_eq!(string.substitute(&vars).unwrap(), "banana");
+    }
+
+    #[test]
+    fn substitute_full_string() {
+        let string = StringSub::new("{foo}").unwrap();
+        assert_eq!(string.vars().count(), 1);
+        let vars = HashMap::from([("foo", "hello, world!")]);
+        assert_eq!(string.substitute(&vars).unwrap(), "hello, world!");
+    }
+
+    #[test]
+    fn substitute_no_vars() {
+        let string = StringSub::new("hello").unwrap();
+        assert_eq!(string.vars().count(), 0);
+        let mut vars = HashMap::new();
+        assert_eq!(string.substitute(&vars).unwrap(), "hello");
+        vars.insert("foo", "world"); // unused entries do not affect the result
+        assert_eq!(string.substitute(&vars).unwrap(), "hello");
+    }
+
+    #[test]
+    fn substitute_empty_string() {
+        let string = StringSub::new("").unwrap();
+        assert_eq!(string.vars().count(), 0);
+        let mut vars = HashMap::new();
+        assert_eq!(string.substitute(&vars).unwrap(), "");
+        vars.insert("foo", "world");
+        assert_eq!(string.substitute(&vars).unwrap(), "");
+    }
+
+    #[test]
+    fn substitute_escaped_braces() {
+        let string = StringSub::new("hello, {{foo}}").unwrap();
+        assert_eq!(string.vars().count(), 0); // `{{foo}}` is escaped braces, not a variable
+        let mut vars = HashMap::new();
+        assert_eq!(string.substitute(&vars).unwrap(), "hello, {foo}");
+        vars.insert("foo", "world");
+        assert_eq!(string.substitute(&vars).unwrap(), "hello, {foo}");
+
+        assert_eq!(
+            StringSub::new("{{").unwrap().substitute(&vars).unwrap(),
+            "{",
+        );
+        assert_eq!(
+            StringSub::new("}}").unwrap().substitute(&vars).unwrap(),
+            "}",
+        );
+        assert_eq!(
+            StringSub::new("{{}}").unwrap().substitute(&vars).unwrap(),
+            "{}",
+        );
+    }
+
+    #[test]
+    fn substitute_variable_names() {
+        assert!(StringSub::new("{f}").is_ok());
+        assert!(StringSub::new("{foo}").is_ok());
+        assert!(StringSub::new("{__foo__}").is_ok());
+        assert!(StringSub::new("{foo123}").is_ok());
+        assert!(StringSub::new("{FooBar}").is_ok());
+        assert!(StringSub::new("{f-o-o-}").is_ok()); // hyphens are allowed after the first character
+        assert!(StringSub::new("{123foo}").is_err());
+        assert!(StringSub::new("{-foo}").is_err());
+        assert!(StringSub::new("{foo#bar}").is_err());
+        assert!(StringSub::new("{!%(@)}").is_err());
+    }
+
+    #[test]
+    fn substitute_invalid_format_string() {
+        assert!(StringSub::new("{").is_err());
+        assert!(StringSub::new("}").is_err());
+        assert!(StringSub::new("hello, {foo").is_err());
+        assert!(StringSub::new("hello, {}").is_err()); // a placeholder needs a name
+        assert!(StringSub::new("this is a closing brace: }").is_err());
+        assert!(StringSub::new("hello, {foo{").is_err());
+        assert!(StringSub::new("foo{{bar}baz").is_err()); // `{{` is an escape, so `}` is unmatched
+        assert!(StringSub::new("{greeting}, {subject").is_err());
+    }
+
+    #[test]
+    fn substitute_missing_variables() {
+        let string = StringSub::new("hello, {foo}!").unwrap();
+        assert_eq!(string.vars().count(), 1);
+        let vars = HashMap::new();
+        assert!(string.substitute(&vars).is_err());
+    }
+}