Implement string variable interpolator
This adds a `StringSub` struct which inserts variable mappings into a
format string and updates download targets' URLs to use it.
Change-Id: Ifc0064415a2e541efbeb349006824001dd54158b
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/qg/+/125550
Commit-Queue: Alexei Frolov <frolv@google.com>
Reviewed-by: Erik Gilling <konkers@google.com>
diff --git a/qg/Cargo.toml b/qg/Cargo.toml
index cb2a8c2..f4b1338 100644
--- a/qg/Cargo.toml
+++ b/qg/Cargo.toml
@@ -17,6 +17,7 @@
thiserror = "1.0.37"
tokio = { version = "1.21.2", features = ["full"] }
toml = "0.5.9"
+nom = "7.1.2"
[dependencies.rustpython]
git = "https://github.com/RustPython/RustPython"
diff --git a/qg/src/download.rs b/qg/src/download.rs
index 9f0d645..b735785 100644
--- a/qg/src/download.rs
+++ b/qg/src/download.rs
@@ -1,4 +1,4 @@
-// Copyright 2022 The Pigweed Authors
+// Copyright 2023 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
diff --git a/qg/src/lib.rs b/qg/src/lib.rs
index ab1be63..7788e57 100644
--- a/qg/src/lib.rs
+++ b/qg/src/lib.rs
@@ -24,6 +24,7 @@
pub mod target;
mod download;
+mod util;
#[doc(inline)]
pub use target::Target;
diff --git a/qg/src/target.rs b/qg/src/target.rs
index af1b105..207f2fe 100644
--- a/qg/src/target.rs
+++ b/qg/src/target.rs
@@ -15,10 +15,8 @@
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
-use once_cell::sync::Lazy;
-use regex::Regex;
-
use crate::project::manifest;
+use crate::util::StringSub;
use crate::{download, platform, Error, Result};
/// A source of targets.
@@ -99,7 +97,7 @@
#[derive(Debug)]
pub struct Download {
pub format: download::Format,
- pub url: String,
+ pub url: StringSub,
pub url_parameters: HashMap<String, String>,
pub variants: Vec<DownloadVariant>,
}
@@ -116,11 +114,6 @@
arch: Option<platform::Architecture>,
}
-static URL_PARAMETER_NAME_REGEX: Lazy<Regex> =
- Lazy::new(|| Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").expect("regex is valid"));
-static URL_PARAMETER_SUB_REGEX: Lazy<Regex> =
- Lazy::new(|| Regex::new(r"\{(.*?)\}").expect("regex is valid"));
-
impl TryFrom<manifest::DownloadablePackage> for Download {
type Error = Error;
@@ -163,7 +156,7 @@
// mapping have invalid names.
let invalid_vars: Vec<_> = all_params
.iter()
- .filter(|&&param| !URL_PARAMETER_NAME_REGEX.is_match(param))
+ .filter(|&&param| !StringSub::valid_variable_name(param))
.collect();
if !invalid_vars.is_empty() {
// TODO(frolv): Return an error containing the invalid variables.
@@ -172,30 +165,11 @@
}
// Next, check the URL string itself. Each parameter substitution
- // defined within braces should have a valid name and exist within a
- // provided `url_parameters` mapping.
- let mut invalid_vars = Vec::new();
- let mut missing_vars = Vec::new();
+ // defined within braces should exist within a provided `url_parameters`
+ // mapping.
+ let url: StringSub = value.url.parse()?;
- for var in URL_PARAMETER_SUB_REGEX
- .captures_iter(&value.url)
- .filter_map(|cap| cap.get(1))
- {
- let var = var.as_str();
- if !URL_PARAMETER_NAME_REGEX.is_match(var) {
- invalid_vars.push(var);
- }
- if !all_params.contains(var) {
- missing_vars.push(var);
- }
- }
-
- if !invalid_vars.is_empty() {
- // TODO(frolv): Return an error containing the invalid variables.
- println!("invalid URL parameters {invalid_vars:?}");
- return Err(Error::GenericErrorPlaceholder);
- }
-
+ let missing_vars: Vec<_> = url.vars().filter(|&v| !all_params.contains(v)).collect();
if !missing_vars.is_empty() {
// TODO(frolv): Return an error containing the missing variables.
println!("missing URL parameters {missing_vars:?}");
@@ -211,7 +185,7 @@
Ok(Self {
format,
- url: value.url,
+ url,
url_parameters,
variants,
})
diff --git a/qg/src/util.rs b/qg/src/util.rs
new file mode 100644
index 0000000..549363c
--- /dev/null
+++ b/qg/src/util.rs
@@ -0,0 +1,259 @@
+// Copyright 2023 The Pigweed Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+use std::{collections::HashMap, str::FromStr};
+
+use nom::{
+ branch::alt,
+ bytes::complete::{is_a, is_not, tag},
+ character::complete::{alpha1, alphanumeric1},
+ combinator::{map, recognize},
+ multi::{many0_count, many1},
+ sequence::{delimited, pair},
+ IResult,
+};
+use once_cell::sync::Lazy;
+use regex::Regex;
+
+use crate::{Error, Result};
+
+/// One parsed piece of a `StringSub` format string.
+#[derive(Debug)]
+enum StringFragment {
+ /// Text that is copied to the output unchanged.
+ Literal(String),
+ /// Name of a variable whose value replaces a `{name}` placeholder.
+ Variable(String),
+ /// An escaped `{{`, emitted as a single `{`.
+ OpenBrace,
+ /// An escaped `}}`, emitted as a single `}`.
+ CloseBrace,
+}
+
+/// A format string that has been parsed into fragments so that `{name}`
+/// placeholders can later be replaced with values from a variable mapping.
+///
+/// `{{` and `}}` in the format string stand for literal braces.
+#[derive(Debug)]
+pub struct StringSub {
+ /// The parsed pieces of the format string, in their original order.
+ fragments: Vec<StringFragment>,
+}
+
+/// Pattern matching a whole variable name: a letter or underscore followed by
+/// any number of letters, digits, underscores, or hyphens. Compiled on first
+/// use.
+static VARIABLE_NAME_REGEX: Lazy<Regex> =
+ Lazy::new(|| Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_-]*$").expect("regex is valid"));
+
+/// Parses a `StringSub` variable name from the front of `input`.
+///
+/// A name begins with a letter or an underscore and continues with any mix of
+/// alphanumeric characters, underscores, and hyphens. The matched name is
+/// returned as a slice of `input`.
+fn identifier(input: &str) -> IResult<&str, &str> {
+    let first = alt((alpha1, tag("_")));
+    let rest = many0_count(alt((alphanumeric1, is_a("_-"))));
+    recognize(pair(first, rest))(input)
+}
+
+/// Parses a `StringSub` variable placeholder: an identifier wrapped in a
+/// single pair of braces, e.g. `{name}`. Yields the name as a
+/// `StringFragment::Variable`.
+fn variable_sub(input: &str) -> IResult<&str, StringFragment> {
+    let (rest, name) = delimited(tag("{"), identifier, tag("}"))(input)?;
+    Ok((rest, StringFragment::Variable(name.to_owned())))
+}
+
+/// Parses a run of `StringSub` literal text, i.e. one or more characters that
+/// are neither `{` nor `}`, into a `StringFragment::Literal`.
+fn string_literal(input: &str) -> IResult<&str, StringFragment> {
+    let to_literal = |text: &str| StringFragment::Literal(text.to_owned());
+    map(is_not("{}"), to_literal)(input)
+}
+
+/// Parses the escape sequence `{{`, which stands for one literal `{`.
+fn escaped_open_brace(input: &str) -> IResult<&str, StringFragment> {
+    tag("{{")(input).map(|(rest, _)| (rest, StringFragment::OpenBrace))
+}
+
+/// Parses the escape sequence `}}`, which stands for one literal `}`.
+fn escaped_close_brace(input: &str) -> IResult<&str, StringFragment> {
+    tag("}}")(input).map(|(rest, _)| (rest, StringFragment::CloseBrace))
+}
+
+impl StringSub {
+    /// Reports whether `var` is usable as a variable name, i.e. whether the
+    /// whole string matches `VARIABLE_NAME_REGEX`.
+    pub fn valid_variable_name(var: &str) -> bool {
+        let pattern: &Regex = &VARIABLE_NAME_REGEX;
+        pattern.is_match(var)
+    }
+
+    /// Parses `string` into a `StringSub`.
+    ///
+    /// The format string may contain literal text, `{name}` placeholders, and
+    /// the escapes `{{` / `}}` for literal braces. An empty string yields a
+    /// `StringSub` with no fragments.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::GenericErrorPlaceholder` if the string cannot be parsed
+    /// in full, e.g. because of an unbalanced brace or an invalid variable
+    /// name.
+    pub fn new(string: &str) -> Result<Self> {
+        // `many1` needs at least one fragment, so the empty format string is
+        // handled before parsing.
+        if string.is_empty() {
+            return Ok(Self {
+                fragments: Vec::new(),
+            });
+        }
+
+        let fragment = alt((
+            escaped_open_brace,
+            escaped_close_brace,
+            variable_sub,
+            string_literal,
+        ));
+
+        match many1(fragment)(string) {
+            // Success only counts when the entire input was consumed.
+            Ok(("", fragments)) => Ok(Self { fragments }),
+            // TODO(frolv): Some of the string wasn't parsed.
+            Ok(_) => Err(Error::GenericErrorPlaceholder),
+            // TODO(frolv): Map the nom error to a useful user-facing error.
+            Err(_) => Err(Error::GenericErrorPlaceholder),
+        }
+    }
+
+    /// Returns an iterator over the variable names in the string, in order of
+    /// appearance. A name used several times is yielded once per use.
+    pub fn vars(&self) -> impl Iterator<Item = &str> {
+        self.fragments.iter().filter_map(|fragment| {
+            if let StringFragment::Variable(name) = fragment {
+                Some(name.as_str())
+            } else {
+                None
+            }
+        })
+    }
+
+    /// Builds the output string by replacing every placeholder with its value
+    /// from `vars` and every escaped brace with a single brace.
+    ///
+    /// Entries in `vars` that the format string does not reference are
+    /// ignored.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::GenericErrorPlaceholder` if a referenced variable has
+    /// no entry in `vars`.
+    pub fn substitute<'a>(&self, vars: &HashMap<&'a str, &'a str>) -> Result<String> {
+        let mut output = String::new();
+
+        for fragment in self.fragments.iter() {
+            let piece = match fragment {
+                StringFragment::Literal(text) => text.as_str(),
+                StringFragment::OpenBrace => "{",
+                StringFragment::CloseBrace => "}",
+                StringFragment::Variable(name) => match vars.get(name.as_str()) {
+                    Some(&value) => value,
+                    // TODO(frolv): no value provided for `var`.
+                    None => return Err(Error::GenericErrorPlaceholder),
+                },
+            };
+            output.push_str(piece);
+        }
+
+        Ok(output)
+    }
+}
+
+impl FromStr for StringSub {
+    type Err = Error;
+
+    /// Parses `s` as a format string; equivalent to calling `StringSub::new`.
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        Self::new(s)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    //! Unit tests for `StringSub` parsing, name validation, and substitution.
+
+    use super::*;
+
+    #[test]
+    fn substitute_one_var() {
+        let string = StringSub::new("hello, {foo}!").unwrap();
+        assert_eq!(string.vars().count(), 1);
+        let vars = HashMap::from([("foo", "world")]);
+        assert_eq!(string.substitute(&vars).unwrap(), "hello, world!");
+    }
+
+    #[test]
+    fn substitute_multiple_vars() {
+        let string = StringSub::new("{greeting}, {subject}!").unwrap();
+        assert_eq!(string.vars().count(), 2);
+        let vars = HashMap::from([("greeting", "hello"), ("subject", "world")]);
+        assert_eq!(string.substitute(&vars).unwrap(), "hello, world!");
+    }
+
+    #[test]
+    fn substitute_repeated_var() {
+        let string = StringSub::new("b{v}{v}a").unwrap();
+        assert_eq!(string.vars().count(), 2);
+        let vars = HashMap::from([("v", "an")]);
+        assert_eq!(string.substitute(&vars).unwrap(), "banana");
+    }
+
+    #[test]
+    fn substitute_full_string() {
+        let string = StringSub::new("{foo}").unwrap();
+        assert_eq!(string.vars().count(), 1);
+        let vars = HashMap::from([("foo", "hello, world!")]);
+        assert_eq!(string.substitute(&vars).unwrap(), "hello, world!");
+    }
+
+    #[test]
+    fn substitute_no_vars() {
+        let string = StringSub::new("hello").unwrap();
+        assert_eq!(string.vars().count(), 0);
+        let mut vars = HashMap::new();
+        assert_eq!(string.substitute(&vars).unwrap(), "hello");
+        vars.insert("foo", "world");
+        assert_eq!(string.substitute(&vars).unwrap(), "hello");
+    }
+
+    #[test]
+    fn substitute_empty_string() {
+        let string = StringSub::new("").unwrap();
+        assert_eq!(string.vars().count(), 0);
+        let mut vars = HashMap::new();
+        assert_eq!(string.substitute(&vars).unwrap(), "");
+        vars.insert("foo", "world");
+        assert_eq!(string.substitute(&vars).unwrap(), "");
+    }
+
+    #[test]
+    fn substitute_escaped_braces() {
+        let string = StringSub::new("hello, {{foo}}").unwrap();
+        assert_eq!(string.vars().count(), 0);
+        let mut vars = HashMap::new();
+        assert_eq!(string.substitute(&vars).unwrap(), "hello, {foo}");
+        vars.insert("foo", "world");
+        assert_eq!(string.substitute(&vars).unwrap(), "hello, {foo}");
+
+        assert_eq!(
+            StringSub::new("{{").unwrap().substitute(&vars).unwrap(),
+            "{",
+        );
+        assert_eq!(
+            StringSub::new("}}").unwrap().substitute(&vars).unwrap(),
+            "}",
+        );
+        assert_eq!(
+            StringSub::new("{{}}").unwrap().substitute(&vars).unwrap(),
+            "{}",
+        );
+    }
+
+    // An escaped brace directly next to a placeholder must not swallow the
+    // placeholder's own brace.
+    #[test]
+    fn substitute_escaped_braces_around_var() {
+        let string = StringSub::new("{{{foo}}}").unwrap();
+        assert_eq!(string.vars().collect::<Vec<_>>(), ["foo"]);
+        let vars = HashMap::from([("foo", "world")]);
+        assert_eq!(string.substitute(&vars).unwrap(), "{world}");
+    }
+
+    #[test]
+    fn substitute_hyphen_and_underscore_vars() {
+        let string = StringSub::new("{foo-bar}/{_baz_1}").unwrap();
+        assert_eq!(string.vars().collect::<Vec<_>>(), ["foo-bar", "_baz_1"]);
+        let vars = HashMap::from([("foo-bar", "a"), ("_baz_1", "b")]);
+        assert_eq!(string.substitute(&vars).unwrap(), "a/b");
+    }
+
+    #[test]
+    fn substitute_variable_names() {
+        assert!(StringSub::new("{f}").is_ok());
+        assert!(StringSub::new("{foo}").is_ok());
+        assert!(StringSub::new("{__foo__}").is_ok());
+        assert!(StringSub::new("{foo123}").is_ok());
+        assert!(StringSub::new("{FooBar}").is_ok());
+        assert!(StringSub::new("{f-o-o-}").is_ok());
+        assert!(StringSub::new("{123foo}").is_err());
+        assert!(StringSub::new("{-foo}").is_err());
+        assert!(StringSub::new("{foo#bar}").is_err());
+        assert!(StringSub::new("{!%(@)}").is_err());
+    }
+
+    // `valid_variable_name` is checked by a regex rather than the nom parser,
+    // so it gets its own coverage of the same naming rules.
+    #[test]
+    fn valid_variable_names() {
+        assert!(StringSub::valid_variable_name("f"));
+        assert!(StringSub::valid_variable_name("_foo"));
+        assert!(StringSub::valid_variable_name("FooBar"));
+        assert!(StringSub::valid_variable_name("foo_bar-123"));
+        assert!(!StringSub::valid_variable_name(""));
+        assert!(!StringSub::valid_variable_name("123foo"));
+        assert!(!StringSub::valid_variable_name("-foo"));
+        assert!(!StringSub::valid_variable_name("foo bar"));
+        assert!(!StringSub::valid_variable_name("{foo}"));
+    }
+
+    #[test]
+    fn substitute_invalid_format_string() {
+        assert!(StringSub::new("{").is_err());
+        assert!(StringSub::new("}").is_err());
+        assert!(StringSub::new("hello, {foo").is_err());
+        assert!(StringSub::new("hello, {}").is_err());
+        assert!(StringSub::new("this is a closing brace: }").is_err());
+        assert!(StringSub::new("hello, {foo{").is_err());
+        assert!(StringSub::new("foo{{bar}baz").is_err());
+        assert!(StringSub::new("{greeting}, {subject").is_err());
+    }
+
+    #[test]
+    fn substitute_missing_variables() {
+        let string = StringSub::new("hello, {foo}!").unwrap();
+        assert_eq!(string.vars().count(), 1);
+        let vars = HashMap::new();
+        assert!(string.substitute(&vars).is_err());
+    }
+
+    #[test]
+    fn parse_from_str() {
+        let string: StringSub = "hello, {foo}!".parse().unwrap();
+        assert_eq!(string.vars().collect::<Vec<_>>(), ["foo"]);
+        let vars = HashMap::from([("foo", "world")]);
+        assert_eq!(string.substitute(&vars).unwrap(), "hello, world!");
+
+        assert!("hello, {foo".parse::<StringSub>().is_err());
+    }
+}