crates/vendor/memchr-2.5.0/src/tests/memchr/testdata.rs - third_party/rust_crates - Git at Google

 use std::iter::repeat;

 /// Create a sequence of tests that should be run by memchr implementations.
 pub fn memchr_tests() -> Vec<MemchrTest> {
     let mut tests = Vec::new();
     for statict in MEMCHR_TESTS {
         assert!(!statict.corpus.contains("%"), "% is not allowed in corpora");
         assert!(!statict.corpus.contains("#"), "# is not allowed in corpora");
         assert!(!statict.needles.contains(&b'%'), "% is an invalid needle");
         assert!(!statict.needles.contains(&b'#'), "# is an invalid needle");

         let t = MemchrTest {
             corpus: statict.corpus.to_string(),
             needles: statict.needles.to_vec(),
             positions: statict.positions.to_vec(),
         };
         tests.push(t.clone());
         tests.extend(t.expand());
     }
     tests
 }

 /// A set of tests for memchr-like functions.
 ///
 /// These tests mostly try to cover the short string cases. We cover the longer
 /// string cases via the benchmarks (which are tests themselves), via
 /// quickcheck tests and via automatic expansion of each test case (by
 /// increasing the corpus size). Finally, we cover different alignment cases
 /// in the tests by varying the starting point of the slice.
 const MEMCHR_TESTS: &[MemchrTestStatic] = &[
     // one needle (applied to memchr + memchr2 + memchr3)
     MemchrTestStatic { corpus: "a", needles: &[b'a'], positions: &[0] },
     MemchrTestStatic { corpus: "aa", needles: &[b'a'], positions: &[0, 1] },
     MemchrTestStatic {
         corpus: "aaa",
         needles: &[b'a'],
         positions: &[0, 1, 2],
     },
     MemchrTestStatic { corpus: "", needles: &[b'a'], positions: &[] },
     MemchrTestStatic { corpus: "z", needles: &[b'a'], positions: &[] },
     MemchrTestStatic { corpus: "zz", needles: &[b'a'], positions: &[] },
     MemchrTestStatic { corpus: "zza", needles: &[b'a'], positions: &[2] },
     MemchrTestStatic { corpus: "zaza", needles: &[b'a'], positions: &[1, 3] },
     MemchrTestStatic { corpus: "zzza", needles: &[b'a'], positions: &[3] },
     MemchrTestStatic { corpus: "\x00a", needles: &[b'a'], positions: &[1] },
     MemchrTestStatic { corpus: "\x00", needles: &[b'\x00'], positions: &[0] },
     MemchrTestStatic {
         corpus: "\x00\x00",
         needles: &[b'\x00'],
         positions: &[0, 1],
     },
     MemchrTestStatic {
         corpus: "\x00a\x00",
         needles: &[b'\x00'],
         positions: &[0, 2],
     },
     MemchrTestStatic {
         corpus: "zzzzzzzzzzzzzzzza",
         needles: &[b'a'],
         positions: &[16],
     },
     MemchrTestStatic {
         corpus: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza",
         needles: &[b'a'],
         positions: &[32],
     },
     // two needles (applied to memchr2 + memchr3)
     MemchrTestStatic {
         corpus: "az",
         needles: &[b'a', b'z'],
         positions: &[0, 1],
     },
     MemchrTestStatic {
         corpus: "az",
         needles: &[b'a', b'z'],
         positions: &[0, 1],
     },
     MemchrTestStatic { corpus: "az", needles: &[b'x', b'y'], positions: &[] },
     MemchrTestStatic { corpus: "az", needles: &[b'a', b'y'], positions: &[0] },
     MemchrTestStatic { corpus: "az", needles: &[b'x', b'z'], positions: &[1] },
     MemchrTestStatic {
         corpus: "yyyyaz",
         needles: &[b'a', b'z'],
         positions: &[4, 5],
     },
     MemchrTestStatic {
         corpus: "yyyyaz",
         needles: &[b'z', b'a'],
         positions: &[4, 5],
     },
     // three needles (applied to memchr3)
     MemchrTestStatic {
         corpus: "xyz",
         needles: &[b'x', b'y', b'z'],
         positions: &[0, 1, 2],
     },
     MemchrTestStatic {
         corpus: "zxy",
         needles: &[b'x', b'y', b'z'],
         positions: &[0, 1, 2],
     },
     MemchrTestStatic {
         corpus: "zxy",
         needles: &[b'x', b'a', b'z'],
         positions: &[0, 1],
     },
     MemchrTestStatic {
         corpus: "zxy",
         needles: &[b't', b'a', b'z'],
         positions: &[0],
     },
     MemchrTestStatic {
         corpus: "yxz",
         needles: &[b't', b'a', b'z'],
         positions: &[2],
     },
 ];

 /// A description of a test on a memchr like function.
 #[derive(Clone, Debug)]
 pub struct MemchrTest {
     /// The thing to search. We use `&str` instead of `&[u8]` because they
     /// are nicer to write in tests, and we don't miss much since memchr
     /// doesn't care about UTF-8.
     ///
     /// Corpora cannot contain either '%' or '#'. We use these bytes when
     /// expanding test cases into many test cases, and we assume they are not
     /// used. If they are used, `memchr_tests` will panic.
     corpus: String,
     /// The needles to search for. This is intended to be an "alternation" of
     /// needles. The number of needles may cause this test to be skipped for
     /// some memchr variants. For example, a test with 2 needles cannot be used
     /// to test `memchr`, but can be used to test `memchr2` and `memchr3`.
     /// However, a test with only 1 needle can be used to test all of `memchr`,
     /// `memchr2` and `memchr3`. We achieve this by filling in the needles with
     /// bytes that we never used in the corpus (such as '#').
     needles: Vec<u8>,
     /// The positions expected to match for all of the needles.
     positions: Vec<usize>,
 }

 /// Like MemchrTest, but easier to define as a constant.
 #[derive(Clone, Debug)]
 pub struct MemchrTestStatic {
     corpus: &'static str,
     needles: &'static [u8],
     positions: &'static [usize],
 }

 impl MemchrTest {
     pub fn one<F: Fn(u8, &[u8]) -> Option<usize>>(&self, reverse: bool, f: F) {
         let needles = match self.needles(1) {
             None => return,
             Some(needles) => needles,
         };
         // We test different alignments here. Since some implementations use
         // AVX2, which can read 32 bytes at a time, we test at least that.
         // Moreover, with loop unrolling, we sometimes process 64 (sse2) or 128
         // (avx) bytes at a time, so we include that in our offsets as well.
         //
         // You might think this would cause most needles to not be found, but
         // we actually expand our tests to include corpus sizes all the way up
         // to >500 bytes, so we should exercise most branches.
         for align in 0..130 {
             let corpus = self.corpus(align);
             assert_eq!(
                 self.positions(align, reverse).get(0).cloned(),
                 f(needles[0], corpus.as_bytes()),
                 "search for {:?} failed in: {:?} (len: {}, alignment: {})",
                 needles[0] as char,
                 corpus,
                 corpus.len(),
                 align
             );
         }
     }

     pub fn two<F: Fn(u8, u8, &[u8]) -> Option<usize>>(
         &self,
         reverse: bool,
         f: F,
     ) {
         let needles = match self.needles(2) {
             None => return,
             Some(needles) => needles,
         };
         for align in 0..130 {
             let corpus = self.corpus(align);
             assert_eq!(
                 self.positions(align, reverse).get(0).cloned(),
                 f(needles[0], needles[1], corpus.as_bytes()),
                 "search for {:?}|{:?} failed in: {:?} \
                  (len: {}, alignment: {})",
                 needles[0] as char,
                 needles[1] as char,
                 corpus,
                 corpus.len(),
                 align
             );
         }
     }

     pub fn three<F: Fn(u8, u8, u8, &[u8]) -> Option<usize>>(
         &self,
         reverse: bool,
         f: F,
     ) {
         let needles = match self.needles(3) {
             None => return,
             Some(needles) => needles,
         };
         for align in 0..130 {
             let corpus = self.corpus(align);
             assert_eq!(
                 self.positions(align, reverse).get(0).cloned(),
                 f(needles[0], needles[1], needles[2], corpus.as_bytes()),
                 "search for {:?}|{:?}|{:?} failed in: {:?} \
                  (len: {}, alignment: {})",
                 needles[0] as char,
                 needles[1] as char,
                 needles[2] as char,
                 corpus,
                 corpus.len(),
                 align
             );
         }
     }

     pub fn iter_one<'a, I, F>(&'a self, reverse: bool, f: F)
     where
         F: FnOnce(u8, &'a [u8]) -> I,
         I: Iterator<Item = usize>,
     {
         if let Some(ns) = self.needles(1) {
             self.iter(reverse, f(ns[0], self.corpus.as_bytes()));
         }
     }

     pub fn iter_two<'a, I, F>(&'a self, reverse: bool, f: F)
     where
         F: FnOnce(u8, u8, &'a [u8]) -> I,
         I: Iterator<Item = usize>,
     {
         if let Some(ns) = self.needles(2) {
             self.iter(reverse, f(ns[0], ns[1], self.corpus.as_bytes()));
         }
     }

     pub fn iter_three<'a, I, F>(&'a self, reverse: bool, f: F)
     where
         F: FnOnce(u8, u8, u8, &'a [u8]) -> I,
         I: Iterator<Item = usize>,
     {
         if let Some(ns) = self.needles(3) {
             self.iter(reverse, f(ns[0], ns[1], ns[2], self.corpus.as_bytes()));
         }
     }

     /// Test that the positions yielded by the given iterator match the
     /// positions in this test. If reverse is true, then reverse the positions
     /// before comparing them.
     fn iter<I: Iterator<Item = usize>>(&self, reverse: bool, it: I) {
         assert_eq!(
             self.positions(0, reverse),
             it.collect::<Vec<usize>>(),
             r"search for {:?} failed in: {:?}",
             self.needles.iter().map(|&b| b as char).collect::<Vec<char>>(),
             self.corpus
         );
     }

     /// Expand this test into many variations of the same test.
     ///
     /// In particular, this will generate more tests with larger corpus sizes.
     /// The expected positions are updated to maintain the integrity of the
     /// test.
     ///
     /// This is important in testing a memchr implementation, because there are
     /// often different cases depending on the length of the corpus.
     ///
     /// Note that we extend the corpus by adding `%` bytes, which we
     /// don't otherwise use as a needle.
     fn expand(&self) -> Vec<MemchrTest> {
         let mut more = Vec::new();

         // Add bytes to the start of the corpus.
         for i in 1..515 {
             let mut t = self.clone();
             let mut new_corpus: String = repeat('%').take(i).collect();
             new_corpus.push_str(&t.corpus);
             t.corpus = new_corpus;
             t.positions = t.positions.into_iter().map(|p| p + i).collect();
             more.push(t);
         }
         // Add bytes to the end of the corpus.
         for i in 1..515 {
             let mut t = self.clone();
             let padding: String = repeat('%').take(i).collect();
             t.corpus.push_str(&padding);
             more.push(t);
         }

         more
     }

     /// Return the corpus at the given alignment.
     ///
     /// If the alignment exceeds the length of the corpus, then this returns
     /// an empty slice.
     fn corpus(&self, align: usize) -> &str {
         self.corpus.get(align..).unwrap_or("")
     }

     /// Return exactly `count` needles from this test. If this test has less
     /// than `count` needles, then add `#` until the number of needles
     /// matches `count`. If this test has more than `count` needles, then
     /// return `None` (because there is no way to use this test data for a
     /// search using fewer needles).
     fn needles(&self, count: usize) -> Option<Vec<u8>> {
         if self.needles.len() > count {
             return None;
         }

         let mut needles = self.needles.to_vec();
         for _ in needles.len()..count {
             // we assume # is never used in tests.
             needles.push(b'#');
         }
         Some(needles)
     }

     /// Return the positions in this test, reversed if `reverse` is true.
     ///
     /// If alignment is given, then all positions greater than or equal to that
     /// alignment are offset by the alignment. Positions less than the
     /// alignment are dropped.
     fn positions(&self, align: usize, reverse: bool) -> Vec<usize> {
         let positions = if reverse {
             let mut positions = self.positions.to_vec();
             positions.reverse();
             positions
         } else {
             self.positions.to_vec()
         };
         positions
             .into_iter()
             .filter(|&p| p >= align)
             .map(|p| p - align)
             .collect()
     }
 }
	use std::iter::repeat;

	/// Create a sequence of tests that should be run by memchr implementations.
	pub fn memchr_tests() -> Vec<MemchrTest> {
	let mut tests = Vec::new();
	for statict in MEMCHR_TESTS {
	assert!(!statict.corpus.contains("%"), "% is not allowed in corpora");
	assert!(!statict.corpus.contains("#"), "# is not allowed in corpora");
	assert!(!statict.needles.contains(&b'%'), "% is an invalid needle");
	assert!(!statict.needles.contains(&b'#'), "# is an invalid needle");

	let t = MemchrTest {
	corpus: statict.corpus.to_string(),
	needles: statict.needles.to_vec(),
	positions: statict.positions.to_vec(),
	};
	tests.push(t.clone());
	tests.extend(t.expand());
	}
	tests
	}

	/// A set of tests for memchr-like functions.
	///
	/// These tests mostly try to cover the short string cases. We cover the longer
	/// string cases via the benchmarks (which are tests themselves), via
	/// quickcheck tests and via automatic expansion of each test case (by
	/// increasing the corpus size). Finally, we cover different alignment cases
	/// in the tests by varying the starting point of the slice.
	const MEMCHR_TESTS: &[MemchrTestStatic] = &[
	// one needle (applied to memchr + memchr2 + memchr3)
	MemchrTestStatic { corpus: "a", needles: &[b'a'], positions: &[0] },
	MemchrTestStatic { corpus: "aa", needles: &[b'a'], positions: &[0, 1] },
	MemchrTestStatic {
	corpus: "aaa",
	needles: &[b'a'],
	positions: &[0, 1, 2],
	},
	MemchrTestStatic { corpus: "", needles: &[b'a'], positions: &[] },
	MemchrTestStatic { corpus: "z", needles: &[b'a'], positions: &[] },
	MemchrTestStatic { corpus: "zz", needles: &[b'a'], positions: &[] },
	MemchrTestStatic { corpus: "zza", needles: &[b'a'], positions: &[2] },
	MemchrTestStatic { corpus: "zaza", needles: &[b'a'], positions: &[1, 3] },
	MemchrTestStatic { corpus: "zzza", needles: &[b'a'], positions: &[3] },
	MemchrTestStatic { corpus: "\x00a", needles: &[b'a'], positions: &[1] },
	MemchrTestStatic { corpus: "\x00", needles: &[b'\x00'], positions: &[0] },
	MemchrTestStatic {
	corpus: "\x00\x00",
	needles: &[b'\x00'],
	positions: &[0, 1],
	},
	MemchrTestStatic {
	corpus: "\x00a\x00",
	needles: &[b'\x00'],
	positions: &[0, 2],
	},
	MemchrTestStatic {
	corpus: "zzzzzzzzzzzzzzzza",
	needles: &[b'a'],
	positions: &[16],
	},
	MemchrTestStatic {
	corpus: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza",
	needles: &[b'a'],
	positions: &[32],
	},
	// two needles (applied to memchr2 + memchr3)
	MemchrTestStatic {
	corpus: "az",
	needles: &[b'a', b'z'],
	positions: &[0, 1],
	},
	MemchrTestStatic {
	corpus: "az",
	needles: &[b'a', b'z'],
	positions: &[0, 1],
	},
	MemchrTestStatic { corpus: "az", needles: &[b'x', b'y'], positions: &[] },
	MemchrTestStatic { corpus: "az", needles: &[b'a', b'y'], positions: &[0] },
	MemchrTestStatic { corpus: "az", needles: &[b'x', b'z'], positions: &[1] },
	MemchrTestStatic {
	corpus: "yyyyaz",
	needles: &[b'a', b'z'],
	positions: &[4, 5],
	},
	MemchrTestStatic {
	corpus: "yyyyaz",
	needles: &[b'z', b'a'],
	positions: &[4, 5],
	},
	// three needles (applied to memchr3)
	MemchrTestStatic {
	corpus: "xyz",
	needles: &[b'x', b'y', b'z'],
	positions: &[0, 1, 2],
	},
	MemchrTestStatic {
	corpus: "zxy",
	needles: &[b'x', b'y', b'z'],
	positions: &[0, 1, 2],
	},
	MemchrTestStatic {
	corpus: "zxy",
	needles: &[b'x', b'a', b'z'],
	positions: &[0, 1],
	},
	MemchrTestStatic {
	corpus: "zxy",
	needles: &[b't', b'a', b'z'],
	positions: &[0],
	},
	MemchrTestStatic {
	corpus: "yxz",
	needles: &[b't', b'a', b'z'],
	positions: &[2],
	},
	];

	/// A description of a test on a memchr like function.
	#[derive(Clone, Debug)]
	pub struct MemchrTest {
	/// The thing to search. We use `&str` instead of `&[u8]` because they
	/// are nicer to write in tests, and we don't miss much since memchr
	/// doesn't care about UTF-8.
	///
	/// Corpora cannot contain either '%' or '#'. We use these bytes when
	/// expanding test cases into many test cases, and we assume they are not
	/// used. If they are used, `memchr_tests` will panic.
	corpus: String,
	/// The needles to search for. This is intended to be an "alternation" of
	/// needles. The number of needles may cause this test to be skipped for
	/// some memchr variants. For example, a test with 2 needles cannot be used
	/// to test `memchr`, but can be used to test `memchr2` and `memchr3`.
	/// However, a test with only 1 needle can be used to test all of `memchr`,
	/// `memchr2` and `memchr3`. We achieve this by filling in the needles with
	/// bytes that we never used in the corpus (such as '#').
	needles: Vec<u8>,
	/// The positions expected to match for all of the needles.
	positions: Vec<usize>,
	}

	/// Like MemchrTest, but easier to define as a constant.
	#[derive(Clone, Debug)]
	pub struct MemchrTestStatic {
	corpus: &'static str,
	needles: &'static [u8],
	positions: &'static [usize],
	}

	impl MemchrTest {
	pub fn one<F: Fn(u8, &[u8]) -> Option<usize>>(&self, reverse: bool, f: F) {
	let needles = match self.needles(1) {
	None => return,
	Some(needles) => needles,
	};
	// We test different alignments here. Since some implementations use
	// AVX2, which can read 32 bytes at a time, we test at least that.
	// Moreover, with loop unrolling, we sometimes process 64 (sse2) or 128
	// (avx) bytes at a time, so we include that in our offsets as well.
	//
	// You might think this would cause most needles to not be found, but
	// we actually expand our tests to include corpus sizes all the way up
	// to >500 bytes, so we should exercise most branches.
	for align in 0..130 {
	let corpus = self.corpus(align);
	assert_eq!(
	self.positions(align, reverse).get(0).cloned(),
	f(needles[0], corpus.as_bytes()),
	"search for {:?} failed in: {:?} (len: {}, alignment: {})",
	needles[0] as char,
	corpus,
	corpus.len(),
	align
	);
	}
	}

	pub fn two<F: Fn(u8, u8, &[u8]) -> Option<usize>>(
	&self,
	reverse: bool,
	f: F,
	) {
	let needles = match self.needles(2) {
	None => return,
	Some(needles) => needles,
	};
	for align in 0..130 {
	let corpus = self.corpus(align);
	assert_eq!(
	self.positions(align, reverse).get(0).cloned(),
	f(needles[0], needles[1], corpus.as_bytes()),
	"search for {:?}\|{:?} failed in: {:?} \
	(len: {}, alignment: {})",
	needles[0] as char,
	needles[1] as char,
	corpus,
	corpus.len(),
	align
	);
	}
	}

	pub fn three<F: Fn(u8, u8, u8, &[u8]) -> Option<usize>>(
	&self,
	reverse: bool,
	f: F,
	) {
	let needles = match self.needles(3) {
	None => return,
	Some(needles) => needles,
	};
	for align in 0..130 {
	let corpus = self.corpus(align);
	assert_eq!(
	self.positions(align, reverse).get(0).cloned(),
	f(needles[0], needles[1], needles[2], corpus.as_bytes()),
	"search for {:?}\|{:?}\|{:?} failed in: {:?} \
	(len: {}, alignment: {})",
	needles[0] as char,
	needles[1] as char,
	needles[2] as char,
	corpus,
	corpus.len(),
	align
	);
	}
	}

	pub fn iter_one<'a, I, F>(&'a self, reverse: bool, f: F)
	where
	F: FnOnce(u8, &'a [u8]) -> I,
	I: Iterator<Item = usize>,
	{
	if let Some(ns) = self.needles(1) {
	self.iter(reverse, f(ns[0], self.corpus.as_bytes()));
	}
	}

	pub fn iter_two<'a, I, F>(&'a self, reverse: bool, f: F)
	where
	F: FnOnce(u8, u8, &'a [u8]) -> I,
	I: Iterator<Item = usize>,
	{
	if let Some(ns) = self.needles(2) {
	self.iter(reverse, f(ns[0], ns[1], self.corpus.as_bytes()));
	}
	}

	pub fn iter_three<'a, I, F>(&'a self, reverse: bool, f: F)
	where
	F: FnOnce(u8, u8, u8, &'a [u8]) -> I,
	I: Iterator<Item = usize>,
	{
	if let Some(ns) = self.needles(3) {
	self.iter(reverse, f(ns[0], ns[1], ns[2], self.corpus.as_bytes()));
	}
	}

	/// Test that the positions yielded by the given iterator match the
	/// positions in this test. If reverse is true, then reverse the positions
	/// before comparing them.
	fn iter<I: Iterator<Item = usize>>(&self, reverse: bool, it: I) {
	assert_eq!(
	self.positions(0, reverse),
	it.collect::<Vec<usize>>(),
	r"search for {:?} failed in: {:?}",
	self.needles.iter().map(\|&b\| b as char).collect::<Vec<char>>(),
	self.corpus
	);
	}

	/// Expand this test into many variations of the same test.
	///
	/// In particular, this will generate more tests with larger corpus sizes.
	/// The expected positions are updated to maintain the integrity of the
	/// test.
	///
	/// This is important in testing a memchr implementation, because there are
	/// often different cases depending on the length of the corpus.
	///
	/// Note that we extend the corpus by adding `%` bytes, which we
	/// don't otherwise use as a needle.
	fn expand(&self) -> Vec<MemchrTest> {
	let mut more = Vec::new();

	// Add bytes to the start of the corpus.
	for i in 1..515 {
	let mut t = self.clone();
	let mut new_corpus: String = repeat('%').take(i).collect();
	new_corpus.push_str(&t.corpus);
	t.corpus = new_corpus;
	t.positions = t.positions.into_iter().map(\|p\| p + i).collect();
	more.push(t);
	}
	// Add bytes to the end of the corpus.
	for i in 1..515 {
	let mut t = self.clone();
	let padding: String = repeat('%').take(i).collect();
	t.corpus.push_str(&padding);
	more.push(t);
	}

	more
	}

	/// Return the corpus at the given alignment.
	///
	/// If the alignment exceeds the length of the corpus, then this returns
	/// an empty slice.
	fn corpus(&self, align: usize) -> &str {
	self.corpus.get(align..).unwrap_or("")
	}

	/// Return exactly `count` needles from this test. If this test has less
	/// than `count` needles, then add `#` until the number of needles
	/// matches `count`. If this test has more than `count` needles, then
	/// return `None` (because there is no way to use this test data for a
	/// search using fewer needles).
	fn needles(&self, count: usize) -> Option<Vec<u8>> {
	if self.needles.len() > count {
	return None;
	}

	let mut needles = self.needles.to_vec();
	for _ in needles.len()..count {
	// we assume # is never used in tests.
	needles.push(b'#');
	}
	Some(needles)
	}

	/// Return the positions in this test, reversed if `reverse` is true.
	///
	/// If alignment is given, then all positions greater than or equal to that
	/// alignment are offset by the alignment. Positions less than the
	/// alignment are dropped.
	fn positions(&self, align: usize, reverse: bool) -> Vec<usize> {
	let positions = if reverse {
	let mut positions = self.positions.to_vec();
	positions.reverse();
	positions
	} else {
	self.positions.to_vec()
	};
	positions
	.into_iter()
	.filter(\|&p\| p >= align)
	.map(\|p\| p - align)
	.collect()
	}
	}