manifest/tests: get them passing under Windows

We also need to check more things in the manifest/project handlers,
and use platform_utils in a few places to address Windows behavior.

Drop Python 2.7 from Windows testing as it definitely doesn't work
and we won't be fixing it.

Change-Id: I83d00ee9f1612312bb3f7147cb9535fc61268245
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/256113
Tested-by: Mike Frysinger <vapier@google.com>
Reviewed-by: Jonathan Nieder <jrn@google.com>
Reviewed-by: David Pursehouse <dpursehouse@collab.net>
diff --git a/.github/workflows/test-ci.yml b/.github/workflows/test-ci.yml
index 9306181..c33002d 100644
--- a/.github/workflows/test-ci.yml
+++ b/.github/workflows/test-ci.yml
@@ -15,6 +15,9 @@
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
         python-version: [2.7, 3.6, 3.7, 3.8]
+        exclude:
+          - os: windows-latest
+            python-version: 2.7
     runs-on: ${{ matrix.os }}
 
     steps:
diff --git a/manifest_xml.py b/manifest_xml.py
index 4162800..fe0735a 100644
--- a/manifest_xml.py
+++ b/manifest_xml.py
@@ -1010,19 +1010,30 @@
     # Assume paths might be used on case-insensitive filesystems.
     path = path.lower()
 
+    # Split up the path by its components.  We can't use os.path.sep exclusively
+    # as some platforms (like Windows) will convert / to \ and that bypasses all
+    # our constructed logic here.  Especially since manifest authors only use
+    # / in their paths.
+    resep = re.compile(r'[/%s]' % re.escape(os.path.sep))
+    parts = resep.split(path)
+
     # Some people use src="." to create stable links to projects.  Lets allow
     # that but reject all other uses of "." to keep things simple.
-    parts = path.split(os.path.sep)
     if parts != ['.']:
       for part in set(parts):
         if part in {'.', '..', '.git'} or part.startswith('.repo'):
           return 'bad component: %s' % (part,)
 
-    if not symlink and path.endswith(os.path.sep):
+    if not symlink and resep.match(path[-1]):
       return 'dirs not allowed'
 
+    # NB: The two abspath checks here are to handle platforms with multiple
+    # filesystem path styles (e.g. Windows).
     norm = os.path.normpath(path)
-    if norm == '..' or norm.startswith('../') or norm.startswith(os.path.sep):
+    if (norm == '..' or
+        (len(norm) >= 3 and norm.startswith('..') and resep.match(norm[0])) or
+        os.path.isabs(norm) or
+        norm.startswith('/')):
       return 'path cannot be outside'
 
   @classmethod
diff --git a/project.py b/project.py
index 2112ee3..99ef238 100644
--- a/project.py
+++ b/project.py
@@ -275,7 +275,12 @@
   NB: We rely on a number of paths already being filtered out while parsing the
   manifest.  See the validation logic in manifest_xml.py for more details.
   """
-  components = subpath.split(os.path.sep)
+  # Split up the path by its components.  We can't use os.path.sep exclusively
+  # as some platforms (like Windows) will convert / to \ and that bypasses all
+  # our constructed logic here.  Especially since manifest authors only use
+  # / in their paths.
+  resep = re.compile(r'[/%s]' % re.escape(os.path.sep))
+  components = resep.split(subpath)
   if skipfinal:
     # Whether the caller handles the final component itself.
     finalpart = components.pop()
diff --git a/tests/test_manifest_xml.py b/tests/test_manifest_xml.py
index b6ec5b8..1cb7297 100644
--- a/tests/test_manifest_xml.py
+++ b/tests/test_manifest_xml.py
@@ -18,6 +18,7 @@
 
 from __future__ import print_function
 
+import os
 import unittest
 
 import error
@@ -78,6 +79,11 @@
         # Block Unicode characters that get normalized out by filesystems.
         u'foo\u200Cbar',
     )
+    # Make sure platforms that use path separators (e.g. Windows) are also
+    # rejected properly.
+    if os.path.sep != '/':
+      PATHS += tuple(x.replace('/', os.path.sep) for x in PATHS)
+
     for path in PATHS:
       self.assertRaises(
           error.ManifestInvalidPathError, self.check_both, path, 'a')
diff --git a/tests/test_project.py b/tests/test_project.py
index b416386..67574cb 100644
--- a/tests/test_project.py
+++ b/tests/test_project.py
@@ -27,6 +27,7 @@
 
 import error
 import git_config
+import platform_utils
 import project
 
 
@@ -40,7 +41,7 @@
     subprocess.check_call(['git', 'init'], cwd=tempdir)
     yield tempdir
   finally:
-    shutil.rmtree(tempdir)
+    platform_utils.rmtree(tempdir)
 
 
 class RepoHookShebang(unittest.TestCase):
@@ -243,17 +244,19 @@
     src = os.path.join(self.worktree, 'foo.txt')
     sym = os.path.join(self.worktree, 'sym')
     self.touch(src)
-    os.symlink('foo.txt', sym)
+    platform_utils.symlink('foo.txt', sym)
     self.assertExists(sym)
     cf = self.CopyFile('sym', 'foo')
     self.assertRaises(error.ManifestInvalidPathError, cf._Copy)
 
   def test_src_block_symlink_traversal(self):
     """Do not allow reading through a symlink dir."""
-    src = os.path.join(self.worktree, 'bar', 'passwd')
-    os.symlink('/etc', os.path.join(self.worktree, 'bar'))
+    realfile = os.path.join(self.tempdir, 'file.txt')
+    self.touch(realfile)
+    src = os.path.join(self.worktree, 'bar', 'file.txt')
+    platform_utils.symlink(self.tempdir, os.path.join(self.worktree, 'bar'))
     self.assertExists(src)
-    cf = self.CopyFile('bar/foo.txt', 'foo')
+    cf = self.CopyFile('bar/file.txt', 'foo')
     self.assertRaises(error.ManifestInvalidPathError, cf._Copy)
 
   def test_src_block_copy_from_dir(self):
@@ -267,7 +270,7 @@
     """Do not allow writing to a symlink."""
     src = os.path.join(self.worktree, 'foo.txt')
     self.touch(src)
-    os.symlink('dest', os.path.join(self.topdir, 'sym'))
+    platform_utils.symlink('dest', os.path.join(self.topdir, 'sym'))
     cf = self.CopyFile('foo.txt', 'sym')
     self.assertRaises(error.ManifestInvalidPathError, cf._Copy)
 
@@ -275,7 +278,8 @@
     """Do not allow writing through a symlink dir."""
     src = os.path.join(self.worktree, 'foo.txt')
     self.touch(src)
-    os.symlink('/tmp', os.path.join(self.topdir, 'sym'))
+    platform_utils.symlink(tempfile.gettempdir(),
+                           os.path.join(self.topdir, 'sym'))
     cf = self.CopyFile('foo.txt', 'sym/foo.txt')
     self.assertRaises(error.ManifestInvalidPathError, cf._Copy)
 
@@ -303,7 +307,7 @@
     dest = os.path.join(self.topdir, 'foo')
     self.assertExists(dest)
     self.assertTrue(os.path.islink(dest))
-    self.assertEqual('git-project/foo.txt', os.readlink(dest))
+    self.assertEqual(os.path.join('git-project', 'foo.txt'), os.readlink(dest))
 
   def test_src_subdir(self):
     """Link to a file in a subdir of a project."""
@@ -320,7 +324,7 @@
     lf = self.LinkFile('.', 'foo/bar')
     lf._Link()
     self.assertExists(dest)
-    self.assertEqual('../git-project', os.readlink(dest))
+    self.assertEqual(os.path.join('..', 'git-project'), os.readlink(dest))
 
   def test_dest_subdir(self):
     """Link a file to a subdir of a checkout."""
@@ -354,10 +358,10 @@
     self.touch(src)
     lf = self.LinkFile('foo.txt', 'sym')
     lf._Link()
-    self.assertEqual('git-project/foo.txt', os.readlink(dest))
+    self.assertEqual(os.path.join('git-project', 'foo.txt'), os.readlink(dest))
 
     # Point the symlink somewhere else.
     os.unlink(dest)
-    os.symlink('/', dest)
+    platform_utils.symlink(self.tempdir, dest)
     lf._Link()
-    self.assertEqual('git-project/foo.txt', os.readlink(dest))
+    self.assertEqual(os.path.join('git-project', 'foo.txt'), os.readlink(dest))