Procházet zdrojové kódy

manifest: add basic path checks for <copyfile> & <linkfile>

Reject paths in <copyfile> & <linkfile> that point outside of their
respective scopes.  This validates paths while parsing the manifest
as this should be quick & cheap: we don't access the filesystem as
this code runs before we've synced.

Bug: https://crbug.com/gerrit/11218
Change-Id: I8e17bb91f3f5b905a9d76391b29fbab4cb77aa58
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/232932
Tested-by: Mike Frysinger <vapier@google.com>
Reviewed-by: Mike Frysinger <vapier@google.com>
Reviewed-by: Michael Mortensen <mmortensen@google.com>
Mike Frysinger před 6 roky
rodič
revize
04122b7261
4 změnil soubory, kde provedl 170 přidání a 4 odebrání
  1. 1 1
      docs/manifest-format.md
  2. 4 0
      error.py
  3. 82 3
      manifest_xml.py
  4. 83 0
      tests/test_manifest_xml.py

+ 1 - 1
docs/manifest-format.md

@@ -338,7 +338,7 @@ It's just like copyfile and runs at the same time as copyfile but
 instead of copying it creates a symlink.
 
 The symlink is created at "dest" (relative to the top of the tree) and
-points to the path specified by "src".
+points to the path specified by "src" which is a path in the project.
 
 Parent directories of "dest" will be automatically created if missing.
 

+ 4 - 0
error.py

@@ -22,6 +22,10 @@ class ManifestInvalidRevisionError(Exception):
   """The revision value in a project is incorrect.
   """
 
+class ManifestInvalidPathError(Exception):
+  """A path used in <copyfile> or <linkfile> is incorrect.
+  """
+
 class NoManifestException(Exception):
   """The required manifest does not exist.
   """

+ 82 - 3
manifest_xml.py

@@ -35,7 +35,8 @@ from git_config import GitConfig
 from git_refs import R_HEADS, HEAD
 import platform_utils
 from project import RemoteSpec, Project, MetaProject
-from error import ManifestParseError, ManifestInvalidRevisionError
+from error import (ManifestParseError, ManifestInvalidPathError,
+                   ManifestInvalidRevisionError)
 
 MANIFEST_FILE_NAME = 'manifest.xml'
 LOCAL_MANIFEST_NAME = 'local_manifest.xml'
@@ -943,12 +944,88 @@ class XmlManifest(object):
       worktree = os.path.join(parent.worktree, path).replace('\\', '/')
     return relpath, worktree, gitdir, objdir
 
+  @staticmethod
+  def _CheckLocalPath(path, symlink=False):
+    """Verify |path| is reasonable for use in <copyfile> & <linkfile>."""
+    if '~' in path:
+      return '~ not allowed (due to 8.3 filenames on Windows filesystems)'
+
+    # Some filesystems (like Apple's HFS+) try to normalize Unicode codepoints
+    # which means there are alternative names for ".git".  Reject paths with
+    # these in it as there shouldn't be any reasonable need for them here.
+    # The set of codepoints here was cribbed from jgit's implementation:
+    # https://eclipse.googlesource.com/jgit/jgit/+/9110037e3e9461ff4dac22fee84ef3694ed57648/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectChecker.java#884
+    BAD_CODEPOINTS = {
+        u'\u200C',  # ZERO WIDTH NON-JOINER
+        u'\u200D',  # ZERO WIDTH JOINER
+        u'\u200E',  # LEFT-TO-RIGHT MARK
+        u'\u200F',  # RIGHT-TO-LEFT MARK
+        u'\u202A',  # LEFT-TO-RIGHT EMBEDDING
+        u'\u202B',  # RIGHT-TO-LEFT EMBEDDING
+        u'\u202C',  # POP DIRECTIONAL FORMATTING
+        u'\u202D',  # LEFT-TO-RIGHT OVERRIDE
+        u'\u202E',  # RIGHT-TO-LEFT OVERRIDE
+        u'\u206A',  # INHIBIT SYMMETRIC SWAPPING
+        u'\u206B',  # ACTIVATE SYMMETRIC SWAPPING
+        u'\u206C',  # INHIBIT ARABIC FORM SHAPING
+        u'\u206D',  # ACTIVATE ARABIC FORM SHAPING
+        u'\u206E',  # NATIONAL DIGIT SHAPES
+        u'\u206F',  # NOMINAL DIGIT SHAPES
+        u'\uFEFF',  # ZERO WIDTH NO-BREAK SPACE
+    }
+    if BAD_CODEPOINTS & set(path):
+      # This message is more expansive than reality, but should be fine.
+      return 'Unicode combining characters not allowed'
+
+    # Assume paths might be used on case-insensitive filesystems.
+    path = path.lower()
+
+    # We don't really need to reject '.' here, but there shouldn't really be a
+    # need to ever use it, so no need to accept it either.
+    for part in set(path.split(os.path.sep)):
+      if part in {'.', '..', '.git'} or part.startswith('.repo'):
+        return 'bad component: %s' % (part,)
+
+    if not symlink and path.endswith(os.path.sep):
+      return 'dirs not allowed'
+
+    norm = os.path.normpath(path)
+    if norm == '..' or norm.startswith('../') or norm.startswith(os.path.sep):
+      return 'path cannot be outside'
+
+  @classmethod
+  def _ValidateFilePaths(cls, element, src, dest):
+    """Verify |src| & |dest| are reasonable for <copyfile> & <linkfile>.
+
+    We verify the path independent of any filesystem state as we won't have a
+    checkout available to compare to.  i.e. This is for parsing validation
+    purposes only.
+
+    We'll do full/live sanity checking before we do the actual filesystem
+    modifications in _CopyFile/_LinkFile/etc...
+    """
+    # |dest| is the file we write to or symlink we create.
+    # It is relative to the top of the repo client checkout.
+    msg = cls._CheckLocalPath(dest)
+    if msg:
+      raise ManifestInvalidPathError(
+          '<%s> invalid "dest": %s: %s' % (element, dest, msg))
+
+    # |src| is the file we read from or path we point to for symlinks.
+    # It is relative to the top of the git project checkout.
+    msg = cls._CheckLocalPath(src, symlink=element == 'linkfile')
+    if msg:
+      raise ManifestInvalidPathError(
+          '<%s> invalid "src": %s: %s' % (element, src, msg))
+
   def _ParseCopyFile(self, project, node):
     src = self._reqatt(node, 'src')
     dest = self._reqatt(node, 'dest')
     if not self.IsMirror:
       # src is project relative;
-      # dest is relative to the top of the tree
+      # dest is relative to the top of the tree.
+      # We only validate paths if we actually plan to process them.
+      self._ValidateFilePaths('copyfile', src, dest)
       project.AddCopyFile(src, dest, os.path.join(self.topdir, dest))
 
   def _ParseLinkFile(self, project, node):
@@ -956,7 +1033,9 @@ class XmlManifest(object):
     dest = self._reqatt(node, 'dest')
     if not self.IsMirror:
       # src is project relative;
-      # dest is relative to the top of the tree
+      # dest is relative to the top of the tree.
+      # We only validate paths if we actually plan to process them.
+      self._ValidateFilePaths('linkfile', src, dest)
       project.AddLinkFile(src, dest, os.path.join(self.topdir, dest))
 
   def _ParseAnnotation(self, project, node):

+ 83 - 0
tests/test_manifest_xml.py

@@ -0,0 +1,83 @@
+# -*- coding:utf-8 -*-
+#
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unittests for the manifest_xml.py module."""
+
+from __future__ import print_function
+
+import unittest
+
+import error
+import manifest_xml
+
+
+class ManifestValidateFilePaths(unittest.TestCase):
+  """Check _ValidateFilePaths helper.
+
+  This doesn't access a real filesystem.
+  """
+
+  def check_both(self, *args):
+    manifest_xml.XmlManifest._ValidateFilePaths('copyfile', *args)
+    manifest_xml.XmlManifest._ValidateFilePaths('linkfile', *args)
+
+  def test_normal_path(self):
+    """Make sure good paths are accepted."""
+    self.check_both('foo', 'bar')
+    self.check_both('foo/bar', 'bar')
+    self.check_both('foo', 'bar/bar')
+    self.check_both('foo/bar', 'bar/bar')
+
+  def test_symlink_targets(self):
+    """Some extra checks for symlinks."""
+    def check(*args):
+      manifest_xml.XmlManifest._ValidateFilePaths('linkfile', *args)
+
+    # We allow symlinks to end in a slash since we allow them to point to dirs
+    # in general.  Technically the slash isn't necessary.
+    check('foo/', 'bar')
+
+  def test_bad_paths(self):
+    """Make sure bad paths (src & dest) are rejected."""
+    PATHS = (
+        '..',
+        '../',
+        './',
+        'foo/',
+        './foo',
+        '../foo',
+        'foo/./bar',
+        'foo/../../bar',
+        '/foo',
+        './../foo',
+        '.git/foo',
+        # Check case folding.
+        '.GIT/foo',
+        'blah/.git/foo',
+        '.repo/foo',
+        '.repoconfig',
+        # Block ~ due to 8.3 filenames on Windows filesystems.
+        '~',
+        'foo~',
+        'blah/foo~',
+        # Block Unicode characters that get normalized out by filesystems.
+        u'foo\u200Cbar',
+    )
+    for path in PATHS:
+      self.assertRaises(
+          error.ManifestInvalidPathError, self.check_both, path, 'a')
+      self.assertRaises(
+          error.ManifestInvalidPathError, self.check_both, 'a', path)