Remy Bohmer | 16c1328 | 2020-09-10 10:38:04 +0200 | [diff] [blame] | 1 | # -*- coding:utf-8 -*- |
| 2 | # |
| 3 | # Copyright (C) 2008 The Android Open Source Project |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | # you may not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
| 16 | |
import errno
import json
import os
import re
import subprocess
import sys
import traceback

from error import HookError
from git_refs import HEAD

from pyversion import is_python3
if is_python3():
  import urllib.parse
else:
  import imp
  import urlparse
  urllib = imp.new_module('urllib')
  urllib.parse = urlparse
  input = raw_input  # noqa: F821
| 35 | |
class RepoHook(object):
  """A RepoHook contains information about a script to run as a hook.

  Hooks are used to run a python script before running an upload (for instance,
  to run presubmit checks).  Eventually, we may have hooks for other actions.

  This shouldn't be confused with files in the 'repo/hooks' directory.  Those
  files are copied into each '.git/hooks' folder for each project.  Repo-level
  hooks are associated instead with repo actions.

  Hooks are always python.  When a hook is run, we will load the hook into the
  interpreter and execute its main() function.
  """

  def __init__(self,
               hook_type,
               hooks_project,
               topdir,
               manifest_url,
               abort_if_user_denies=False):
    """RepoHook constructor.

    Params:
      hook_type: A string representing the type of hook.  This is also used
          to figure out the name of the file containing the hook.  For
          example: 'pre-upload'.
      hooks_project: The project containing the repo hooks.  If you have a
          manifest, this is manifest.repo_hooks_project.  OK if this is None,
          which will make the hook a no-op.
      topdir: Repo's top directory (the one containing the .repo directory).
          Scripts will run with CWD as this directory.  If you have a manifest,
          this is manifest.topdir
      manifest_url: The URL to the manifest git repo.
      abort_if_user_denies: If True, we'll throw a HookError() if the user
          doesn't allow us to run the hook.
    """
    self._hook_type = hook_type
    self._hooks_project = hooks_project
    self._manifest_url = manifest_url
    self._topdir = topdir
    self._abort_if_user_denies = abort_if_user_denies

    # Store the full path to the script for convenience.  The script is named
    # after the hook type and lives at the top of the hooks project worktree.
    if self._hooks_project:
      self._script_fullpath = os.path.join(self._hooks_project.worktree,
                                           self._hook_type + '.py')
    else:
      self._script_fullpath = None

  def _GetHash(self):
    """Return a hash of the contents of the hooks directory.

    We'll just use git to do this.  This hash has the property that if anything
    changes in the directory we will return a different hash.

    SECURITY CONSIDERATION:
      This hash only represents the contents of files in the hook directory, not
      any other files imported or called by hooks.  Changes to imported files
      can change the script behavior without affecting the hash.

    Returns:
      A string representing the hash.  This will always be ASCII so that it can
      be printed to the user easily.
    """
    assert self._hooks_project, "Must have hooks to calculate their hash."

    # We will use the work_git object rather than just calling GetRevisionId().
    # That gives us a hash of the latest checked in version of the files that
    # the user will actually be executing.  Specifically, GetRevisionId()
    # doesn't appear to change even if a user checks out a different version
    # of the hooks repo (via git checkout) nor if a user commits their own revs.
    #
    # NOTE: Local (non-committed) changes will not be factored into this hash.
    # I think this is OK, since we're really only worried about warning the user
    # about upstream changes.
    return self._hooks_project.work_git.rev_parse('HEAD')

  def _GetMustVerb(self):
    """Return 'must' if the hook is required; 'should' if not."""
    if self._abort_if_user_denies:
      return 'must'
    else:
      return 'should'

  def _CheckForHookApproval(self):
    """Check to see whether this hook has been approved.

    We'll accept approval of manifest URLs if they're using secure transports.
    This way the user can say they trust the manifest hoster.  For insecure
    hosts, we fall back to checking the hash of the hooks repo.

    Note that we ask permission for each individual hook even though we use
    the hash of all hooks when detecting changes.  We'd like the user to be
    able to approve / deny each hook individually.  We only use the hash of all
    hooks because there is no other easy way to detect changes to local imports.

    Returns:
      True if this hook is approved to run; False otherwise.

    Raises:
      HookError: Raised if the user doesn't approve and abort_if_user_denies
          was passed to the constructor.
    """
    if self._ManifestUrlHasSecureScheme():
      return self._CheckForHookApprovalManifest()
    else:
      return self._CheckForHookApprovalHash()

  def _CheckForHookApprovalHelper(self, subkey, new_val, main_prompt,
                                  changed_prompt):
    """Check for approval for a particular attribute and hook.

    Args:
      subkey: The git config key under [repo.hooks.<hook_type>] to store the
          last approved string.
      new_val: The new value to compare against the last approved one.
      main_prompt: Message to display to the user to ask for approval.
      changed_prompt: Message explaining why we're re-asking for approval.

    Returns:
      True if this hook is approved to run; False otherwise.

    Raises:
      HookError: Raised if the user doesn't approve and abort_if_user_denies
          was passed to the constructor.
    """
    hooks_config = self._hooks_project.config
    git_approval_key = 'repo.hooks.%s.%s' % (self._hook_type, subkey)

    # Get the last value that the user approved for this hook; may be None.
    old_val = hooks_config.GetString(git_approval_key)

    if old_val is not None:
      # User previously approved hook and asked not to be prompted again.
      if new_val == old_val:
        # Approval matched.  We're done.
        return True
      else:
        # Give the user a reason why we're prompting, since they last told
        # us to "never ask again".
        prompt = 'WARNING: %s\n\n' % (changed_prompt,)
    else:
      prompt = ''

    # Prompt the user only if we're on a tty; when we're not on a tty there is
    # nobody to answer, so we'll assume "no".
    if sys.stdout.isatty():
      prompt += main_prompt + ' (yes/always/NO)? '
      response = input(prompt).lower()
      print()

      # User is doing a one-time approval.
      if response in ('y', 'yes'):
        return True
      elif response == 'always':
        # Remember the approved value so matching runs skip the prompt.
        hooks_config.SetString(git_approval_key, new_val)
        return True

    # For anything else, we'll assume no approval.
    if self._abort_if_user_denies:
      raise HookError('You must allow the %s hook or use --no-verify.' %
                      self._hook_type)

    return False

  def _ManifestUrlHasSecureScheme(self):
    """Check if the URI for the manifest is a secure transport."""
    secure_schemes = ('file', 'https', 'ssh', 'persistent-https', 'sso', 'rpc')
    parse_results = urllib.parse.urlparse(self._manifest_url)
    return parse_results.scheme in secure_schemes

  def _CheckForHookApprovalManifest(self):
    """Check whether the user has approved this manifest host.

    Returns:
      True if this hook is approved to run; False otherwise.
    """
    return self._CheckForHookApprovalHelper(
        'approvedmanifest',
        self._manifest_url,
        'Run hook scripts from %s' % (self._manifest_url,),
        'Manifest URL has changed since %s was allowed.' % (self._hook_type,))

  def _CheckForHookApprovalHash(self):
    """Check whether the user has approved the hooks repo.

    Returns:
      True if this hook is approved to run; False otherwise.
    """
    prompt = ('Repo %s run the script:\n'
              '  %s\n'
              '\n'
              'Do you want to allow this script to run')
    return self._CheckForHookApprovalHelper(
        'approvedhash',
        self._GetHash(),
        prompt % (self._GetMustVerb(), self._script_fullpath),
        'Scripts have changed since %s was allowed.' % (self._hook_type,))

  @staticmethod
  def _ExtractInterpFromShebang(data):
    """Extract the interpreter used in the shebang.

    Try to locate the interpreter the script is using (ignoring `env`).

    Args:
      data: The file content of the script.

    Returns:
      The basename of the main script interpreter, or None if a shebang is not
      used or could not be parsed out.
    """
    firstline = data.splitlines()[:1]
    if not firstline:
      return None

    # The format here can be tricky.  Capture the program and, optionally, its
    # first argument so that `#!/usr/bin/env python3` can be resolved below.
    shebang = firstline[0].strip()
    m = re.match(r'^#!\s*([^\s]+)(?:\s+([^\s]+))?', shebang)
    if not m:
      return None

    # If using `env`, find the target program (None when env has no argument).
    interp = m.group(1)
    if os.path.basename(interp) == 'env':
      interp = m.group(2)

    return interp

  def _ExecuteHookViaReexec(self, interp, context, **kwargs):
    """Execute the hook script through |interp|.

    Note: Support for this feature should be dropped ~Jun 2021.

    Args:
      interp: The Python program to run.
      context: Basic Python context to execute the hook inside.
      kwargs: Arbitrary arguments to pass to the hook script.

    Raises:
      HookError: When the hooks failed for any reason.
      OSError: Propagated from subprocess.Popen when |interp| can't be started.
    """
    # This logic needs to be kept in sync with _ExecuteHookViaImport below.
    #
    # The path and the JSON payloads are embedded with %r so they reach the
    # child as properly escaped string literals.  Splicing them raw between
    # triple quotes lets the child's parser re-interpret backslashes and quotes
    # (e.g. Windows paths, or kwargs containing '"' or newlines), which corrupts
    # the JSON before json.loads() ever sees it.
    script = """
import json, os, sys
path = %(path)r
kwargs = json.loads(%(kwargs)r)
context = json.loads(%(context)r)
sys.path.insert(0, os.path.dirname(path))
with open(path) as fp:
  data = fp.read()
exec(compile(data, path, 'exec'), context)
context['main'](**kwargs)
""" % {
        'path': self._script_fullpath,
        'kwargs': json.dumps(kwargs),
        'context': json.dumps(context),
    }

    # We pass the script via stdin to avoid OS argv limits.  It also makes
    # unhandled exception tracebacks less verbose/confusing for users.
    cmd = [interp, '-c', 'import sys; exec(sys.stdin.read())']
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    proc.communicate(input=script.encode('utf-8'))
    if proc.returncode:
      raise HookError('Failed to run %s hook.' % (self._hook_type,))

  def _ExecuteHookViaImport(self, data, context, **kwargs):
    """Execute the hook code in |data| directly.

    Args:
      data: The code of the hook to execute.
      context: Basic Python context to execute the hook inside.
      kwargs: Arbitrary arguments to pass to the hook script.

    Raises:
      HookError: When the hooks failed for any reason.
    """
    # Exec, storing global context in the context dict.  We catch exceptions
    # and convert to a HookError w/ just the failing traceback.
    try:
      exec(compile(data, self._script_fullpath, 'exec'), context)
    except Exception:
      raise HookError('%s\nFailed to import %s hook; see traceback above.' %
                      (traceback.format_exc(), self._hook_type))

    # Running the script should have defined a main() function.
    if 'main' not in context:
      raise HookError('Missing main() in: "%s"' % self._script_fullpath)

    # Call the main function in the hook.  If the hook should cause the
    # build to fail, it will raise an Exception.  We'll catch that convert
    # to a HookError w/ just the failing traceback.
    try:
      context['main'](**kwargs)
    except Exception:
      raise HookError('%s\nFailed to run main() for %s hook; see traceback '
                      'above.' % (traceback.format_exc(), self._hook_type))

  def _ExecuteHook(self, **kwargs):
    """Actually execute the given hook.

    This will run the hook's 'main' function in our python interpreter, unless
    the hook's shebang asks for a different major version of Python, in which
    case we try to re-exec the hook under that interpreter first.

    Args:
      kwargs: Keyword arguments to pass to the hook.  These are often specific
          to the hook type.  For instance, pre-upload hooks will contain
          a project_list.

    Raises:
      HookError: When the hook fails to load or run.
    """
    # Keep sys.path and CWD stashed away so that we can always restore them
    # upon function exit.
    orig_path = os.getcwd()
    orig_syspath = sys.path

    try:
      # Always run hooks with CWD as topdir.
      os.chdir(self._topdir)

      # Put the hook dir as the first item of sys.path so hooks can do
      # relative imports.  We want to replace the repo dir as [0] so
      # hooks can't import repo files.
      sys.path = [os.path.dirname(self._script_fullpath)] + sys.path[1:]

      # Initial global context for the hook to run within.
      context = {'__file__': self._script_fullpath}

      # Add 'hook_should_take_kwargs' to the arguments to be passed to main.
      # We don't actually want hooks to define their main with this argument--
      # it's there to remind them that their hook should always take **kwargs.
      # For instance, a pre-upload hook should be defined like:
      #   def main(project_list, **kwargs):
      #
      # This allows us to later expand the API without breaking old hooks.
      kwargs = kwargs.copy()
      kwargs['hook_should_take_kwargs'] = True

      # See what version of python the hook has been written against.  Use a
      # context manager so the file handle is closed before the hook runs.
      with open(self._script_fullpath) as fp:
        data = fp.read()
      interp = self._ExtractInterpFromShebang(data)
      reexec = False
      if interp:
        prog = os.path.basename(interp)
        if prog.startswith('python2') and sys.version_info.major != 2:
          reexec = True
        elif prog.startswith('python3') and sys.version_info.major == 2:
          reexec = True

      # Attempt to execute the hooks through the requested version of Python.
      if reexec:
        try:
          self._ExecuteHookViaReexec(interp, context, **kwargs)
        except OSError as e:
          if e.errno == errno.ENOENT:
            # We couldn't find the interpreter, so fallback to importing.
            reexec = False
          else:
            raise

      # Run the hook by importing directly.
      if not reexec:
        self._ExecuteHookViaImport(data, context, **kwargs)
    finally:
      # Restore sys.path and CWD.
      sys.path = orig_syspath
      os.chdir(orig_path)

  def Run(self, user_allows_all_hooks, **kwargs):
    """Run the hook.

    If the hook doesn't exist (because there is no hooks project or because
    this particular hook is not enabled), this is a no-op.

    Args:
      user_allows_all_hooks: If True, we will never prompt about running the
          hook--we'll just assume it's OK to run it.
      kwargs: Keyword arguments to pass to the hook.  These are often specific
          to the hook type.  For instance, pre-upload hooks will contain
          a project_list.

    Raises:
      HookError: If there was a problem finding the hook or the user declined
          to run a required hook (from _CheckForHookApproval).
    """
    # No-op if there is no hooks project or if hook is disabled.
    if ((not self._hooks_project) or (self._hook_type not in
                                      self._hooks_project.enabled_repo_hooks)):
      return

    # Bail with a nice error if we can't find the hook.
    if not os.path.isfile(self._script_fullpath):
      raise HookError('Couldn\'t find repo hook: "%s"' % self._script_fullpath)

    # Make sure the user is OK with running the hook.
    if (not user_allows_all_hooks) and (not self._CheckForHookApproval()):
      return

    # Run the hook, re-executing under another Python if its shebang asks.
    self._ExecuteHook(**kwargs)