# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from contextlib import contextmanager
import importlib.util
import logging
import os
import shlex
import subprocess as sp
import typing as tp
from pathlib import Path

from .main import DecoratedMain
from .log import fatal
from .xp import XP

logger = logging.getLogger(__name__)


class CommandError(Exception):
    """Raised by `run_command` when the command exits with a non-zero status."""
    pass


def run_command(command, **kwargs):
    """Run `command` and return its output as a stripped string.

    Stderr is merged into stdout. Extra keyword arguments are forwarded
    to `subprocess.run` (e.g. `cwd`).

    Raises:
        CommandError: if the command returns a non-zero exit code. The message
            contains the shell-quoted command, the exit code and the output.
    """
    proc = sp.run(command, stdout=sp.PIPE, stderr=sp.STDOUT, **kwargs)
    if proc.returncode:
        command_str = " ".join(shlex.quote(c) for c in command)
        raise CommandError(
            f"Command {command_str} failed ({proc.returncode}): \n" + proc.stdout.decode())
    return proc.stdout.decode().strip()


def _status_line_paths(line: str) -> tp.Optional[tp.List[str]]:
    """Extract the path(s) mentioned by one line of `git status --porcelain`.

    Returns None when the line does not have one of the two expected shapes,
    so that the caller can treat it conservatively.
    """
    try:
        parts = shlex.split(line)
    except ValueError:
        # Unbalanced quotes: we cannot reliably tell which paths are involved.
        return None
    if len(parts) == 2:
        # `XY path`
        return [parts[1]]
    if len(parts) == 4 and parts[2] == "->":
        # `XY orig_path -> path` (rename or copy): both ends matter.
        return [parts[1], parts[3]]
    return None


def _is_within(path: Path, folder: Path) -> bool:
    """Return True if `path` is `folder` or located below it."""
    try:
        path.relative_to(folder)
    except ValueError:
        return False
    return True


def check_repo_clean(root: Path, main: DecoratedMain):
    """Call `fatal` if the repository has uncommitted changes outside the grids package.

    Args:
        root: root folder of the git repository, against which the paths
            reported by `git status` are resolved.
        main: decorated main, used to locate the grids package.
    """
    out = run_command(['git', 'status', '--porcelain'])
    filtered = []
    # Here we try to detect the grids package and allow uncommitted changes
    # only to that folder. The rationale is that as we edit the grid file, it is a pain
    # to constantly be committing changes to it and it should not impact the actual run code.
    grid_name = main.dora.grid_package
    if grid_name is None:
        grid_name = main.package + ".grids"
    spec = importlib.util.find_spec(grid_name)
    grid_path: tp.Optional[Path] = None
    if spec is not None:
        assert spec.origin is not None
        grid_path = Path(spec.origin).resolve().parent
    for line in out.split("\n"):
        if not line:
            continue
        paths = _status_line_paths(line)
        # A line is tolerated only if we could parse it, the grids package
        # was found, and every path it mentions lives inside that package.
        line_clean = (
            paths is not None
            and grid_path is not None
            and all(_is_within((root / path).resolve(), grid_path) for path in paths))
        if not line_clean:
            filtered.append(line)
    if filtered:
        files = '\n'.join(filtered)
        fatal("Repository is not clean! The following files should be commited "
              f"or git ignored: \n {files}")


def get_git_root():
    """Return the resolved top level folder of the current git repository."""
    return Path(run_command(['git', 'rev-parse', '--show-toplevel'])).resolve()


def get_git_commit(repo: Path = Path('.')):
    """Return the full hash of the latest commit, running git from `repo`."""
    return run_command(['git', 'log', '-1', '--format=%H'], cwd=repo)


def shallow_clone(source: Path, target: Path):
    """Make a depth 1 clone of `source` next to `target`.

    The returned folder is named after the commit actually cloned,
    which might differ from `target.name`.
    """
    tmp_target = target.parent / (target.name + ".tmp")
    run_command(['git', 'clone', '--depth=1', 'file://' + str(source), str(tmp_target)])
    # We are not sure that there wasn't a new commit in between, so to make
    # sure the folder name is correct, we clone to a temporary name, then rename to the
    # actual commit in there. It seems there is no easy way to directly make a shallow
    # clone to a specific commit (only specific branch or tag).
    actual_commit = get_git_commit(tmp_target)
    actual_target = target.parent / actual_commit
    tmp_target.rename(actual_target)
    return actual_target


def get_new_clone(main: DecoratedMain) -> Path:
    """Return a clone of the current commit, stored inside the Dora codes folder.

    The repository is first checked for uncommitted changes. An existing clone
    for the current commit is reused, otherwise a new shallow clone is made.
    """
    source = get_git_root()
    commit = get_git_commit()
    check_repo_clean(source, main)
    codes = main.dora.dir / main.dora._codes
    codes.mkdir(parents=True, exist_ok=True)
    target = codes / commit
    if not target.exists():
        target = shallow_clone(source, target)
    assert target.exists()
    return target


@contextmanager
def enter_clone(clone: Path):
    """Context manager that temporarily relocates to a clean clone of the
    current git repository.

    The working directory is moved to the same relative location inside `clone`,
    and the original one is exported as the `_DORA_ORIGINAL_DIR` environment
    variable. Both are restored on exit.
    """
    cwd = Path('.').resolve()
    root = get_git_root()
    relative_path = cwd.relative_to(root)
    try:
        # Done inside the `try` so that a failing `chdir` does not leave
        # `_DORA_ORIGINAL_DIR` set for the rest of the process.
        os.environ['_DORA_ORIGINAL_DIR'] = str(cwd)
        os.chdir(clone / relative_path)
        yield
    finally:
        os.chdir(cwd)
        os.environ.pop('_DORA_ORIGINAL_DIR', None)


def assign_clone(xp: XP, clone: Path):
    """Make the code folder of `xp` a symlink pointing to `clone`.

    An existing symlink is replaced, and an existing real folder is moved
    to `old_code` in the same parent folder.

    Raises:
        FileExistsError: if the code folder exists and is neither a symlink nor a folder.
    """
    assert xp.dora.git_save
    code = xp.code_folder
    # `is_symlink` must be tested before `exists`: the latter follows symlinks
    # and is False for a dangling one (e.g. the previous clone was deleted).
    if code.is_symlink():
        code.unlink()
    elif code.exists():
        if code.is_dir():
            code.rename(code.parent / 'old_code')
        else:
            raise FileExistsError(f"code folder should be symlink or folder: {code}")
    code.symlink_to(clone)


AnyPath = tp.TypeVar("AnyPath", str, Path)


def to_absolute_path(path: AnyPath) -> AnyPath:
    """When using `git_save`, this takes a potentially relative path
    with respect to the original execution folder and returns an absolute path.
    This is required if you use relative paths with respect to this original folder.

    When using both `git_save` and Hydra, two changes of directory happen:
    - Dora moves to git clone
    - Hydra moves to XP folder

    Hydra provides a `to_absolute_path()` function. In order to simplify your code,
    if `git_save` was not used, and Hydra is in use, this will fallback to calling
    Hydra version, so that you only need to ever call this function to cover all cases.

    The result has the same type (`str` or `Path`) as the input.
    """
    klass = type(path)
    _path = Path(path)
    if '_DORA_ORIGINAL_DIR' not in os.environ:
        # We did not use git_save, we check first if Hydra is used,
        # in which case we use it to convert to an absolute Path.
        try:
            import hydra.utils
        except ImportError:
            if not _path.is_absolute():
                _path = Path(os.getcwd()) / _path
        else:
            _path = Path(hydra.utils.to_absolute_path(str(_path)))
        return klass(_path)
    else:
        # We used git_save, in which case we use the original dir saved by Dora.
        original_cwd = Path(os.environ['_DORA_ORIGINAL_DIR'])
        if _path.is_absolute():
            return klass(_path)
        else:
            return klass(original_cwd / _path)
Memory