Source code for presc.report.runner

from pathlib import Path
import shelve
from tempfile import TemporaryDirectory
import shutil
import subprocess
import warnings
import webbrowser

import yaml

from presc.utils import PrescError, include_exclude_list
from presc.configuration import PrescConfig
from presc import global_config

# Path to the report source dir
REPORT_SOURCE_PATH = Path(__file__).parent / "resources"
JB_CONFIG_FILENAME = "_config.yml"
JB_TOC_FILENAME = "_toc.yml"
REPORT_OUTPUT_DIR = "presc_report"
REPORT_EXECUTION_DIR = "_exec"
REPORT_MAIN_PAGE = "report.html"
SPHINX_INDEX_PAGE = Path("_build") / "html" / "index.html"
JB_CLEAN_LOG = "jupyterbook_clean.log"
JB_BUILD_LOG = "jupyterbook_build.log"
# Path to the store for the inputs to the report, relative to the execution dir
# for the report.
CONTEXT_STORE_BASENAME = "_context_store"


def _updated_jb_config(report_config):
    """Build the jupyter-book config for a report run.

    Starts from the default `_config.yml` shipped with the report sources,
    applies the title and author from the PRESC report options, and excludes
    any report pages that were not selected for inclusion.

    report_config: PRESC config options for the report

    Returns the resulting JB config as a YAML-formatted string, ready to be
    written to a _config.yml.
    """
    config_path = REPORT_SOURCE_PATH / JB_CONFIG_FILENAME
    with open(config_path) as config_file:
        jb_config = yaml.load(config_file, Loader=yaml.FullLoader)

    for option in ("title", "author"):
        jb_config[option] = report_config[option].get()

    # Collect the names of all report pages listed in the TOC. Page entries
    # are the lines of the form "- file: <page>" (ignoring indentation).
    toc_path = REPORT_SOURCE_PATH / JB_TOC_FILENAME
    with open(toc_path) as toc_file:
        toc_lines = toc_file.read().split("\n")

    page_prefix = "- file: "
    all_pages = []
    for raw_line in toc_lines:
        entry = raw_line.strip()
        if entry.startswith(page_prefix):
            all_pages.append(entry[len(page_prefix) :])

    # Resolve the include/exclude options against the full page list.
    incl_pages = include_exclude_list(
        all_pages,
        report_config["evaluations_include"].get(),
        report_config["evaluations_exclude"].get(),
    )
    # The landing page is always kept, regardless of the options.
    if "landing" not in incl_pages:
        incl_pages.append("landing")

    # Any page that was not selected is excluded from the build by notebook
    # filename.
    to_exclude = []
    for page in all_pages:
        if page not in incl_pages:
            to_exclude.append(f"{page}.ipynb")
    if to_exclude:
        jb_config["exclude_patterns"] = to_exclude

    return yaml.dump(jb_config)


class ReportRunner:
    """Main entrypoint to run the PRESC report for the given modeling inputs.

    The report is written to `<output_path>/presc_report`. If this dir already
    exists, it will be overwritten.

    To generate the report:
    ```
    pr = ReportRunner()
    pr.run(...)
    ```

    The path to the report is accessible as `pr.report_html` and will open in
    the default browser by calling `pr.open()`

    Attributes
    ----------
    output_path : str
        Path to the dir where the report will be written. If not specified,
        defaults to the current working dir.
    execution_path : str
        Path from which the report is built. If not specified, a temporary dir
        is used.
    config_filepath : str
        Path to a YAML file listing overrides to the default config options.
    """

    def __init__(self, output_path=".", execution_path=None, config_filepath=None):
        report_config = PrescConfig(global_config)
        if config_filepath:
            report_config.update_from_file(config_filepath)
        self.config = report_config

        # Path where the report output is written.
        # Outputs are nested in a subdir.
        self.output_path = Path(output_path) / REPORT_OUTPUT_DIR
        self.output_path.mkdir(parents=True, exist_ok=True)

        # Path where the report is built from.
        # The report source files are copied here, and the model inputs are
        # written to a data store.
        # If missing, a temp dir will be used on execution.
        self.execution_path = None
        if execution_path is not None:
            self.execution_path = Path(execution_path) / REPORT_EXECUTION_DIR

        # Build artifacts:
        # The main entry page for the report.
        self.report_main_page = self.output_path / SPHINX_INDEX_PAGE
        # Log files for jupyter-book execution.
        self.jb_clean_log = self.output_path / JB_CLEAN_LOG
        self.jb_build_log = self.output_path / JB_BUILD_LOG
        # The main page will be linked to the top-level output dir, if possible.
        self._linked_main_page = self.output_path / REPORT_MAIN_PAGE

        # Cache the process results from running jupyter-book commands for
        # debugging.
        self._jb_clean_result = None
        self._jb_build_result = None

    def _presc_artifacts(self):
        """List of paths to remove from the top-level output dir on clean."""
        return [self._linked_main_page, self.jb_clean_log, self.jb_build_log]

    def run(self, model, test_dataset, train_dataset=None, settings=None, clean=True):
        """Runs the PRESC report for the given modeling inputs.

        The report is written to `<output_path>/presc_report`. If this dir
        already exists, it will be overwritten.

        Parameters
        ----------
        model: presc.model.ClassificationModel
            A pre-trained ClassificationModel instance to evaluate
        test_dataset : presc.dataset.Dataset
            A test Dataset instance used to evaluate model performance
        train_dataset: presc.dataset.Dataset
            The Dataset instance used to train the model. This is not required
            for every evaluation.
        settings : dict
            A dict specifying option values to override report settings,
            eg. `{"report.title": "My Report"}`.
        clean : bool
            Should previous outputs be cleaned? Default: True

        Raises
        ------
        PrescError
            If the report source files could not be copied to the execution
            dir.
        """
        if settings:
            # Apply the overrides on a copy so the instance config is untouched.
            run_config = PrescConfig(self.config)
            run_config.set(settings)
        else:
            run_config = self.config

        if clean:
            self.clean()

        tmpdir = None
        if self.execution_path is not None:
            # If using a user-defined execution path, need to make sure
            # it doesn't exist for `shutil.copytree` to work.
            # Note that this will only remove the nested subdir, not the actual
            # user-specified dir.
            if self.execution_path.exists():
                shutil.rmtree(self.execution_path)
            exec_path = self.execution_path
        else:
            # Create a temp dir to run the build from.
            # We set up the temp dir here rather than using jupyter-book's
            # `run_in_temp` option so that we have access to the temp path.
            tmpdir = TemporaryDirectory()
            exec_path = Path(tmpdir.name) / REPORT_EXECUTION_DIR

        try:
            # Copy the report source files to the execution dir and
            # execute from there. The data store for the inputs is saved to
            # the same dir. That way, since the notebooks' working dir on
            # execution is set to where they are located by jupyter-book, they
            # can find the data store without needing to know the calling path.
            try:
                shutil.copytree(REPORT_SOURCE_PATH, exec_path)
            except shutil.Error as e:
                msg = f"Failed to copy report source to execution dir {exec_path}"
                raise PrescError(msg) from e

            # Update the default JB config files based on the PRESC config
            # options.
            with open(exec_path / JB_CONFIG_FILENAME, "w") as f:
                f.write(_updated_jb_config(run_config["report"]))

            # Write the inputs to the data store.
            ctx = Context(store_dir=exec_path)
            ctx.store_inputs(
                model=model,
                test_dataset=test_dataset,
                train_dataset=train_dataset,
                config=run_config,
            )

            # Build the report.
            self._run_jb_build(exec_path)
        finally:
            # Always remove the temp execution dir, including when one of the
            # steps above raised, so that failed runs do not leak it.
            if tmpdir is not None:
                tmpdir.cleanup()

        # The build should have created index.html at the `report_main_page`
        # path.
        if self.report_main_page.exists():
            # Symlink the main page to the top level for convenience.
            try:
                main_page_target = self.report_main_page.relative_to(
                    self._linked_main_page.parent
                )
                self._linked_main_page.symlink_to(main_page_target)
            except OSError:
                # Best effort only: the link may already exist or symlinks may
                # not be permitted. `report_html` falls back to the main page.
                pass
        else:
            msg = f"The expected report main page {self.report_main_page} does not appear to exist."
            msg += " There may have been an error generating the report."
            msg += f" Output is written to {self.jb_build_log}"
            warnings.warn(msg)

    @property
    def report_html(self):
        """The main page of the HTML report, as an absolute path string.

        Raises AttributeError if the report file does not exist.
        """
        # Return symlink, if available, for a more user-friendly experience.
        if self._linked_main_page.exists():
            # Resolve to an absolute path up to the symlink.
            report_path = (
                self._linked_main_page.parent.resolve() / self._linked_main_page.name
            )
        else:
            report_path = self.report_main_page.resolve()
        if not report_path.exists():
            msg = "Report file does not appear to exist."
            msg += " Make sure the report has already been built."
            raise AttributeError(msg)
        return str(report_path)

    def _report_uri(self):
        """Return the `file:` URI for the main page of the report."""
        # `as_uri()` percent-encodes special characters such as spaces and
        # formats drive-letter paths correctly, unlike a hand-built "file://".
        return Path(self.report_html).as_uri()

    def open(self):
        """Open the report in the default web browser."""
        webbrowser.open_new_tab(self._report_uri())

    def clean(self):
        """Remove artifacts from a previous run, if any."""
        for p in self._presc_artifacts():
            try:
                p.unlink()
            except FileNotFoundError:
                pass
        self._run_jb_clean()

    def _run_jb_clean(self):
        """Run `jupyter-book clean`.

        The command output is written to `jb_clean_log`. A warning is issued
        if the command exits with a nonzero return code.
        """
        with open(self.jb_clean_log, "w") as outfile:
            result = subprocess.run(
                ["jupyter-book", "clean", str(self.output_path)],
                stdout=outfile,
                stderr=subprocess.STDOUT,
            )
        # Any nonzero code is a failure. Negative codes indicate the process
        # was terminated by a signal (POSIX).
        if result.returncode != 0:
            msg = f"`jupyter-book clean {self.output_path}` did not succeed."
            msg += f" Output is written to {self.jb_clean_log}"
            warnings.warn(msg)
        self._jb_clean_result = result

    def _run_jb_build(self, input_path):
        """Run `jupyter-book build` on the given path.

        The command output is written to `jb_build_log`. A warning is issued
        if the command exits with a nonzero return code.
        """
        with open(self.jb_build_log, "w") as outfile:
            result = subprocess.run(
                [
                    "jupyter-book",
                    "build",
                    "--path-output",
                    str(self.output_path.resolve()),
                    str(input_path.resolve()),
                ],
                stdout=outfile,
                stderr=subprocess.STDOUT,
            )
        # Any nonzero code is a failure. Negative codes indicate the process
        # was terminated by a signal (POSIX).
        if result.returncode != 0:
            msg = f"`jupyter-book build {input_path}` did not succeed."
            msg += f" Output is written to {self.jb_build_log}"
            warnings.warn(msg)
        self._jb_build_result = result
class Context:
    """Persistent data store for sharing report inputs across notebooks.

    Values are kept in a `shelve` database located in `store_dir`.

    Note that the store implementation does not support concurrent access. It
    is up to the caller to ensure that multiple instances each have a unique
    store location.

    Attributes
    ----------
    store_dir : str
        The dir to contain the data store, implemented as one or more database
        files. If not specified, defaults to the current working dir.
    """

    def __init__(self, store_dir="."):
        store_path = Path(store_dir) / CONTEXT_STORE_BASENAME
        # Keep an absolute path so that the store location does not depend on
        # the working dir at the time it is accessed.
        self._store_path = str(store_path.resolve())

    def store_inputs(
        self, model=None, test_dataset=None, train_dataset=None, config=None
    ):
        """Write the report inputs to the data store.

        Any existing values will be overwritten. Arguments left as None are
        skipped, so previously stored values for them are kept.

        Parameters
        ----------
        model: presc.model.ClassificationModel
            A ClassificationModel instance
        test_dataset: presc.dataset.Dataset
            A Dataset instance
        train_dataset: presc.dataset.Dataset
            A Dataset instance
        config: dict
            A dict of config options
        """
        inputs = {
            "model": model,
            "test_dataset": test_dataset,
            "train_dataset": train_dataset,
            "config": config,
        }
        with shelve.open(self._store_path) as ctx:
            for key, value in inputs.items():
                # Compare against None rather than relying on truthiness, so
                # that valid but falsy inputs (eg. an empty config) are still
                # stored and objects with ambiguous truthiness do not raise.
                if value is not None:
                    ctx[key] = value

    def _get(self, key):
        """Return the value stored under `key`.

        Raises PrescError if no value has been stored for `key`.
        """
        try:
            with shelve.open(self._store_path, flag="r") as ctx:
                return ctx[key]
        except KeyError as e:
            raise PrescError(f"Could not find stored value for '{key}'") from e

    @property
    def model(self):
        """The stored model."""
        return self._get("model")

    @property
    def test_dataset(self):
        """The stored test dataset."""
        return self._get("test_dataset")

    @property
    def train_dataset(self):
        """The stored training dataset."""
        return self._get("train_dataset")

    @property
    def config(self):
        """The stored config options."""
        return self._get("config")