# Source code for rsmtool.rsmcompare

#!/usr/bin/env python

"""
Script to compare two RSMTool experiments.

:author: Jeremy Biggs (jbiggs@ets.org)
:author: Anastassia Loukina (aloukina@ets.org)
:author: Nitin Madnani (nmadnani@ets.org)

:date: 10/25/2017
:organization: ETS
"""


import argparse
import glob
import logging
import os
import sys

from os.path import (abspath,
                     dirname,
                     exists,
                     join,
                     normpath)

from rsmtool import VERSION_STRING
from rsmtool.configuration_parser import ConfigurationParser, Configuration
from rsmtool.reader import DataReader
from rsmtool.reporter import Reporter
from rsmtool.utils import LogFormatter


def check_experiment_id(experiment_dir, experiment_id):
    """
    Check that the supplied ``experiment_dir`` contains
    the outputs for the supplied ``experiment_id``.

    An output is any file in the ``output`` sub-directory of
    ``experiment_dir`` whose name matches ``<experiment_id>_*.*``.

    Parameters
    ----------
    experiment_dir : str
        path to the directory with the experiment output
    experiment_id : str
        experiment_id of the original experiment used to generate the
        output

    Raises
    ------
    FileNotFoundError
        if the ``experiment_dir`` does not contain any outputs
        for the ``experiment_id``
    """

    # The directory and the experiment id are literal text, not patterns:
    # escape them so that glob metacharacters they may contain (e.g. the
    # brackets in ``run[1]``) are matched verbatim and only the trailing
    # ``_*.*`` acts as a wildcard.
    pattern = join(glob.escape(str(experiment_dir)),
                   'output',
                   '{}_*.*'.format(glob.escape(str(experiment_id))))

    # raise an error if no output file starts with the experiment id
    if not glob.glob(pattern):
        raise FileNotFoundError("The directory {} does not contain "
                                "any outputs of an rsmtool experiment "
                                "{}".format(experiment_dir, experiment_id))


def _locate_experiment_outputs(configuration, configpath, label):
    """
    Locate and validate the outputs of one of the two experiments
    being compared.

    Parameters
    ----------
    configuration : Configuration
        The ``rsmcompare`` configuration. The fields
        ``experiment_id_<label>`` and ``experiment_dir_<label>`` are read.
    configpath : str
        Directory passed to ``DataReader.locate_files`` together with
        the configured experiment directory.
    label : str
        Which experiment to look up: ``'old'`` or ``'new'``.

    Returns
    -------
    csvdir : str
        Normalized path to the ``output`` sub-directory of the experiment.
    figdir : str
        Normalized path to the ``figure`` sub-directory of the experiment.

    Raises
    ------
    FileNotFoundError
        If the experiment directory cannot be located, if it lacks the
        ``output`` or ``figure`` sub-directory, or if it contains no
        outputs for the configured experiment id.
    """
    experiment_id = configuration['experiment_id_{}'.format(label)]
    dir_field = 'experiment_dir_{}'.format(label)

    experiment_dir = DataReader.locate_files(configuration[dir_field],
                                             configpath)
    if not experiment_dir:
        raise FileNotFoundError("The directory {} "
                                "does not exist.".format(configuration[dir_field]))

    csvdir = normpath(join(experiment_dir, 'output'))
    figdir = normpath(join(experiment_dir, 'figure'))
    if not exists(csvdir) or not exists(figdir):
        raise FileNotFoundError("The directory {} does not contain "
                                "the output of an rsmtool "
                                "experiment.".format(experiment_dir))

    # the directory layout is right; make sure the files in it were
    # actually produced by the experiment named in the configuration
    check_experiment_id(experiment_dir, experiment_id)

    return csvdir, figdir


def run_comparison(config_file_or_obj, output_dir):
    """
    Run an ``rsmcompare`` experiment using the given configuration
    file and generate the report in the given directory.

    Parameters
    ----------
    config_file_or_obj : str or Configuration
        Path to the experiment configuration file.
        Users can also pass a `Configuration` object that is in memory.
    output_dir : str
        Path to the experiment output directory.

    Raises
    ------
    ValueError
        If any of the required fields are missing or ill-specified.
    FileNotFoundError
        If either experiment directory cannot be located or does not
        contain the outputs of the corresponding experiment.
    """

    logger = logging.getLogger(__name__)

    # Allow users to pass Configuration object to the
    # `config_file_or_obj` argument, rather than read file
    if isinstance(config_file_or_obj, Configuration):
        configuration = config_file_or_obj

        # an in-memory configuration may not be tied to a file; fall
        # back to the current working directory in that case
        if configuration.filepath is not None:
            configpath = dirname(configuration.filepath)
        else:
            configpath = os.getcwd()
    else:
        # Instantiate configuration parser object
        parser = ConfigurationParser.get_configparser(config_file_or_obj)
        configuration = parser.read_normalize_validate_and_process_config(
            config_file_or_obj,
            context='rsmcompare')

        # get the directory where the configuration file lives
        configpath = dirname(config_file_or_obj)

    # get the information about the "old" experiment
    csvdir_old, figdir_old = _locate_experiment_outputs(configuration,
                                                        configpath,
                                                        'old')

    # get the information about the "new" experiment
    csvdir_new, figdir_new = _locate_experiment_outputs(configuration,
                                                        configpath,
                                                        'new')

    # are there specific general report sections we want to include?
    general_report_sections = configuration['general_sections']

    # what about the special or custom sections?
    special_report_sections = configuration['special_sections']

    custom_report_section_paths = configuration['custom_sections']

    # if custom report sections exist, locate sections; otherwise, create empty list
    if custom_report_section_paths:
        logger.info('Locating custom report sections')
        custom_report_sections = Reporter.locate_custom_sections(custom_report_section_paths,
                                                                 configpath)
    else:
        custom_report_sections = []

    # get the section order
    section_order = configuration['section_order']

    # get the subgroups if any
    subgroups = configuration.get('subgroups')

    # Initialize reporter
    reporter = Reporter()

    chosen_notebook_files = reporter.get_ordered_notebook_files(general_report_sections,
                                                                special_report_sections,
                                                                custom_report_sections,
                                                                section_order,
                                                                subgroups,
                                                                model_type=None,
                                                                context='rsmcompare')

    # add chosen notebook files to configuration
    configuration['chosen_notebook_files'] = chosen_notebook_files

    # now generate the comparison report
    logger.info('Starting report generation.')
    reporter.create_comparison_report(configuration,
                                      csvdir_old,
                                      figdir_old,
                                      csvdir_new,
                                      figdir_new,
                                      output_dir)

def main():
    """
    Command-line entry point for ``rsmcompare``.

    Sends INFO-level log messages to standard output, parses the
    command-line arguments (configuration file and optional output
    directory) and runs the comparison.

    Raises
    ------
    FileNotFoundError
        If the given configuration file does not exist.
    """

    # route all log records at INFO level and above to stdout,
    # formatted with the package's own log formatter
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(LogFormatter())
    logging.root.addHandler(stdout_handler)
    logging.root.setLevel(logging.INFO)

    # module-level logger
    logger = logging.getLogger(__name__)

    # describe the command-line interface
    arg_parser = argparse.ArgumentParser(prog='rsmcompare')

    arg_parser.add_argument('config_file',
                            help="The JSON configuration file for "
                                 "this comparison")

    # the output directory is optional and defaults to
    # the current working directory
    arg_parser.add_argument('output_dir',
                            nargs='?',
                            default=os.getcwd(),
                            help="The output directory where the report "
                                 "files for this comparison will be stored")

    arg_parser.add_argument('-V',
                            '--version',
                            action='version',
                            version=VERSION_STRING)

    # read the arguments supplied on the command line
    parsed_args = arg_parser.parse_args()
    logger.info('Output directory: {}'.format(parsed_args.output_dir))

    # work with absolute paths from here on so that
    # every file can still be found later
    config_file = abspath(parsed_args.config_file)
    output_dir = abspath(parsed_args.output_dir)

    # refuse to continue without a configuration file
    if not exists(config_file):
        raise FileNotFoundError("Main configuration file {} not "
                                "found.".format(config_file))

    # build the comparison report
    run_comparison(config_file, output_dir)


if __name__ == '__main__':
    main()