# Source code for ontocheck.run_assessment

"""
Ontology Assessment Runner

Provides runner functions for the four OntoCheck assessment modes:

    Mode 1 -- Task-agnostic:  structural, labeling, accessibility, and
              naming-convention metrics applied to a single ontology.
    Mode 2 -- Task-specific Web ontology:  task-based Relevance/Accuracy
              validated against a knowledge graph (e.g., DBpedia via LC-QuAD).
    Mode 3 -- Task-based Scientific:  domain ontology assessed against
              competency questions encoded as SPARQL queries.
    Mode 4 -- Cross-Domain:  multiple ontologies merged and assessed against
              cross-domain competency questions.
"""

import logging
import sys
import csv
from pathlib import Path

from .altLabelCheck import mainAltLabelCheck_v_0_0_1
from .check_external_data_provider_links_ttl import check_external_data_provider_links_ttl
from .check_for_isolated_elements import check_for_isolated_elements
from .check_human_readable_license_ttl import check_human_readable_license_ttl
from .check_rdf_dump_accessibility_ttl import check_rdf_dump_accessibility_ttl
from .check_sparql_accessibility_ttl import check_sparql_accessibility_ttl
from .count_class_connected_components import count_class_connected_components
from .defCheck import mainDefCheck_v_0_0_1
from .find_duplicate_labels_from_graph import find_duplicate_labels_from_graph
from .get_properties_missing_domain_and_range import get_properties_missing_domain_and_range
from .leafNodeCheck import mainLeafNodeCheck_v_0_0_1
from .semanticConnection import mainSemanticConnection_v_0_0_1
from .mds_design_check import mds_design_check_v_0_0_1
from .spell_check import spell_check_v_0_0_1
from .check_class_name_capital import mainClassNameCapitalCheck_v_0_0_1
from .check_class_name_space import mainClassNameSpaceCheck_v_0_0_1
from .check_label import mainLabelCheck_v_0_0_1
from .class_search import mainClassSearch_v_0_0_1
from .task_based_metric import task_based_metric_v_0_0_1

# Registry of task-agnostic metrics: public metric name -> callable.
# The runners in this module call each entry with the ontology file path as
# its only argument.  Insertion order is the execution order when ``"all"``
# is requested, so the entries are deliberately kept in their original order.
METRIC_DISPATCHER = dict(
    altLabelCheck=mainAltLabelCheck_v_0_0_1,
    externalLinks=check_external_data_provider_links_ttl,
    isolatedElements=check_for_isolated_elements,
    humanLicense=check_human_readable_license_ttl,
    rdfDump=check_rdf_dump_accessibility_ttl,
    sparqlEndpoint=check_sparql_accessibility_ttl,
    classConnections=count_class_connected_components,
    definitionCheck=mainDefCheck_v_0_0_1,
    duplicateLabels=find_duplicate_labels_from_graph,
    missingDomainRange=get_properties_missing_domain_and_range,
    leafNodeCheck=mainLeafNodeCheck_v_0_0_1,
    semanticConnection=mainSemanticConnection_v_0_0_1,
    mdsDesignCheck=mds_design_check_v_0_0_1,
    spellCheck=spell_check_v_0_0_1,
    classCapitalCheck=mainClassNameCapitalCheck_v_0_0_1,
    classSpaceCheck=mainClassNameSpaceCheck_v_0_0_1,
    checkLabel=mainLabelCheck_v_0_0_1,
    searchClass=mainClassSearch_v_0_0_1,
)


# ---------------------------------------------------------------------------
# Logging helpers
# ---------------------------------------------------------------------------

def _setup_logging(output_log_file):
    """Configure file and console logging, returning the console handler."""
    logging.basicConfig(
        filename=output_log_file,
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        filemode="w",
    )
    console_handler = logging.StreamHandler(sys.stdout)
    logging.getLogger().addHandler(console_handler)
    return console_handler


def _teardown_logging(console_handler):
    """Remove the console handler added by ``_setup_logging``."""
    logging.getLogger().removeHandler(console_handler)


# ---------------------------------------------------------------------------
# Mode 1: Task-agnostic assessment
# ---------------------------------------------------------------------------


# [docs]
def run_ontology_assessment(
    ttl_file,
    metrics,
    output_log_file="assessment.log",
    output_csv_file="assessment_scores.csv",
):
    """Run task-agnostic metrics on a single ontology (Mode 1).

    Each requested metric is looked up in ``METRIC_DISPATCHER`` and called
    with ``ttl_file``.  Unknown metric names are skipped with a warning, and
    a metric that raises is recorded as an error row instead of aborting the
    run.  The collected rows are written to ``output_csv_file``.

    Parameters
    ----------
    ttl_file : str
        Path to the input Turtle (.ttl) ontology file.
    metrics : list of str or str
        Metric names to execute, or ``"all"`` to run every metric in
        ``METRIC_DISPATCHER``.  A set or tuple of names is accepted as well.
    output_log_file : str, optional
        Output log file path.
    output_csv_file : str, optional
        Output CSV file path.

    Raises
    ------
    ValueError
        If ``metrics`` is neither ``"all"`` nor a list, set, or tuple.
    """
    console = _setup_logging(output_log_file)

    # try/finally guarantees the console handler is detached even when the
    # argument check below (or anything else) raises; otherwise every failed
    # run would leave one more stdout handler on the root logger.
    try:
        if metrics == "all":
            metrics_to_run = list(METRIC_DISPATCHER)
            logging.info("Running all available metrics.")
        elif isinstance(metrics, (list, set, tuple)):
            metrics_to_run = list(metrics)
        else:
            raise ValueError(
                "The 'metrics' argument must be a list of metric names or the string 'all'."
            )

        logging.info(f"--- Starting ontology assessment for: {ttl_file} ---")
        # str() keeps this summary line from crashing on a non-string entry;
        # such an entry is then reported as "not found" by the metric loop.
        logging.info(f"Metrics to run: {', '.join(map(str, metrics_to_run))}")

        # Same per-metric loop the other modes use: skip unknown names,
        # record failures as error rows.
        results = _run_agnostic_metrics(ttl_file, metrics_to_run)

        _write_csv(results, output_csv_file)

        logging.info("--- Assessment Complete ---")
    finally:
        _teardown_logging(console)



# ---------------------------------------------------------------------------
# Mode 2: Task-specific Web ontology assessment
# ---------------------------------------------------------------------------


# [docs]
def run_web_ontology_assessment(
    ttl_file,
    questions,
    domain_prefixes,
    knowledge_graph,
    domain_ns_fragments=None,
    metrics=None,
    output_log_file="assessment.log",
    output_csv_file="assessment_scores.csv",
):
    """Assess a Web ontology against KGQA benchmark queries (Mode 2).

    Runs the task-based Relevance/Accuracy assessment using competency
    queries drawn from a knowledge-graph question-answering benchmark
    (e.g., LC-QuAD over DBpedia).  Optionally runs task-agnostic metrics
    as well.  All rows are written to ``output_csv_file``.

    Parameters
    ----------
    ttl_file : str
        Path to the ontology Turtle file.
    questions : str or list of str
        Path to a JSON/Markdown file of SPARQL queries, or a list of raw
        SPARQL query strings.
    domain_prefixes : list of str
        Namespace prefixes used in the SPARQL queries (e.g., ``["dbo"]``).
    knowledge_graph : str
        Path to the knowledge-graph file (Turtle/RDF).  Within this
        function the value is only written to the log; it is not passed on
        to ``task_based_metric_v_0_0_1``.
    domain_ns_fragments : list of str or None, optional
        Namespace URI fragments to restrict domain-term filtering.
    metrics : list of str or None, optional
        Task-agnostic metric names to run alongside the task-based
        assessment.  Pass ``"all"`` for every available metric.
    output_log_file : str, optional
        Output log file path.
    output_csv_file : str, optional
        Output CSV file path.

    Returns
    -------
    dict
        The result dictionary from ``task_based_metric_v_0_0_1``.
    """
    console = _setup_logging(output_log_file)

    # try/finally: an exception from the task-based metric (bad query file,
    # unparsable ontology, ...) must not leave the console handler attached
    # to the root logger.
    try:
        logging.info("--- Mode 2: Task-specific Web Ontology Assessment ---")
        logging.info(f"Ontology: {ttl_file}")
        logging.info(f"Knowledge graph: {knowledge_graph}")

        result = task_based_metric_v_0_0_1(
            ttl_file=ttl_file,
            questions=questions,
            domain_prefixes=domain_prefixes,
            domain_ns_fragments=domain_ns_fragments,
        )

        _log_task_based_result(result)

        results = _task_based_result_to_rows(result)

        if metrics:
            logging.info("--- Running task-agnostic metrics ---")
            results.extend(_run_agnostic_metrics(ttl_file, metrics))

        _write_csv(results, output_csv_file)

        logging.info("--- Assessment Complete ---")
    finally:
        _teardown_logging(console)

    return result



# ---------------------------------------------------------------------------
# Mode 3 & 4: Task-based Scientific / Cross-Domain assessment
# ---------------------------------------------------------------------------


# [docs]
def run_task_based_assessment(
    ttl_files,
    questions,
    domain_prefixes,
    domain_ns_fragments=None,
    metrics=None,
    output_log_file="assessment.log",
    output_csv_file="assessment_scores.csv",
):
    """Assess one or more ontologies against competency questions (Modes 3/4).

    When a single ontology is provided this corresponds to Mode 3
    (task-based scientific assessment).  When multiple ontologies are
    provided they are passed together to ``task_based_metric_v_0_0_1``,
    corresponding to Mode 4 (cross-domain assessment).  Task-agnostic
    metrics, when requested, are run once per ontology file.

    Parameters
    ----------
    ttl_files : str or list of str
        Path(s) to Turtle (.ttl) ontology file(s).  A single path is
        accepted and will be wrapped in a list internally.
    questions : str or list of str
        Path to a JSON/Markdown file of SPARQL queries, or a list of raw
        SPARQL query strings.
    domain_prefixes : list of str
        Namespace prefixes used in the SPARQL queries (e.g., ``["mds"]``).
    domain_ns_fragments : list of str or None, optional
        Namespace URI fragments to restrict domain-term filtering.
    metrics : list of str or None, optional
        Task-agnostic metric names to run alongside the task-based
        assessment.  Pass ``"all"`` for every available metric.
    output_log_file : str, optional
        Output log file path.
    output_csv_file : str, optional
        Output CSV file path.

    Returns
    -------
    dict
        The result dictionary from ``task_based_metric_v_0_0_1``.
    """
    if isinstance(ttl_files, (str, Path)):
        ttl_files = [ttl_files]
    else:
        # Materialise once so len() and the two passes below also work for
        # tuples, sets, and one-shot iterables.
        ttl_files = list(ttl_files)

    console = _setup_logging(output_log_file)

    # try/finally: a failure anywhere in the run must still detach the
    # console handler from the root logger.
    try:
        if len(ttl_files) > 1:
            logging.info("--- Mode 4: Cross-Domain Ontology Assessment ---")
        else:
            logging.info("--- Mode 3: Task-based Scientific Ontology Assessment ---")
        logging.info(f"Ontologies: {', '.join(str(f) for f in ttl_files)}")

        result = task_based_metric_v_0_0_1(
            ttl_file=ttl_files,
            questions=questions,
            domain_prefixes=domain_prefixes,
            domain_ns_fragments=domain_ns_fragments,
        )

        _log_task_based_result(result)

        results = _task_based_result_to_rows(result)

        if metrics:
            logging.info("--- Running task-agnostic metrics ---")
            for f in ttl_files:
                logging.info(f"--- Task-agnostic metrics for: {f} ---")
                # Metric callables receive the path as a plain string.
                results.extend(_run_agnostic_metrics(str(f), metrics))

        _write_csv(results, output_csv_file)

        logging.info("--- Assessment Complete ---")
    finally:
        _teardown_logging(console)

    return result



# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------

def _log_task_based_result(result):
    """Log the task-based Relevance/Accuracy results."""
    logging.info(f"Relevance (Recall):    {result['relevance']:.4f}")
    logging.info(f"Accuracy  (Precision): {result['accuracy']:.4f}")
    logging.info(f"Ontology terms  (T_o): {result['T_o_count']}")
    logging.info(f"Task terms      (T_a): {result['T_a_count']}")
    logging.info(f"Intersection:          {result['intersection']}")
    if result["missing_from_onto"]:
        logging.info(
            f"Missing from ontology: {', '.join(sorted(result['missing_from_onto']))}"
        )
    if result["unused_in_onto"]:
        logging.info(
            f"Unused ontology terms: {len(result['unused_in_onto'])} terms"
        )


def _task_based_result_to_rows(result):
    """Convert a task-based result dict to CSV-compatible row dicts."""
    return [
        {"Metric": "Relevance", "Score": f"{result['relevance']:.4f}", "Status": "Success"},
        {"Metric": "Accuracy", "Score": f"{result['accuracy']:.4f}", "Status": "Success"},
        {"Metric": "T_o_count", "Score": result["T_o_count"], "Status": "Success"},
        {"Metric": "T_a_count", "Score": result["T_a_count"], "Status": "Success"},
        {"Metric": "Intersection", "Score": result["intersection"], "Status": "Success"},
    ]


def _run_agnostic_metrics(ttl_file, metrics):
    """Run task-agnostic metrics and return a list of result row dicts."""
    if metrics == "all":
        metrics_to_run = list(METRIC_DISPATCHER.keys())
    elif isinstance(metrics, (list, set, tuple)):
        metrics_to_run = list(metrics)
    else:
        metrics_to_run = []

    rows = []
    for metric_name in metrics_to_run:
        if metric_name not in METRIC_DISPATCHER:
            logging.warning(f"Metric '{metric_name}' not found. Skipping.")
            continue

        metric_function = METRIC_DISPATCHER[metric_name]
        logging.info(f"--- Running Metric: {metric_name} ---")

        try:
            score = metric_function(ttl_file)
            logging.info(f"Metric '{metric_name}' completed successfully.")
            rows.append({"Metric": metric_name, "Score": score, "Status": "Success"})
        except Exception as e:
            logging.error(f"Metric '{metric_name}' failed with an error: {e}", exc_info=True)
            rows.append({"Metric": metric_name, "Score": "N/A", "Status": f"Error: {e}"})

    return rows


def _write_csv(results, output_csv_file):
    """Write a list of result row dicts to a CSV file."""
    try:
        with open(output_csv_file, "w", newline="", encoding="utf-8") as csvfile:
            fieldnames = ["Metric", "Score", "Status"]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(results)
        logging.info(f"--- Successfully wrote results to {output_csv_file} ---")
    except IOError as e:
        logging.error(f"Failed to write to CSV file {output_csv_file}: {e}")