Source code for ontocheck.check_human_readable_license_ttl

import re

import rdflib
import requests
from rdflib import OWL, RDFS, SKOS

# Case-insensitive matcher for the license-related keywords.
# Ordinary words are matched as substrings, so "licensed" and "licenses"
# still count. The short acronyms must not be embedded in a longer run of
# letters, otherwise "mit" would fire on "submit" or "limit". Common GPL
# spellings (LGPL, AGPL, GPLv3) are accepted explicitly.
_LICENSE_KEYWORD_RE = re.compile(
    r"licen[sc]e|copyright|terms of use|creative commons|apache"
    r"|(?<![a-z])(?:[al]?gpl(?:v\d+)?|mit|bsd)(?![a-z])",
    re.IGNORECASE,
)


def check_human_readable_license_ttl(ttl_file: str) -> int:
    """
    L2 - Human-readable license detection

    Detects the presence of human-readable licensing information within a
    TTL file. This metric evaluates whether the dataset provides clear
    licensing terms that users can understand without legal expertise.

    The function searches for common license-related keywords, first in the
    RDF literals of the parsed graph and then in the raw text of the file.
    The raw-text pass covers Turtle comments, but also everything else that
    appears in the file (IRIs, prefixed names, ...).

    Author: Redad Mehdi
    Version: 0.0.1

    Parameters:
    -----------
    ttl_file : str
        Path to the Turtle (.ttl) file to analyze

    Returns:
    --------
    int
        Binary score (0 or 1)
        - 0: No human-readable license information found
        - 1: License-related keywords detected

    Raises:
    -------
    Nothing is caught here: errors raised while rdflib parses the file, or
    while the file is opened and decoded as UTF-8, propagate to the caller.

    Notes:
    ------
    Keywords searched (case-insensitively) include:
    'license', 'licence', 'copyright', 'terms of use', 'creative commons',
    'GPL', 'MIT', 'Apache', 'BSD'

    The acronyms 'GPL', 'MIT' and 'BSD' only count when they are not part of
    a longer word; 'LGPL', 'AGPL' and versioned forms such as 'GPLv3' are
    recognised as well. This is a keyword heuristic, not a license parser.

    Example:
    --------
    >>> score = check_human_readable_license_ttl('dataset.ttl')
    >>> if score:
    ...     print("Human-readable license information found")
    ... else:
    ...     print("No license information detected")

    References:
    -----------
    Zaveri, A., Rula, A., Maurino, A., Pietrobon, R., Lehmann, J., & Auer, S.
    (2015). Quality assessment for Linked Data: A Survey: A systematic
    literature review and conceptual framework. Semantic Web, 7(1), 63-93.

    Hogan, A., Umbrich, J., Harth, A., Cyganiak, R., Polleres, A., &
    Decker, S. (2012). An empirical survey of Linked Data conformance.
    Journal of Web Semantics, 14, 14-44.
    """
    graph = rdflib.Graph()
    # Parsing comes first so that a malformed file fails loudly instead of
    # being scored on its raw text alone.
    graph.parse(ttl_file, format='turtle')

    # Pass 1: literal values in the parsed graph.
    for _subject, _predicate, obj in graph:
        if isinstance(obj, rdflib.Literal) and _LICENSE_KEYWORD_RE.search(str(obj)):
            return 1

    # Pass 2: raw file text, which is where Turtle comments live (the parser
    # discards them). No alternative in the pattern can span a newline, so
    # scanning line by line gives the same result as scanning the whole
    # content at once, without loading the file into memory.
    with open(ttl_file, 'r', encoding='utf-8') as handle:
        if any(_LICENSE_KEYWORD_RE.search(line) for line in handle):
            return 1

    return 0