#!/usr/bin/env python3
"""
correct_references.py:
    This script scans all .dita files found in the as-argument-passed folder
    and creates a dictionary of topic ids and file names. This is done since
    DITA expects href targets in the form of filename#topic-id. Then all the
    href targets found in man_VBoxManage... files are prefixed with the name
    of the file that contains the target.
"""

import glob
import getopt
import os
import sys
import shutil
import logging

__copyright__ = \
"""
Copyright (C) 2009-2023 Oracle and/or its affiliates.

This file is part of VirtualBox base platform packages, as
available from https://www.virtualbox.org.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation, in version 3 of the
License.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, see <https://www.gnu.org/licenses>.

SPDX-License-Identifier: GPL-3.0-only
"""

# Key is the topic/section id, value is the base name of the containing file.
# The dictionary object is only ever mutated in place (never rebound) so that
# every reference to it observes the same content.
xref_dictionary = {}

# =================================================================
# Cross reference utility functions to add file name to target name.
# =================================================================


def _extractAttributeValue(line, attribute, start=0):
    """
    Find the first attribute="value" occurrence in line at or after start.

    Returns a tuple (value, index) where index is the offset of the first
    character of the value within line.  Returns ("", -1) when the attribute
    is not found or its closing double quote is missing.
    """
    marker = attribute + '="'
    value_start = line.find(marker, start)
    if value_start == -1:
        return ("", -1)
    value_start += len(marker)
    value_end = line.find('"', value_start)
    if value_end == -1:
        return ("", -1)
    return (line[value_start:value_end], value_start)


def extractHrefTarget(line, start=0):
    """
    Search line for an href attribute, beginning at offset start.

    Returns a tuple whose 0th element is the href target and whose 1st
    element is the index of that target in line, or ("", -1) when no
    complete href="..." is found.
    """
    return _extractAttributeValue(line, "href", start)


def extractHrefList(line):
    """
    Return the list of all href targets contained in line, in order of
    appearance.
    """
    targets = []
    index = 0
    while index < len(line):
        target, index = extractHrefTarget(line, index)
        if index == -1:
            break
        # index now points at the start of the target just found, so the next
        # search can only match an href that starts after it.
        targets.append(target)
    return targets


def correctHrefTargets(file_content):
    """
    Prefix the href targets that are known topic/section ids with the name of
    the file containing them, i.e. href="id" becomes href="file.dita#id".

    file_content is a list of lines and is modified in place.  Targets that
    are not keys of xref_dictionary (for instance targets that already carry
    a file name) are left untouched.
    """
    for idx, line in enumerate(file_content):
        if "href" not in line:
            continue
        corrected = line
        for target in extractHrefList(line):
            containing_file = xref_dictionary.get(target)
            if containing_file is None:
                continue
            # Only rewrite the href attribute itself.  Replacing every quoted
            # occurrence of the target would also corrupt e.g. an id="..."
            # attribute that happens to share the line.  The file name is
            # always added, also for targets living in the file being
            # processed, because a bare id is not a valid DITA href.
            old = 'href="' + target + '"'
            new = 'href="' + containing_file + '#' + target + '"'
            corrected = corrected.replace(old, new)
        file_content[idx] = corrected


def createXrefIdDictionary(ditafolder):
    """
    Rebuild xref_dictionary from all *.dita files directly inside ditafolder.

    For every line mentioning "reference", "topic" or "section" the first
    id="..." value becomes a key mapping to the base name of the file.  When
    an id is found more than once the first occurrence is kept and a warning
    is logged.
    """
    xref_dictionary.clear()
    # Sorted so that the file winning a duplicate id does not depend on the
    # arbitrary order in which glob returns directory entries.
    for path in sorted(glob.glob(os.path.join(ditafolder, "*.dita"))):
        file_name = os.path.basename(path)
        with open(path, 'r', encoding="utf-8") as dita_file:
            for line in dita_file:
                if not ("reference" in line or "topic" in line or "section" in line):
                    continue
                topic_id = _extractAttributeValue(line, "id")[0]
                if not topic_id:
                    continue
                if topic_id in xref_dictionary:
                    logging.warning('Non unique topic/section id %s in file %s. This is already found in %s',
                                    topic_id, file_name, xref_dictionary[topic_id])
                else:
                    xref_dictionary[topic_id] = file_name


def usage(arg):
    """
    Print the command line synopsis and terminate with exit code arg.
    """
    print('correct_references.py -d <ditafolder>')
    sys.exit(arg)


def main():
    """
    Parse the command line, build the id dictionary for the given folder and
    rewrite the href targets of every man_V*dita file found in that folder.
    """
    ditafolder = ''
    try:
        opts, _ = getopt.getopt(sys.argv[1:], "hd:")
    except getopt.GetoptError as err:
        print(err)
        usage(2)
    for opt, arg in opts:
        if opt == '-h':
            usage(0)
        elif opt == '-d':
            ditafolder = arg

    if not ditafolder:
        logging.error('No folder is provided. Exiting')
        usage(2)

    createXrefIdDictionary(ditafolder)

    for path in glob.glob(os.path.join(ditafolder, "man_V*dita")):
        with open(path, 'r', encoding="utf-8") as file_handle:
            file_content = file_handle.readlines()
        correctHrefTargets(file_content)
        with open(path, 'w', encoding="utf-8") as file_handle:
            file_handle.write("".join(file_content))


if __name__ == "__main__":
    main()