#!/usr/bin/env python3
"""Helpers for fetching the GStreamer documentation artifact from GitLab CI
and rewriting hotdoc HTML components (URLs, navigation, resources) so they
can be embedded in the Rust documentation site."""
import os
import sys
import json
import html
import shutil
from collections import OrderedDict
from pathlib import Path
from bs4 import BeautifulSoup
import zipfile
from urllib.request import urlretrieve
from multiprocessing import Pool, cpu_count, Manager
from functools import partial
import traceback
import gitlab


def get_documentation_artifact_url(project_name='gstreamer/gstreamer',
                                   job_name='documentation',
                                   branch='main') -> str:
    """
    Returns the URL of the latest artifact from GitLab for the specified job.

    Args:
        project_name (str): Name of the GitLab project
        job_name (str): Name of the job
        branch (str): Name of the git branch

    Raises:
        Exception: if no successful job named *job_name* is found on any
            inspected pipeline of *branch*.
    """
    gl = gitlab.Gitlab("https://gitlab.freedesktop.org/")
    project = gl.projects.get(project_name)
    # get_all=False: only the first (most recent) page of pipelines is checked.
    pipelines = project.pipelines.list(get_all=False)
    for pipeline in pipelines:
        if pipeline.ref != branch:
            continue
        # A pipeline may lack the job entirely; the previous single-element
        # unpack (`job, = [...]`) raised ValueError then. Skip such pipelines
        # instead and keep scanning.
        job = next((j for j in pipeline.jobs.list(iterator=True)
                    if j.name == job_name), None)
        if job is None or job.status != "success":
            continue
        return f"https://gitlab.freedesktop.org/{project_name}/-/jobs/{job.id}/artifacts/download"
    raise Exception("Could not find documentation artifact")


def get_relative_prefix(file_path, docs_root):
    """
    Returns the relative path prefix for a given HTML file.

    Args:
        file_path (Path): Path to the HTML file
        docs_root (Path): Root directory of the documentation
    """
    rel_path = os.path.relpath(docs_root, file_path.parent)
    if rel_path == '.':
        # File lives directly in the docs root.
        return './'
    # One '../' to leave the file's directory plus one per additional level
    # (each os.sep in the relative path is one more level of nesting).
    return '../' + '../' * rel_path.count(os.sep)


def fix_relative_urls(element, prefix):
    """
    Fixes relative URLs in a hotdoc component to include the correct prefix.

    Args:
        element: BeautifulSoup element containing hotdoc navigation or resources
        prefix: Prefix to add to relative URLs
    """
    # Fix href attributes
    for tag in element.find_all(True, {'href': True}):
        url = tag['href']
        # Absolute/external/fragment links are left untouched.
        if url.startswith(('http://', 'https://', 'mailto:', '#', 'javascript:')):
            continue
        # Directory-style links (trailing slash, or no extension in the last
        # path component) are normalized to point at their index.html.
        if url.endswith('/') or '.' not in url.split('/')[-1]:
            if not url.endswith('index.html'):
                url = url.rstrip('/') + '/index.html'
        # Force the Rust language variant on HTML pages that don't pick one.
        if ".html" in url and '?gi-language=' not in url:
            url += '?gi-language=rust'
        tag['href'] = prefix + url

    # Fix src attributes
    for tag in element.find_all(True, {'src': True}):
        url = tag['src']
        if not url.startswith(('http://', 'https://', 'data:', 'javascript:')):
            if '?gi-language=' not in url:
                url += '?gi-language=rust'
            # NOTE(review): prefixing only relative srcs, mirroring the
            # `continue` for absolute hrefs above — confirm against callers.
            tag['src'] = prefix + url
def extract_hotdoc_resources(index_html_soup, prefix):
    """
    Extracts required CSS and JS resources from the main hotdoc page.
    Returns tuple of (css_links, js_scripts)

    Args:
        index_html_soup: BeautifulSoup of the hotdoc index page
        prefix: relative URL prefix (currently unused here; URLs are fixed
            up later by fix_relative_urls)
    """
    head = index_html_soup.find('head')

    # Extract CSS links; search styling is dropped because the embedded docs
    # do not ship hotdoc's search feature.
    css_links = [link for link in head.find_all('link')
                 if 'enable_search.css' not in link['href']]

    # Scripts backing features (search trie, console prism, language menu)
    # that are not wanted in the embedded output.
    unwanted_scripts = ("trie_index.js", "prism-console-min.js",
                        'trie.js', 'language-menu.js')

    # Extract JS scripts
    js_scripts = []
    for script in head.find_all('script'):
        # Inline <script> tags have no 'src'; default to '' so the checks
        # below are safe (indexing script['src'] would raise KeyError).
        src = script.get('src', '')
        if any(unwanted in src for unwanted in unwanted_scripts):
            continue
        if 'language_switching.js' in src:
            # NOTE(review): the inline snippet originally inserted here looks
            # to have been lost in transit (the source literal evaluates to a
            # single space) — recover the intended markup from upstream.
            js_scripts.append(BeautifulSoup(' ', 'html.parser'))
        js_scripts.append(script)
    return css_links, js_scripts


def extract_hotdoc_nav(index_html_soup):
    """
    Extracts the navigation bar from the main GStreamer page.
    Returns the navigation HTML.

    Links to the C API index and the libraries page are rewritten to their
    Rust documentation equivalents.
    """
    nav = index_html_soup.find('nav', class_='navbar')
    for tag in nav.find_all(True, {'href': True}):
        url = tag['href']
        if "gstreamer/gi-index.html" in url:
            tag['href'] = "rust/stable/latest/docs/gstreamer/index.html"
        elif "libs.html" in url:
            tag['href'] = "rust/stable/latest/docs/index.html"
    return nav
Returns tuple of (resources_html, nav_html) """ index_path = docs_root / "index.html" with open(index_path, 'r', encoding='utf-8') as f: content = f.read() soup = BeautifulSoup(content, 'html.parser') # Extract resources and navigation first css_links, js_scripts = extract_hotdoc_resources(soup, prefix) nav = extract_hotdoc_nav(soup) if not css_links: raise Exception("Failed to extract CSS links") if not js_scripts: raise Exception("Failed to extract JS scripts") if not nav: raise Exception("Failed to extract navigation") resources_soup = BeautifulSoup("
", 'html.parser') assert resources_soup.div for component in css_links + js_scripts: resources_soup.div.append(component) # Fix URLs in the extracted components fix_relative_urls(resources_soup, prefix) fix_relative_urls(nav, prefix) # Build final HTML resources_html = "\n".join(str(tag) for tag in resources_soup.div.contents) resources_html += f'\n