mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-02-24 15:06:38 +00:00
Since the documentation job is manually triggered, the latest docs might not be in the most recent pipelines. Change the pipeline listing to iterate over all pipelines until we find the last successful documentation build. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8463>
564 lines
18 KiB
Python
Executable file
564 lines
18 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import html
|
|
import shutil
|
|
from collections import OrderedDict
|
|
from pathlib import Path
|
|
from bs4 import BeautifulSoup
|
|
import zipfile
|
|
from urllib.request import urlretrieve
|
|
from multiprocessing import Pool, cpu_count, Manager
|
|
from functools import partial
|
|
import traceback
|
|
import gitlab
|
|
|
|
|
|
def get_documentation_artifact_url(project_name='gstreamer/gstreamer',
                                   job_name='documentation',
                                   branch='main') -> str:
    """
    Returns the URL of the latest artifact from GitLab for the specified job.

    Pipelines are visited in the order the GitLab API returns them; the
    first pipeline on `branch` that contains a successful `job_name` job
    provides the artifact.

    Args:
        project_name (str): Name of the GitLab project
        job_name (str): Name of the job
        branch (str): Name of the git branch

    Returns:
        str: Download URL of the artifacts archive of the matching job

    Raises:
        Exception: If no pipeline on `branch` has a successful `job_name` job
    """
    gl = gitlab.Gitlab("https://gitlab.freedesktop.org/")
    project = gl.projects.get(project_name)
    for pipeline in project.pipelines.list(iterator=True):
        if pipeline.ref != branch:
            continue

        # A pipeline is not guaranteed to contain exactly one job with this
        # name (the job may not have been created at all), so scan the jobs
        # instead of unpacking a single-element list, which would raise
        # ValueError and abort the whole search.
        for job in pipeline.jobs.list(iterator=True):
            if job.name == job_name and job.status == "success":
                return f"https://gitlab.freedesktop.org/{project_name}/-/jobs/{job.id}/artifacts/download"

    raise Exception("Could not find documentation artifact")
|
|
|
|
|
|
def get_relative_prefix(file_path, docs_root):
    """
    Computes the relative URL prefix leading from an HTML file back to the
    documentation root.

    Args:
        file_path (Path): Path to the HTML file
        docs_root (Path): Root directory of the documentation

    Returns:
        str: './' when the file sits directly in the root, otherwise one
        '../' per directory level separating the file from the root
    """
    path_to_root = os.path.relpath(docs_root, file_path.parent)
    if path_to_root == '.':
        return './'

    # One '../' for the first component plus one per separator after it.
    levels_up = path_to_root.count(os.sep) + 1
    return '../' * levels_up
|
|
|
|
|
|
def fix_relative_urls(element, prefix):
    """
    Fixes relative URLs in a hotdoc component to include the correct prefix.

    Every tag carrying an `href` or `src` attribute is rewritten in place.
    Absolute and special-scheme URLs are left untouched.

    Args:
        element: BeautifulSoup element containing hotdoc navigation or resources
        prefix: Prefix to add to relative URLs
    """
    # Fix href attributes
    for tag in element.find_all(True, {'href': True}):
        url = tag['href']
        if url.startswith(('http://', 'https://', 'mailto:', '#', 'javascript:')):
            continue

        # Set the "#fragment" aside: the query string has to come before it,
        # and the fragment must not take part in the directory detection.
        path, hash_sep, fragment = url.partition('#')

        # Directory-like links (trailing slash, or a last segment without an
        # extension) are pointed at the index page of that directory.
        if path.endswith('/') or '.' not in path.split('/')[-1]:
            path = path.rstrip('/') + '/index.html'

        if ".html" in path and '?gi-language=' not in path:
            path += '?gi-language=rust'

        tag['href'] = prefix + path + hash_sep + fragment

    # Fix src attributes
    for tag in element.find_all(True, {'src': True}):
        url = tag['src']
        if not url.startswith(('http://', 'https://', 'data:', 'javascript:')):
            if '?gi-language=' not in url:
                url += '?gi-language=rust'
            tag['src'] = prefix + url
|
|
|
|
|
|
def extract_hotdoc_resources(index_html_soup, prefix):
    """
    Extracts required CSS and JS resources from the main hotdoc page.

    Args:
        index_html_soup: BeautifulSoup document of the main hotdoc page
        prefix: Relative prefix of the page the resources will be injected
            into; embedded in the injected language-switching configuration

    Returns tuple of (css_links, js_scripts). Both lists are empty when the
    page has no <head>.
    """
    head = index_html_soup.find('head')
    if head is None:
        return [], []

    # Extract CSS links (a <link> without href is kept, not treated as an error)
    css_links = [link for link in head.find_all('link')
                 if 'enable_search.css' not in link.get('href', '')]

    # Extract JS scripts
    unwanted_scripts = ("trie_index.js", "prism-console-min.js", 'trie.js', 'language-menu.js')
    js_scripts = []
    for script in head.find_all('script'):
        # Inline scripts have no src attribute; treat them as an empty src
        # everywhere instead of indexing the attribute directly.
        src = script.get('src', '')
        if any(unwanted in src for unwanted in unwanted_scripts):
            continue

        if 'language_switching.js' in src:
            js_scripts.append(BeautifulSoup('<script src="assets/js/utils.js" />', 'html.parser'))

            # Inject necessary data for the 'language_switching.js' script to
            # properly populate the Language dropdown menu for us.
            js_scripts.append(BeautifulSoup(
                '\n<script>\n'
                f'utils.hd_context.project_url_path = "/../{prefix}libs.html";\n'
                "utils.hd_context.gi_languages = ['c', 'python', 'javascript', 'rust'];\n"
                '</script>\n',
                'html.parser'))

        js_scripts.append(script)

    return css_links, js_scripts
|
|
|
|
|
|
def extract_hotdoc_nav(index_html_soup):
    """
    Extracts the navigation bar from the main GStreamer page.

    Links to the GObject-introspection index and to the libraries page are
    redirected to their Rust documentation counterparts.

    Returns the navigation element, or None when the page has no
    <nav class="navbar"> so that the caller can report the failure.
    """
    nav = index_html_soup.find('nav', class_='navbar')
    if nav is None:
        return None

    for tag in nav.find_all(True, {'href': True}):
        url = tag['href']
        if "gstreamer/gi-index.html" in url:
            tag['href'] = "rust/stable/latest/docs/gstreamer/index.html"
        elif "libs.html" in url:
            tag['href'] = "rust/stable/latest/docs/index.html"

    return nav
|
|
|
|
|
|
def get_hotdoc_components(docs_root, prefix):
    """
    Parses the main GStreamer page (index.html in docs_root) and renders
    the pieces that get injected into other pages.

    Args:
        docs_root (Path): Root directory of the documentation
        prefix: Relative prefix applied to every relative URL of the
            extracted components

    Returns tuple of (resources_html, nav_html)

    Raises:
        Exception: If the CSS links, the JS scripts or the navigation bar
            could not be extracted
    """
    with open(docs_root / "index.html", 'r', encoding='utf-8') as index_file:
        soup = BeautifulSoup(index_file.read(), 'html.parser')

    # Pull everything out of the page before touching any URL
    css_links, js_scripts = extract_hotdoc_resources(soup, prefix)
    nav = extract_hotdoc_nav(soup)

    if not css_links:
        raise Exception("Failed to extract CSS links")
    if not js_scripts:
        raise Exception("Failed to extract JS scripts")
    if not nav:
        raise Exception("Failed to extract navigation")

    # Gather the head resources under one parent so their URLs can be
    # rewritten in a single pass.
    wrapper_soup = BeautifulSoup("<div></div>", 'html.parser')
    container = wrapper_soup.div
    assert container
    for component in [*css_links, *js_scripts]:
        container.append(component)

    fix_relative_urls(wrapper_soup, prefix)
    fix_relative_urls(nav, prefix)

    # Serialize the results
    rendered_resources = [str(tag) for tag in container.contents]
    resources_html = "\n".join(rendered_resources)
    resources_html += f'\n<script src="{prefix}assets/rustdoc/js/theme-sync.js" />'
    nav_html = str(nav) if nav else ""

    return resources_html, nav_html
|
|
|
|
|
|
def modify_rustdoc_html_file(file_path, docs_root):
    """
    Rewrites one rustdoc HTML file in place so that it carries the hotdoc
    resources, the hotdoc navigation bar and the Rust language menu.

    Args:
        file_path (Path): Path to the rustdoc HTML file
        docs_root (Path): Root directory of the documentation

    Returns:
        bool: True once the file has been rewritten

    Raises:
        Exception: If the Rust API menu or the hotdoc resources cannot be added
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        original_html = source.read()

    # Everything injected below is addressed relative to this file
    prefix = get_relative_prefix(file_path, docs_root)
    resources_html, nav_html = get_hotdoc_components(docs_root, prefix)

    soup = BeautifulSoup(original_html, 'html.parser')

    remove_rustdoc_settings(soup)
    if not add_rust_api_menu(soup, prefix):
        raise Exception("Failed to add Rust API menu")

    head = soup.find('head')
    if not (head and resources_html):
        raise Exception("Failed to add get hotdoc components")

    # Versions offered by the page: the two fixed entries first, then one
    # per versioned directory found under rust/stable.
    rust_versions = OrderedDict()
    rust_versions["Stable"] = f'{prefix}rust/stable/latest/docs/index.html'
    rust_versions["Development"] = f'{prefix}rust/git/docs/index.html'

    stable_root = docs_root / "rust" / "stable"
    for entry in stable_root.iterdir():
        if entry.name != "latest" and entry.is_dir() and (entry / "index.html").exists():
            rust_versions[entry.name] = f'{prefix}rust/stable/{entry.name}/docs/index.html'

    versions_attr = html.escape(json.dumps(rust_versions))
    meta_markup = (
        '<meta id="hotdoc-rust-info"\n'
        f'hotdoc-root-prefix="{prefix}"\n'
        f'hotdoc-rustdoc-versions="{versions_attr}" />'
    )
    head.insert(0, BeautifulSoup(meta_markup, 'html.parser'))

    # Hotdoc resources go first, the rustdoc-in-hotdoc stylesheet after them
    stylesheet_markup = f"<link rel=\"stylesheet\" href=\"{prefix}assets/rustdoc/css/rustdoc-in-hotdoc.css\" />"
    head.append(BeautifulSoup(resources_html, 'html.parser'))
    head.append(BeautifulSoup(stylesheet_markup, 'html.parser'))

    # The navigation bar becomes the first element of the body
    body = soup.find('body')
    if body and nav_html:
        nav = BeautifulSoup(nav_html, 'html.parser')
        first_element = body.find(True)
        if first_element:
            first_element.insert_before(nav)

    with open(file_path, 'w', encoding='utf-8') as target:
        target.write(str(soup))

    return True
|
|
|
|
|
|
def remove_rustdoc_settings(soup):
    """Drops the first <rustdoc-toolbar> element from the document, if there is one."""
    rustdoc_toolbar = soup.find('rustdoc-toolbar')
    if not rustdoc_toolbar:
        return
    rustdoc_toolbar.decompose()
|
|
|
|
|
|
def copy_rustdoc_integration_assets(docs_dir):
    """
    Copies the rustdoc integration assets into the documentation tree and
    fills the crate information into the copied sitemap-rs-fixer.js.

    The assets are taken from the "assets" directory next to this script and
    copied to <docs_dir>/assets/rustdoc. The CRATES_LIST and CRATES_RENAMES
    placeholders of js/sitemap-rs-fixer.js are then replaced with the
    non-sys crates found in the stable Rust docs and their display names.

    Args:
        docs_dir (Path): Root directory of the documentation

    Returns:
        Path of the rustdoc assets directory, or None when the stable Rust
        docs directory does not exist (a warning is printed in that case).
    """
    asset_dir = docs_dir / "assets" / "rustdoc"
    asset_dir.mkdir(parents=True, exist_ok=True)

    src_asset_dir = Path(__file__).parent / "assets"
    shutil.copytree(src_asset_dir, asset_dir, dirs_exist_ok=True)

    # Get list of non-sys Rust crates
    latest_file = docs_dir / "rust" / "stable" / "latest"
    if latest_file.is_file():
        # 'latest' is still the plain file naming the current version
        with open(latest_file, 'r') as f:
            version = f.read().strip()
    else:
        # 'latest' is already a directory (or missing altogether, which the
        # existence check below reports). Testing the file type avoids
        # depending on IsADirectoryError, which is not what every platform
        # raises when a directory is opened.
        version = 'latest'

    rust_docs_path = docs_dir / "rust" / "stable" / version / "docs"
    if not rust_docs_path.exists():
        print(f"Warning: Rust docs directory not found at {rust_docs_path}")
        return

    crates = sorted(
        item.name for item in rust_docs_path.iterdir()
        if item.is_dir() and not item.name.endswith('_sys') and (item / "index.html").exists()
    )
    crates_renames = {
        "gstreamer": "core",
        "gstreamer_gl": "OpenGL",
        "gstreamer_gl_egl": "OpenGL EGL",
        "gstreamer_gl_wayland": "OpenGL Wayland",
        "gstreamer_gl_x11": "OpenGL X11",
    }

    rs_fixer_script = asset_dir / "js/sitemap-rs-fixer.js"
    with rs_fixer_script.open("r", encoding='utf-8') as f:
        script_template = f.read()

    # Replace the placeholder values
    script_content = script_template.replace(
        "CRATES_LIST", str(crates)
    ).replace(
        "CRATES_RENAMES", str(crates_renames)
    )

    # Write the modified script
    print('\nAdding crates information into sitemap-rs-fixer.js')
    with rs_fixer_script.open("w", encoding='utf-8') as f:
        f.write(script_content)

    return asset_dir
|
|
|
|
|
|
def add_rust_api_menu(soup, prefix=''):
    """
    Adds a script to dynamically insert Rust into the language dropdown menu.

    Args:
        soup: BeautifulSoup object of the HTML content
        prefix: Relative prefix leading from the page to the documentation
            root; prepended to the script URL

    Returns:
        bool: Always True; failures are reported by raising

    Raises:
        Exception: If the document has no <head> tag
    """
    head = soup.find('head')
    if head is None:
        raise Exception("Failed to find <head> tag")

    # Drop copies of the script injected by an earlier run so that
    # processing a file twice does not accumulate duplicate tags. The tag
    # has to be removed from the document itself; removing it from the
    # find_all() result list leaves the document unchanged. Only the
    # rustdoc integration script is matched, so a page's own scripts with a
    # similar file name are left alone.
    injected_src = 'assets/rustdoc/js/language-menu.js'
    for script in soup.find_all('script'):
        if injected_src in script.get('src', ''):
            script.decompose()

    # Add script to the end of head
    script_tag = BeautifulSoup(f'<script src="{prefix}assets/rustdoc/js/language-menu.js" />', 'html.parser')
    head.append(script_tag)
    return True
|
|
|
|
|
|
def modify_hotdoc_html_file(file_path):
    """
    Rewrites one hotdoc HTML file in place with the Rust API menu script
    added to it.

    Returns:
        bool: True once the file has been rewritten

    Raises:
        Exception: If the Rust API menu could not be added
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        soup = BeautifulSoup(source.read(), 'html.parser')

    # Add Rust API menu
    if not add_rust_api_menu(soup):
        raise Exception("Failed to add Rust API menu")

    # Persist the updated document
    with open(file_path, 'w', encoding='utf-8') as target:
        target.write(str(soup))
    return True
|
|
|
|
|
|
def is_rustdoc_file(path):
    """Tells whether `path` has "rust" as its first component."""
    # Slicing keeps the comparison safe for a path without any component.
    return Path(path).parts[:1] == ("rust",)
|
|
|
|
|
|
def process_single_file(file_path, docs_path, counters):
    """
    Process a single HTML file with appropriate modifications.

    Files below the "rust" directory of the documentation get the rustdoc
    treatment, every other file the hotdoc one. The outcome is recorded in
    `counters`, and any exception is reported and counted as a failure
    rather than propagated.

    Args:
        file_path (Path): Path to the HTML file
        docs_path (Path): Root documentation directory
        counters (dict): Shared dictionary for counting successes/failures;
            needs the 'processed', 'failed', 'total' and 'lock' entries
    """
    try:
        if is_rustdoc_file(file_path.relative_to(docs_path)):
            succeeded = modify_rustdoc_html_file(file_path, docs_path)
        else:
            succeeded = modify_hotdoc_html_file(file_path)

        outcome = 'processed' if succeeded else 'failed'
        with counters['lock']:
            counters[outcome] += 1

        # Refresh the progress line for every file: 'total' counts hotdoc
        # and rustdoc files alike.
        if sys.stdout.isatty():
            print(f"\rProcessed: {counters['processed'] + counters['failed']}/{counters['total']} files", end='')

    except Exception as e:
        print(f"\nError processing {file_path}: {repr(e)}")
        traceback.print_exc()
        with counters['lock']:
            counters['failed'] += 1
|
|
|
|
|
|
def has_ancestor(path: Path, ancestor: Path) -> bool:
    """
    Tells whether `path`, once resolved, is `ancestor` itself or lies
    somewhere below it.
    """
    resolved_path = path.resolve()
    resolved_ancestor = ancestor.resolve()
    return resolved_ancestor == resolved_path or resolved_ancestor in resolved_path.parents
|
|
|
|
|
|
def process_docs(docs_dir: Path):
    """
    Recursively processes all HTML files in the documentation directory using parallel processing.

    Files below the "assets" directory and the hotdoc-sitemap.html file are
    left out. A summary with the number of processed and failed files is
    printed at the end.

    Args:
        docs_dir (Path): Path to the root of the documentation
    """
    docs_path = Path(docs_dir).resolve()

    if not docs_path.exists():
        print(f"Error: Directory {docs_dir} does not exist")
        return

    sitemap_path = (docs_path / "hotdoc-sitemap.html").resolve()
    assets_dir = (docs_path / 'assets').resolve()

    def html_file_needs_processing(html_file):
        html_file = html_file.resolve()
        return html_file != sitemap_path and not has_ancestor(html_file, assets_dir)

    # Get list of all HTML files
    html_files = [html_file for html_file in docs_path.rglob('*.html')
                  if html_file_needs_processing(html_file)]

    if not html_files:
        # A pool needs at least one worker: Pool(processes=0) raises ValueError
        print(f"No HTML files to process in {docs_path}")
        return

    # One worker per CPU, but never more workers than files
    num_processes = min(cpu_count(), len(html_files))

    print(f"\nStarting parallel processing with {num_processes} processes...")

    # The manager shares the counters between the worker processes; the
    # context manager shuts its server process down once the results are read.
    with Manager() as manager:
        counters = manager.dict({
            'processed': 0,
            'failed': 0,
            'total': len(html_files)
        })
        counters['lock'] = manager.Lock()

        # Create a partial function with fixed arguments
        process_func = partial(process_single_file, docs_path=docs_path, counters=counters)

        # Process files in parallel
        with Pool(processes=num_processes) as pool:
            pool.map(process_func, html_files)

        processed = counters['processed']
        failed = counters['failed']

    print("\n\nProcessing complete:")
    print(f"Successfully processed: {processed} files")
    print(f"Failed to process: {failed} files")
|
|
|
|
|
|
def download_rust_docs(doc_dir):
    """
    Downloads the latest Rust documentation artifact of the
    gstreamer/gstreamer-rs "pages" job and unpacks it into doc_dir / "rust".

    The archive is stored as "rustdocs.zip" in the current directory and is
    reused when it already exists. It is unpacked in the current directory
    (skipping the files ending in "html.gz") and its "public" directory is
    then moved to doc_dir / "rust".

    Args:
        doc_dir (Path): Path to the directory where the documentation will be moved
    """
    print("Looking for rust latest rust documention")
    if not os.path.exists("rustdocs.zip"):
        rustdoc_url = get_documentation_artifact_url("gstreamer/gstreamer-rs",
                                                     "pages")

        def progress_hook(count, block_size, total_size):
            received = count * block_size
            if total_size <= 0:
                # The server did not announce a size: no percentage possible
                sys.stdout.write(f"\rDownloading... {received} bytes")
            else:
                # The last block is usually partial, so cap at 100%
                percent = min(100, int(received * 100 / total_size))
                sys.stdout.write(f"\rDownloading... {percent}%")
            sys.stdout.flush()

        print(f"Downloading rust documentation from {rustdoc_url}")
        # Download under a temporary name so that an interrupted transfer
        # never leaves a truncated rustdocs.zip for the next run to reuse.
        partial_download = "rustdocs.zip.part"
        urlretrieve(rustdoc_url, partial_download, reporthook=progress_hook)
        os.replace(partial_download, "rustdocs.zip")

    print("Unpacking rust documentation")
    with zipfile.ZipFile('rustdocs.zip', 'r') as zip_ref:
        # Get list of files
        file_list = zip_ref.namelist()
        total_files = len(file_list)

        # Extract each file with progress
        for i, file in enumerate(file_list, 1):
            if file.endswith('html.gz'):
                continue

            zip_ref.extract(file, '.')
            if sys.stdout.isatty():
                print(f"\rExtracting: {i}/{total_files} files", end='')

    shutil.move("public", doc_dir / "rust")
|
|
|
|
|
|
def move_rust_latest(doc_dir):
    """
    Turns rust/stable/latest into a real directory.

    When `latest` is a plain file, it is read as the name of a sibling
    version directory; the file is removed and that directory is renamed
    to `latest`. Nothing is done when `latest` is already a directory.

    Args:
        doc_dir (Path): Root directory of the documentation

    Raises:
        FileNotFoundError: If `latest` does not exist, or if the version
            directory it names does not exist
    """
    latest_file = doc_dir / "rust" / "stable" / "latest"
    if not latest_file.exists():
        raise FileNotFoundError(f"'{latest_file}' not found")

    if latest_file.is_dir():
        print(f'{latest_file} is already a directory')
        return

    # Read the version from the latest file
    with open(latest_file, 'r') as f:
        version = f.read().strip()

    # Validate before deleting anything, so that a bad version name does not
    # leave the tree without its 'latest' entry.
    version_dir = latest_file.parent / version
    if not version_dir.exists():
        raise FileNotFoundError(f"Version directory '{version}' not found")

    # Replace the 'latest' file with the version directory itself
    latest_file.unlink()
    version_dir.rename(latest_file)
    print(f'Moved {version_dir} to {latest_file}')
|
|
|
|
|
|
def add_rust_fixer_to_sitemap(docs_root: Path):
    """
    Appends the utils.js and sitemap-rs-fixer.js scripts to the <head> of
    hotdoc-sitemap.html, unless the fixer script is already referenced.

    A warning is printed and nothing is changed when the sitemap file does
    not exist.
    """
    sitemap_path = docs_root / "hotdoc-sitemap.html"
    if not sitemap_path.exists():
        print(f"Warning: {sitemap_path} not found")
        return

    with open(sitemap_path, 'r', encoding='utf-8') as source:
        soup = BeautifulSoup(source.read(), 'html.parser')

    head = soup.find('head')
    assert head, "Failed to find <head> tag"

    already_patched = any(
        'sitemap-rs-fixer.js' in script.get('src', '')
        for script in head.find_all('script')
    )
    if already_patched:
        print('hotdoc-sitemap.html already contains the script')
        return

    # The utils script is required by the fixer to detect
    # the configured language
    scripts_markup = (
        '\n'
        '<script src="assets/js/utils.js" />\n'
        '<script src="assets/rustdoc/js/sitemap-rs-fixer.js" />\n'
    )
    head.append(BeautifulSoup(scripts_markup, 'html.parser'))

    with open(sitemap_path, 'w', encoding='utf-8') as target:
        target.write(str(soup))
|
|
|
|
|
|
if __name__ == "__main__":
    import sys

    # Exactly one argument is expected: the documentation directory
    if len(sys.argv) != 2:
        print("Usage: python modify_rust_docs.py <rust_docs_directory>")
        sys.exit(1)

    docs_dir = Path(sys.argv[1])

    # The steps run in this order, each one on the same directory
    pipeline_steps = (
        download_rust_docs,
        move_rust_latest,
        copy_rustdoc_integration_assets,
        process_docs,
        add_rust_fixer_to_sitemap,
    )
    for step in pipeline_steps:
        step(docs_dir)
|