Source code for scopesim.server.github_utils

# -*- coding: utf-8 -*-
"""
Used only by the `database` submodule.

Original comment for these functions:
    2022-04-10 (KL)
    Code taken directly from https://github.com/sdushantha/gitdir
    Adapted for ScopeSim usage.
    Many thanks to the authors!

"""

import re
from pathlib import Path
from typing import Tuple, Union

from .download_utils import handle_download, send_get, create_client
from ..utils import get_logger


logger = get_logger(__name__)


# Matches a URL that points at a bare repository (no tree/blob path).
# Owner: alphanumerics and single hyphens, up to 39 characters.
# Repository: alphanumerics plus "-", "_" and ".", optional trailing slash.
_REPO_ONLY_URL = re.compile(
    r"https://github\.com/[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}"
    r"/[A-Za-z0-9_.-]+/?$",
    re.IGNORECASE,
)

# Matches the "/tree/<branch>/" or "/blob/<branch>/" segment of a GitHub URL.
_BRANCH_SEGMENT = re.compile(r"/(tree|blob)/(.+?)/")


def create_github_url(url: str) -> Tuple[str, str]:
    """
    From the given url, produce a URL compatible with Github's REST API.

    Can handle blob or tree paths.

    Parameters
    ----------
    url : str
        Browser URL of a file or folder inside a GitHub repository, i.e. one
        containing a ``/tree/<branch>/`` or ``/blob/<branch>/`` segment.

    Returns
    -------
    api_url : str
        The corresponding ``api.github.com/repos/.../contents/...`` URL, with
        the branch passed as the ``ref`` query parameter.
    download_dirs : str
        The part of `url` that follows the branch segment, i.e. the path
        inside the repository.

    Raises
    ------
    ValueError
        If `url` points to a complete repository, or if it does not contain
        a ``/tree/<branch>/`` or ``/blob/<branch>/`` segment.

    """
    # Check if the given url is a url to a GitHub repo. If it is, tell the
    # user to use 'git clone' to download it
    if _REPO_ONLY_URL.match(url):
        message = ("✘ The given url is a complete repository. Use 'git clone'"
                   " to download the repository")
        logger.error(message)
        raise ValueError(message)

    # extract the branch name from the given url (e.g master)
    branch = _BRANCH_SEGMENT.search(url)
    if branch is None:
        # Without this guard the slicing below fails with an opaque
        # AttributeError on ``None``.
        raise ValueError(
            "The given url does not contain a '/tree/<branch>/' or "
            f"'/blob/<branch>/' segment: {url}"
        )

    # Everything after the branch segment is the path inside the repository.
    download_dirs = url[branch.end():]
    # Only the first occurrence is replaced, so a path that happens to
    # contain "github.com" further down is left untouched.
    repo_api_root = url[:branch.start()].replace(
        "github.com", "api.github.com/repos", 1)
    api_url = f"{repo_api_root}/contents/{download_dirs}?ref={branch.group(2)}"
    return api_url, download_dirs
def download_github_folder(repo_url: str,
                           output_dir: Union[Path, str] = "./") -> None:
    """
    Download the files and directories in repo_url.

    Re-written based on the download function
    `here <https://github.com/sdushantha/gitdir/blob/f47ce9d85ee29f8612ce5ae804560a12b803ddf3/gitdir/gitdir.py#L55>`_

    Parameters
    ----------
    repo_url : str
        Browser URL of a folder (``/tree/<branch>/...``) or a single file
        (``/blob/<branch>/...``) inside a GitHub repository.
    output_dir : Path or str, optional
        Local directory under which the repository-relative paths are
        recreated. Defaults to the current directory.

    Raises
    ------
    ValueError
        Propagated from ``create_github_url`` when `repo_url` cannot be
        converted into an API URL.

    """
    output_dir = Path(output_dir)

    # convert repo_url into an api_url
    api_url, download_dirs = create_github_url(repo_url)

    # get the contents of the github folder
    with create_client("", cached=False) as client:
        data = send_get(client, api_url).json()

        if isinstance(data, dict):
            # The contents API answers with a single JSON object (instead of
            # an array) when the path is a file, e.g. for a "blob" URL.
            # Treat it as a one-entry listing and only create the directory
            # that will hold the file, not a directory named like the file.
            entries = [data]
            base_dir = (output_dir / download_dirs).parent
        else:
            entries = data
            base_dir = output_dir / download_dirs

        # Make the base directories for this GitHub folder
        base_dir.mkdir(parents=True, exist_ok=True)

        for entry in entries:
            # if the entry is a further folder, walk through it
            if entry["type"] == "dir":
                download_github_folder(repo_url=entry["html_url"],
                                       output_dir=output_dir)

            # if the entry is a file, download it
            elif entry["type"] == "file":
                # entry["path"] is used as the path relative to output_dir,
                # so the local layout mirrors the one reported by the API.
                save_path = output_dir / entry["path"]
                handle_download(client, entry["download_url"],
                                save_path, entry["path"],
                                padlen=0, disable_bar=True)
                logger.info("Downloaded: %s", entry["path"])