🎸

GitHub のリポジトリを丸ごとコピーする

2024/10/02に公開

背景

GitHub TeamsにおいてTeam AとTeam Bの両方に所属しており、Team AのリポジトリをTeam Bのリポジトリへコピーしたいという状況になった。「リポジトリを複製する」の手順ではIssue等を引き継ぐことができず、「リポジトリを移譲する」の手順ではIssue等を引き継ぐことはできるが元のリポジトリの権限が消えてしまうという問題がある。

方針

以下の方針でリポジトリをコピーする

  1. 「リポジトリを複製する」の手順に従ってブランチ等をコピーする。
  2. 1.でコピーできない要素についてはPyGithubを用いてスクリプトを書いてコピーする。

リポジトリのコピー

「リポジトリを複製する」を参考にコピーする。

git clone --bare https://github.com/EXAMPLE-USER/OLD-REPOSITORY.git
cd OLD-REPOSITORY.git
git push --mirror https://github.com/EXAMPLE-USER/NEW-REPOSITORY.git

PyGithubを用いたコピー

Tokenの発行

「個人用アクセス トークンを管理する」を参考に、以下の権限を付与したfine-grained personal access tokenを発行する。ここでAccount permissionsは必要ない。

Repository permissions Access
Contents Read and write
Issues Read and write
Metadata Read-only
Pull requests Read and write

環境構築

以下を実行して環境構築する。

python -m venv venv
source venv/bin/activate
pip install PyGithub requests packaging

Labelのコピー

以下を実行してLabelをコピーする。本コードはコピー先のリポジトリのLabelを削除して、コピー元のリポジトリのLabelをコピーする。

python migrate_labels.py --source EXAMPLE-USER/OLD-REPOSITORY --target EXAMPLE-USER/NEW-REPOSITORY --token $TOKEN
ソースコード
migrate_labels.py
import argparse
from typing import List

from github import Github
from github.GithubException import GithubException
from github.Label import Label as GithubLabel
from github.Repository import Repository


def main() -> None:
    """Command-line entry point for copying labels between repositories.

    Reads ``--source``, ``--target`` and ``--token`` from the command line,
    looks up both repositories through the GitHub API and passes them to
    ``migrate_labels``. If either repository cannot be retrieved, the error
    is printed and nothing is migrated.
    """
    cli = argparse.ArgumentParser(description="GitHub Labels Migrator")
    # All three options are mandatory and are declared the same way.
    for flag, help_text in (
        ("--source", "Source repository (e.g., owner/source-repo)"),
        ("--target", "Target repository (e.g., owner/target-repo)"),
        ("--token", "GitHub personal access token"),
    ):
        cli.add_argument(flag, required=True, help=help_text)
    options = cli.parse_args()

    # Strip stray whitespace that may come from shell quoting.
    source_name = options.source.strip()
    target_name = options.target.strip()
    client = Github(options.token.strip())

    try:
        origin = client.get_repo(source_name)
        destination = client.get_repo(target_name)
    except GithubException as exc:
        print(f"Failed to retrieve repositories: {exc}")
        return

    print("Migrating labels...")
    migrate_labels(origin, destination)
    print("Label migration completed.")


def migrate_labels(source_repo: Repository, target_repo: Repository) -> None:
    """Replace the labels of ``target_repo`` with those of ``source_repo``.

    The source labels are fetched before anything is deleted, so a failure
    to read the source repository leaves the target labels untouched instead
    of wiping them and then having nothing to recreate.

    Errors from the GitHub API are printed, not raised. A failure during the
    delete/create phase stops the migration at that point.

    Args:
        source_repo: Repository whose labels are copied.
        target_repo: Repository whose existing labels are deleted and
            replaced.
    """
    # Read phase: nothing destructive has happened yet if this fails.
    try:
        source_labels: List[GithubLabel] = list(source_repo.get_labels())
    except GithubException as e:
        print(f"Error occurred while migrating labels: {e}")
        return

    try:
        # Take a snapshot of the target labels before deleting, so deletions
        # do not happen while the label listing is still being iterated.
        target_labels: List[GithubLabel] = list(target_repo.get_labels())
        for label in target_labels:
            label.delete()
            print(f"Deleted label '{label.name}' from target repository.")

        # Recreate every source label in the now-empty target.
        for label in source_labels:
            target_repo.create_label(
                name=label.name, color=label.color, description=label.description or ""
            )
            print(f"Created label '{label.name}' in target repository.")
    except GithubException as e:
        print(f"Error occurred while migrating labels: {e}")


# Run the label migration only when this file is executed as a script.
if __name__ == "__main__":
    main()

Issueのコピー

以下を実行してIssueをコピーする。本コードはコピー元のリポジトリのIssueをコピーするが、#番号のような自動で割り当てられる番号は一致しないことに注意する。

python migrate_issues.py --source EXAMPLE-USER/OLD-REPOSITORY --target EXAMPLE-USER/NEW-REPOSITORY --token $TOKEN
ソースコード
migrate_issues.py
import argparse
from typing import List

from github import Github
from github.GithubException import GithubException
from github.Issue import Issue as GithubIssue
from github.Label import Label as GithubLabel
from github.Repository import Repository


def main() -> None:
    """Command-line entry point for copying issues between repositories.

    Reads ``--source``, ``--target`` and ``--token`` from the command line,
    looks up both repositories through the GitHub API and passes them to
    ``migrate_issues``. If either repository cannot be retrieved, the error
    is printed and nothing is migrated.
    """
    cli = argparse.ArgumentParser(description="GitHub Issues Migrator")
    # All three options are mandatory and are declared the same way.
    for flag, help_text in (
        ("--source", "Source repository (e.g., owner/source-repo)"),
        ("--target", "Target repository (e.g., owner/target-repo)"),
        ("--token", "GitHub personal access token"),
    ):
        cli.add_argument(flag, required=True, help=help_text)
    options = cli.parse_args()

    # Strip stray whitespace that may come from shell quoting.
    source_name = options.source.strip()
    target_name = options.target.strip()
    client = Github(options.token.strip())

    try:
        origin = client.get_repo(source_name)
        destination = client.get_repo(target_name)
    except GithubException as exc:
        print(f"Failed to retrieve repositories: {exc}")
        return

    print("Migrating issues...")
    migrate_issues(origin, destination)
    print("Issue migration completed.")


def migrate_issues(source_repo: Repository, target_repo: Repository) -> None:
    """Copy every issue (not pull requests) from source to target.

    Issues are recreated in ascending order of their original number with
    the same title, body, labels, comments and open/closed state. Labels are
    matched by name against the target repository; a label that cannot be
    looked up there is reported and left off the new issue.

    Errors from the GitHub API while creating issues or comments are printed,
    not raised, and stop the migration at that point.

    Args:
        source_repo: Repository to read issues and comments from.
        target_repo: Repository in which the issues are recreated.
    """
    # Successful label lookups, keyed by name. The same label is usually
    # attached to many issues, so this avoids one API request per use.
    # Failed lookups are deliberately not cached, so they are retried.
    label_cache: dict = {}

    try:
        # Fetch open and closed issues and restore their original ordering.
        sorted_issues: List[GithubIssue] = sorted(
            source_repo.get_issues(state="all"), key=lambda issue: issue.number
        )

        for issue in sorted_issues:
            # The issues listing also contains pull requests; skip them.
            if issue.pull_request is not None:
                continue

            # Resolve the source label names to label objects in the target.
            labels: List[GithubLabel] = []
            for name in (label.name for label in issue.labels):
                if name not in label_cache:
                    try:
                        label_cache[name] = target_repo.get_label(name)
                    except GithubException:
                        print(f"Label '{name}' does not exist in target repository.")
                        continue  # Leave this label off the new issue
                labels.append(label_cache[name])

            # Create the issue in the target repo
            new_issue: GithubIssue = target_repo.create_issue(
                title=issue.title, body=issue.body or "", labels=labels
            )
            print(
                f"Created issue #{new_issue.number} from source issue #{issue.number}."
            )

            # Migrate comments
            for comment in issue.get_comments():
                new_issue.create_comment(body=comment.body or "")

            # New issues start open; close the copy if the source was closed.
            if issue.state == "closed":
                new_issue.edit(state="closed")

    except GithubException as e:
        print(f"Error occurred while migrating issues: {e}")


# Run the issue migration only when this file is executed as a script.
if __name__ == "__main__":
    main()

Releaseのコピー

以下を実行してReleaseをコピーする。本コードはコピー先のリポジトリのReleaseをコピー元のリポジトリのReleaseで上書きする。ここでWhat's Changedなどで参照されるPull requestなどは元のリポジトリを参照する形になることに注意する。

python migrate_releases.py --source EXAMPLE-USER/OLD-REPOSITORY --target EXAMPLE-USER/NEW-REPOSITORY --token $TOKEN
ソースコード
migrate_releases.py
import argparse
import re
from typing import List

import requests
from github import Github
from github.GithubException import GithubException, UnknownObjectException
from github.GitRelease import GitRelease
from github.Repository import Repository
from packaging import version


def main() -> None:
    """Command-line entry point for copying releases between repositories.

    Reads ``--source``, ``--target`` and ``--token`` from the command line,
    looks up both repositories through the GitHub API and passes them, with
    the token, to ``migrate_releases``. If either repository cannot be
    retrieved, the error is printed and nothing is migrated.
    """
    cli = argparse.ArgumentParser(
        description="GitHub Releases Migrator with Semantic Version Sorting"
    )
    # All three options are mandatory and are declared the same way.
    for flag, help_text in (
        ("--source", "Source repository (e.g., owner/source-repo)"),
        ("--target", "Target repository (e.g., owner/target-repo)"),
        ("--token", "GitHub personal access token"),
    ):
        cli.add_argument(flag, required=True, help=help_text)
    options = cli.parse_args()

    # Strip stray whitespace that may come from shell quoting.
    source_name = options.source.strip()
    target_name = options.target.strip()
    access_token = options.token.strip()
    client = Github(access_token)

    try:
        origin = client.get_repo(source_name)
        destination = client.get_repo(target_name)
    except GithubException as exc:
        print(f"Failed to retrieve repositories: {exc}")
        return

    print("Migrating releases...")
    # The raw token is passed on because assets are downloaded with requests.
    migrate_releases(origin, destination, access_token)
    print("Release migration completed.")


def migrate_releases(
    source_repo: Repository, target_repo: Repository, token: str
) -> None:
    """Recreate every release of ``source_repo`` in ``target_repo``.

    Releases are processed in ascending order of the version returned by
    ``parse_semver``. For each release, a release with the same tag that
    already exists in the target is deleted, a new release is created from
    the source metadata, and the source assets are downloaded and uploaded
    to the new release.

    Failures are printed, not raised. A release that cannot be cleaned up or
    created is skipped; an asset that cannot be copied is skipped.

    Args:
        source_repo: Repository to read releases and assets from.
        target_repo: Repository in which the releases are recreated.
        token: Personal access token, sent as a Bearer token when
            downloading assets.
    """
    try:
        source_releases: List[GitRelease] = list(source_repo.get_releases())

        # Oldest version first.
        sorted_releases: List[GitRelease] = sorted(
            source_releases, key=lambda r: parse_semver(r.tag_name)
        )

        for release in sorted_releases:
            tag_name = release.tag_name
            print(f"\nProcessing release '{tag_name}'...")

            if not _delete_existing_release(target_repo, tag_name):
                continue  # Skip to the next release

            # Create a new release in the target repository
            try:
                new_release: GitRelease = target_repo.create_git_release(
                    tag=tag_name,
                    # Fall back to an empty title so an untitled release never
                    # passes a non-string. `name` is read defensively because
                    # it may not exist on every PyGithub version -- TODO confirm.
                    name=release.title or getattr(release, "name", None) or "",
                    message=release.body or "",
                    draft=release.draft,
                    prerelease=release.prerelease,
                    target_commitish=release.target_commitish,
                )
                print(f"Created new release '{tag_name}' in target repository.")
            except GithubException as e:
                print(f"Failed to create release '{tag_name}': {e}")
                continue  # Skip to the next release

            _copy_release_assets(source_repo, release, new_release, token)

    except GithubException as e:
        print(f"An error occurred while migrating releases: {e}")


def _delete_existing_release(target_repo: Repository, tag_name: str) -> bool:
    """Delete the target release tagged ``tag_name`` and its assets, if any.

    Returns:
        True when the caller may go on to create the release (nothing
        existed, or the old release was removed). False when looking up or
        deleting the old release failed.
    """
    try:
        target_release = target_repo.get_release(tag_name)
        print(f"Existing release '{tag_name}' found in target repository. Deleting...")

        # Delete all assets in the existing target release
        for asset in target_release.get_assets():
            try:
                asset.delete_asset()
                print(f"Deleted asset '{asset.name}' from release '{tag_name}'.")
            except GithubException as e:
                print(f"Failed to delete asset '{asset.name}': {e}")

        target_release.delete_release()
        print(f"Deleted existing release '{tag_name}'.")
    except UnknownObjectException:
        print(f"No existing release with tag '{tag_name}' found in target repository.")
    except GithubException as e:
        print(f"Failed to retrieve or delete existing release '{tag_name}': {e}")
        return False
    return True


def _copy_release_assets(
    source_repo: Repository, release: GitRelease, new_release: GitRelease, token: str
) -> None:
    """Download each asset of ``release`` and upload it to ``new_release``."""
    # The headers are the same for every asset, so build them once.
    headers = {
        "Accept": "application/octet-stream",
        "Authorization": f"Bearer {token}",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    for asset in release.get_assets():
        try:
            print(f"Downloading asset '{asset.name}' from source release...")
            response = requests.get(
                url=f"https://api.github.com/repos/{source_repo.full_name}/releases/assets/{asset.id}",
                headers=headers,
                allow_redirects=True,
                # (connect, read) seconds, so a stalled connection cannot
                # hang the migration forever.
                timeout=(10, 120),
            )
            response.raise_for_status()
            # The whole asset is held in memory before it is uploaded.
            payload = response.content

            print(f"Uploading asset '{asset.name}' to target release...")
            new_release.upload_asset_from_memory(
                file_like=payload,
                # Use the actual byte count. The Content-Length header may be
                # missing or may describe an encoded body of a different size.
                file_size=len(payload),
                name=asset.name,
                label=asset.label or "",
                content_type=asset.content_type or "application/octet-stream",
            )
            print(f"Successfully uploaded asset '{asset.name}'.")
        except requests.RequestException as e:
            print(f"Failed to download asset '{asset.name}': {e}")
        except GithubException as e:
            print(f"Failed to upload asset '{asset.name}': {e}")


def parse_semver(tag: str) -> version.Version:
    """Convert a release tag into a sortable ``Version``.

    The whole tag is parsed first, so pre-release suffixes and tags with
    fewer than three components keep their meaning (for example
    ``v1.0.0-rc1`` sorts before ``v1.0.0``). If the tag as a whole is not a
    valid version, a leading ``X.Y.Z`` (optionally prefixed with ``v``) is
    used. Tags with neither form sort as ``0.0.0``.

    Args:
        tag: Release tag name, e.g. ``"v1.2.3"``.

    Returns:
        The parsed version, or ``Version("0.0.0")`` when nothing usable is
        found.
    """
    try:
        return version.Version(tag)
    except version.InvalidVersion:
        pass

    # Fall back to the numeric core at the start of the tag.
    match = re.match(r"v?(\d+\.\d+\.\d+)", tag)
    if match:
        # Three dot-separated integers always form a valid version.
        return version.Version(match.group(1))

    # Return a default low version if parsing fails
    return version.Version("0.0.0")


# Run the release migration only when this file is executed as a script.
if __name__ == "__main__":
    main()

Discussion