# Recovered from the collapsed patch "Add subpackage for github fetching"
# (commit ab259d69b9ad9a272a750e91f374553b88dd65e8, cdboer, 2023-01-29).
# The same patch creates gitlab2prov/adapters/hub/__init__.py containing only:
#     from gitlab2prov.adapters.hub.fetcher import GithubFetcher
#
# ---- gitlab2prov/adapters/hub/fetcher.py ----
import logging
import itertools
from typing import Iterator
from dataclasses import dataclass, field, InitVar

from github import Github
from github.Repository import Repository

from gitlab2prov.adapters.project_url import GithubProjectUrl
from gitlab2prov.adapters.hub.parser import GithubAnnotationParser
from gitlab2prov.domain.constants import ProvRole
from gitlab2prov.domain.objects import (
    Asset,
    User,
    Commit,
    Issue,
    MergeRequest,
    GitTag,
    Release,
)


log = logging.getLogger(__name__)


@dataclass
class GithubFetcher:
    """Fetch commits, issues, pull requests, releases and tags of one GitHub repository.

    ``token`` and ``url`` are init-only: they are used once in ``__post_init__``
    to build the API client and to resolve the repository, and are not stored.
    """

    token: InitVar[str]
    url: InitVar[str]

    # A factory instead of a shared instance: GithubAnnotationParser is a
    # non-frozen dataclass and therefore unhashable, which Python 3.11+ rejects
    # as a dataclass field default (ValueError at class creation).
    parser: GithubAnnotationParser = field(default_factory=GithubAnnotationParser)
    client: Github = field(init=False)
    repository: Repository = field(init=False)

    def __post_init__(self, token, url) -> None:
        """Create the API client and look up the repository named by ``url``."""
        self.client = Github(login_or_token=token, per_page=100)
        self.repository = self.client.get_repo(full_name_or_id=GithubProjectUrl(url).slug)
        log.warning("Remaining requests: %s", self.client.rate_limiting[0])

    @staticmethod
    def _reactions(comments):
        """Return one flat iterator over the reactions of every comment in ``comments``."""
        return itertools.chain.from_iterable(comment.get_reactions() for comment in comments)

    def fetch_all(self) -> Iterator[Commit | Issue | MergeRequest | Release | GitTag]:
        """Yield every resource kind in turn: commits, issues, pull requests, releases, tags."""
        yield from itertools.chain(
            self.fetch_commits(),
            self.fetch_issues(),
            self.fetch_mergerequests(),
            self.fetch_releases(),
            self.fetch_tags(),
        )

    def fetch_commits(self) -> Iterator[Commit]:
        """Yield one ``Commit`` per repository commit, annotated with statuses, comments and reactions."""
        for commit in self.repository.get_commits():
            # List the comments once; they are needed both as annotations and
            # as the source of the per-comment reactions.
            comments = list(commit.get_comments())
            raw_annotations = [
                *commit.get_statuses(),
                *comments,
                *self._reactions(comments),
            ]
            yield Commit(
                sha=commit.sha,
                url=commit.url,
                author=User(
                    commit.commit.author.name,
                    commit.commit.author.email,
                    prov_role=ProvRole.COMMIT_AUTHOR,
                ),
                platform="github",
                annotations=self.parser.parse(raw_annotations),
                authored_at=commit.commit.author.date,
                committed_at=commit.commit.committer.date,
            )

    def fetch_issues(self) -> Iterator[Issue]:
        """Yield one ``Issue`` per repository issue (open and closed)."""
        # NOTE(review): GitHub's issue listing presumably also returns pull
        # requests; if so they are emitted here and again by
        # fetch_mergerequests -- confirm whether that is intended.
        for issue in self.repository.get_issues(state="all"):
            comments = list(issue.get_comments())
            raw_annotations = [
                *comments,
                *issue.get_reactions(),
                *self._reactions(comments),
                *issue.get_events(),
                *issue.get_timeline(),
            ]
            yield Issue(
                # NOTE(review): ``id`` receives the per-repository number and
                # ``iid`` the global id; verify this matches the domain model.
                id=issue.number,
                iid=issue.id,
                platform="github",
                title=issue.title,
                body=issue.body,
                url=issue.url,
                author=User(issue.user.name, issue.user.email, prov_role=ProvRole.ISSUE_AUTHOR),
                annotations=self.parser.parse(raw_annotations),
                created_at=issue.created_at,
                closed_at=issue.closed_at,
            )

    def fetch_mergerequests(self) -> Iterator[MergeRequest]:
        """Yield one ``MergeRequest`` per pull request (open and closed)."""
        for pull in self.repository.get_pulls(state="all"):
            # Resolve the issue view once instead of once per listing.
            issue = pull.as_issue()
            # Conversation comments come from get_issue_comments();
            # pull.get_comments() lists the same inline review comments as
            # get_review_comments() and would only duplicate them.
            comments = list(pull.get_issue_comments())
            review_comments = list(pull.get_review_comments())
            raw_annotations = [
                *comments,
                *self._reactions(comments),
                *review_comments,
                *self._reactions(review_comments),
                *pull.get_reviews(),
                *issue.get_reactions(),
                *issue.get_events(),
                *issue.get_timeline(),
            ]

            yield MergeRequest(
                id=pull.number,
                iid=pull.id,
                title=pull.title,
                body=pull.body,
                url=pull.url,
                platform="github",
                # head is the branch carrying the changes, base the branch
                # they are merged into.
                source_branch=pull.head.ref,
                target_branch=pull.base.ref,
                author=User(
                    name=pull.user.name,
                    email=pull.user.email,
                    prov_role=ProvRole.MERGE_REQUEST_AUTHOR,
                ),
                annotations=self.parser.parse(raw_annotations),
                created_at=pull.created_at,
                closed_at=pull.closed_at,
                merged_at=pull.merged_at,
            )

    def fetch_releases(self) -> Iterator[Release]:
        """Yield one ``Release`` per repository release, including its assets."""
        for release in self.repository.get_releases():
            yield Release(
                name=release.title,
                body=release.body,
                tag_name=release.tag_name,
                platform="github",
                author=User(
                    name=release.author.name,
                    email=release.author.email,
                    prov_role=ProvRole.RELEASE_AUTHOR,
                ),
                assets=[
                    Asset(url=asset.url, format=asset.content_type)
                    for asset in release.get_assets()
                ],
                evidences=[],
                created_at=release.created_at,
                released_at=release.published_at,
            )

    def fetch_tags(self) -> Iterator[GitTag]:
        """Yield one ``GitTag`` per repository tag, described by the commit it points to."""
        for tag in self.repository.get_tags():
            # Use the git author recorded in the commit, as fetch_commits does:
            # the GitHub account (tag.commit.author) is None when the commit is
            # not linked to an account.
            git_author = tag.commit.commit.author
            yield GitTag(
                name=tag.name,
                sha=tag.commit.sha,
                message=tag.commit.commit.message,
                author=User(
                    name=git_author.name,
                    email=git_author.email,
                    prov_role=ProvRole.TAG_AUTHOR,
                ),
                created_at=git_author.date,
            )
github.TimelineEvent import TimelineEvent +from github.PullRequestComment import PullRequestComment +from github.PullRequestReview import PullRequestReview + +from gitlab2prov.domain.objects import Annotation, User +from gitlab2prov.domain.constants import ProvRole + +A = TypeVar("A") + +log = logging.getLogger(__name__) + + +@dataclass +class GithubAnnotationParser: + @staticmethod + def sort_by_date(annotations: list[Annotation]) -> list[Annotation]: + return list(sorted(annotations, key=lambda a: a.start)) + + def choose_parser(self, raw_annotation: A) -> Callable[[A], Annotation]: + match raw_annotation: + case CommitComment(): + return self.parse_commit_comment + case CommitStatus(): + return self.parse_commit_status + case Reaction(): + return self.parse_reaction + case IssueComment(): + return self.parse_issue_comment + case IssueEvent(): + return self.parse_issue_event + case TimelineEvent(): + return self.parse_timeline_event + case PullRequestReview(): + return self.parse_pull_request_review + case PullRequestComment(): + return self.parse_pull_request_comment + case _: + log.warning(f"no parser found for {raw_annotation=}") + + def parse(self, annotations: list[A]) -> list[Annotation]: + parsed_annotations = [] + for annotation in annotations: + if parser := self.choose_parser(annotation): + parsed_annotations.append(parser(annotation)) + return self.sort_by_date(parsed_annotations) + + def parse_commit_comment(self, comment: CommitComment) -> Annotation: + annotator = User( + name=comment.user.name, + email=comment.user.email, + github_username=comment.user.login, + github_id=comment.user.id, + prov_role=ProvRole.ANNOTATOR, + ) + return Annotation( + uid=comment.id, + name="add_comment", + body=comment.body, + start=comment.created_at, + end=comment.created_at, + annotator=annotator, + ) + + def parse_commit_status(self, status: CommitStatus) -> Annotation: + annotator = User( + name=status.creator.name, + email=status.creator.email, + 
github_username=status.creator.login, + github_id=status.creator.id, + prov_role=ProvRole.ANNOTATOR, + ) + return Annotation( + uid=status.id, + name="add_commit_status", + body=status.description, + start=status.created_at, + end=status.created_at, + annotator=annotator, + ) + + def parse_reaction(self, reaction: Reaction) -> Annotation: + annotator = User( + name=reaction.user.name, + email=reaction.user.email, + github_username=reaction.user.login, + github_id=reaction.user.id, + prov_role=ProvRole.ANNOTATOR, + ) + return Annotation( + uid=reaction.id, + name="add_award", + body=reaction.content, + start=reaction.created_at, + end=reaction.created_at, + annotator=annotator, + ) + + + def parse_issue_comment(self, comment: IssueComment) -> Annotation: + annotator = User( + name=comment.user.name, + email=comment.user.email, + github_username=comment.user.login, + github_id=comment.user.id, + prov_role=ProvRole.ANNOTATOR, + ) + return Annotation( + uid=comment.id, + name="add_comment", + body=comment.body, + start=comment.created_at, + end=comment.created_at, + annotator=annotator, + ) + + def parse_issue_event(self, event: IssueEvent) -> Annotation: + annotator = User( + name=event.actor.name, + email=event.actor.email, + github_username=event.actor.login, + github_id=event.actor.id, + prov_role=ProvRole.ANNOTATOR, + ) + return Annotation( + uid=event.id, + name=event.event, + body=event.event, + start=event.created_at, + end=event.created_at, + annotator=annotator, + ) + + def parse_timeline_event(self, event: TimelineEvent) -> Annotation: + annotator = User( + name=event.actor.name, + email=event.actor.email, + github_username=event.actor.login, + github_id=event.actor.id, + prov_role=ProvRole.ANNOTATOR, + ) + return Annotation( + uid=event.id, + name=event.event, + body=event.event, + start=event.created_at, + end=event.created_at, + annotator=annotator, + ) + + def parse_pull_request_review(self, review: PullRequestReview) -> Annotation: + annotator = User( + 
name=review.user.name, + email=review.user.email, + github_username=review.user.login, + github_id=review.user.id, + prov_role=ProvRole.ANNOTATOR, + ) + return Annotation( + uid=review.id, + name="add_review", + body=review.body, + start=review.submitted_at, + end=review.submitted_at, + annotator=annotator, + ) + + def parse_pull_request_comment(self, comment: PullRequestComment) -> Annotation: + annotator = User( + name=comment.user.name, + email=comment.user.email, + github_username=comment.user.login, + github_id=comment.user.id, + prov_role=ProvRole.ANNOTATOR, + ) + return Annotation( + uid=comment.id, + name="add_comment", + body=comment.body, + start=comment.created_at, + end=comment.created_at, + annotator=annotator, + )