import logging
from functools import partial, wraps
import petl
import requests
from github import Github as PyGithub
from github.GithubException import UnknownObjectException
from parsons.etl.table import Table
from parsons.utilities import check_env, files
logger = logging.getLogger(__name__)
def _wrap_method(decorator, method):
@wraps(method)
def _wrapper(self, *args, **kwargs):
bound_method = partial(method.__get__(self, type(self)))
return decorator(bound_method)(*args, **kwargs)
return _wrapper
def decorate_methods(decorator):
# Based on Django's django.utils.decorators.method_decorator
def decorate(cls):
for method in dir(cls):
# Don't decorate dunder methods
if method.startswith("__"):
continue
cls_method = getattr(cls, method)
if callable(cls_method):
setattr(cls, method, _wrap_method(decorator, cls_method))
return cls
return decorate
def wrap_github_404(func):
@wraps(func)
def _wrapped_func(*args, **kwargs):
try:
return (func)(*args, **kwargs)
except UnknownObjectException:
raise ParsonsGitHubError(
"Couldn't find the object you referenced, maybe you need to log in?"
)
return _wrapped_func
class ParsonsGitHubError(Exception):
pass
[docs]
@decorate_methods(wrap_github_404)
class GitHub(object):
"""Creates a GitHub class for accessing the GitHub API.
Uses ``parsons.utilities.check_env`` to load credentials from environment variables if not
supplied. Supports either a username and password or an access token for authentication. The
client also supports unauthenticated access.
Args:
username: Optional[str]
Username of account to use for credentials. Can be set with ``GITHUB_USERNAME``
environment variable.
password: Optional[str]
Password of account to use for credentials. Can be set with ``GITHUB_PASSWORD``
environment variable.
access_token: Optional[str]
Access token to use for credentials. Can be set with ``GITHUB_ACCESS_TOKEN`` environment
variable.
"""
def __init__(self, username=None, password=None, access_token=None):
self.username = check_env.check("GITHUB_USERNAME", username, optional=True)
self.password = check_env.check("GITHUB_PASSWORD", password, optional=True)
self.access_token = check_env.check("GITHUB_ACCESS_TOKEN", access_token, optional=True)
if self.username and self.password:
self.client = PyGithub(self.username, self.password)
elif self.access_token:
self.client = PyGithub(self.access_token)
else:
self.client = PyGithub()
def _as_table(self, paginated_list, page=None, page_size=100):
"""Converts a paginated list into a Parsons ``Table``. Uses the ``_rawData`` property of
each item instead of calling ``raw_data`` to avoid making a separate request for each item
in a page for types that PyGithub doesn't consider complete.
Args:
paginated_list: ``pygithub.PaginatedList.PaginatedList``
PyGithub paginated list
page: Optional[int]
Page number to load. Defaults to None. If not specified, all results are returned.
page_size: int
Page size. Defaults to 100. Ignored if ``page`` is not set.
Returns:
``Table``
Table object created from the raw data of the list
"""
if page is not None:
page_start = (page - 1) * page_size
page_end = page_start + page_size
list_pages = paginated_list[page_start:page_end]
else:
list_pages = paginated_list
return Table([list_item._rawData for list_item in list_pages])
[docs]
def get_user(self, username):
"""Loads a GitHub user by username
Args:
username: str
Username of user to load
Returns:
dict
User information
"""
return self.client.get_user(username).raw_data
[docs]
def get_organization(self, organization_name):
"""Loads a GitHub organization by name
Args:
organization_name: str
Name of organization to load
Returns:
dict
Organization information
"""
return self.client.get_organization(organization_name).raw_data
[docs]
def get_repo(self, repo_name):
"""Loads a GitHub repo by name
Args:
repo_name: str
Full repo name (account/name)
Returns:
dict
Repo information
"""
return self.client.get_repo(repo_name).raw_data
[docs]
def list_user_repos(self, username, page=None, page_size=100):
"""List user repos with pagination, returning a ``Table``
Args:
username: str
GitHub username
page: Optional[int]
Page number. All results are returned if not set.
page_size: int
Page size. Defaults to 100.
Returns:
``Table``
Table with page of user repos
"""
logger.info(f"Listing page {page} of repos for user {username}")
return self._as_table(
self.client.get_user(username).get_repos(), page=page, page_size=page_size
)
[docs]
def list_organization_repos(self, organization_name, page=None, page_size=100):
"""List organization repos with pagination, returning a ``Table``
Args:
organization_name: str
GitHub organization name
page: Optional[int]
Page number. All results are returned if not set.
page_size: int
Page size. Defaults to 100.
Returns:
``Table``
Table with page of organization repos
"""
logger.info(f"Listing page {page} of repos for organization {organization_name}")
return self._as_table(
self.client.get_organization(organization_name).get_repos(),
page=page,
page_size=page_size,
)
[docs]
def get_issue(self, repo_name, issue_number):
"""Loads a GitHub issue
Args:
repo_name: str
Full repo name (account/name)
issue_number: int
Number of issue to load
Returns:
dict
Issue information
"""
return self.client.get_repo(repo_name).get_issue(number=issue_number).raw_data
[docs]
def list_repo_issues(
self,
repo_name,
state="open",
assignee=None,
creator=None,
mentioned=None,
labels=[],
sort="created",
direction="desc",
since=None,
page=None,
page_size=100,
):
"""List issues for a given repo
Args:
repo_name: str
Full repo name (account/name)
state: str
State of issues to return. One of "open", "closed", "all". Defaults to "open".
assignee: Optional[str]
Name of assigned user, "none", or "*".
creator: Optional[str]
Name of user that created the issue.
mentioned: Optional[str]
Name of user mentioned in the issue.
labels: list[str]
List of label names. Defaults to []
sort: str
What to sort results by. One of "created", "updated", "comments". Defaults to
"created".
direction: str
Direction to sort. One of "asc", "desc". Defaults to "desc".
since: Optional[Union[datetime.datetime, datetime.date]]
Timestamp to pull issues since. Defaults to None.
page: Optional[int]
Page number. All results are returned if not set.
page_size: int
Page size. Defaults to 100.
Returns:
``Table``
Table with page of repo issues
"""
logger.info(f"Listing page {page} of issues for repo {repo_name}")
kwargs_dict = {"state": state, "sort": sort, "direction": direction}
if assignee:
kwargs_dict["assignee"] = assignee
if creator:
kwargs_dict["creator"] = creator
if mentioned:
kwargs_dict["mentioned"] = mentioned
if len(labels) > 0:
kwargs_dict["labels"] = ",".join(labels)
if since:
kwargs_dict["since"] = f"{since.isoformat()[:19]}Z"
return self._as_table(
self.client.get_repo(repo_name).get_issues(**kwargs_dict),
page=page,
page_size=page_size,
)
[docs]
def get_pull_request(self, repo_name, pull_request_number):
"""Loads a GitHub pull request
Args:
repo_name: str
Full repo name (account/name)
pull_request_number: int
Pull request number
Returns:
dict
Pull request information
"""
return self.client.get_repo(repo_name).get_pull(pull_request_number).raw_data
[docs]
def list_repo_pull_requests(
self,
repo_name,
state="open",
base=None,
sort="created",
direction="desc",
page=None,
page_size=100,
):
"""Lists pull requests for a given repo
Args:
repo_name: str
Full repo name (account/name)
state: str
One of "open, "closed", "all". Defaults to "open".
base: Optional[str]
Base branch to filter pull requests by.
sort: str
How to sort pull requests. One of "created", "updated", "popularity". Defaults to
"created".
direction: str
Direction to sort by. Defaults to "desc".
page: Optional[int]
Page number. All results are returned if not set.
page_size: int
Page size. Defaults to 100.
Returns:
``Table``
Table with page of repo pull requests
"""
logger.info(f"Listing page {page} of pull requests for repo {repo_name}")
kwargs_dict = {"state": state, "sort": sort, "direction": direction}
if base:
kwargs_dict["base"] = base
self._as_table(
self.client.get_repo(repo_name).get_pulls(**kwargs_dict),
page=page,
page_size=page_size,
)
[docs]
def list_repo_contributors(self, repo_name, page=None, page_size=100):
"""Lists contributors for a given repo
Args:
repo_name: str
Full repo name (account/name)
page: Optional[int]
Page number. All results are returned if not set.
page_size: int
Page size. Defaults to 100.
Returns:
``Table``
Table with page of repo contributors
"""
logger.info(f"Listing page {page} of contributors for repo {repo_name}")
return self._as_table(
self.client.get_repo(repo_name).get_contributors(),
page=page,
page_size=page_size,
)
[docs]
def download_file(self, repo_name, path, branch=None, local_path=None):
"""Download a file from a repo by path and branch. Defaults to the repo's default branch if
branch is not supplied.
Uses the download_url directly rather than the API because the API only supports contents up
to 1MB from a repo directly, and the process for downloading larger files through the API is
much more involved.
Because download_url does not go through the API, it does not support username / password
authentication, and requires a token to authenticate.
Args:
repo_name: str
Full repo name (account/name)
path: str
Path from the repo base directory
branch: Optional[str]
Branch to download file from. Defaults to repo default branch
local_path: Optional[str]
Local file path to download file to. Will create a temp file if not supplied.
Returns:
str
File path of downloaded file
"""
if not local_path:
local_path = files.create_temp_file_for_path(path)
repo = self.client.get_repo(repo_name)
if branch is None:
branch = repo.default_branch
logger.info(f"Downloading {path} from {repo_name}, branch {branch} to {local_path}")
headers = None
if self.access_token:
headers = {
"Authorization": f"token {self.access_token}",
}
res = requests.get(
f"https://raw.githubusercontent.com/{repo_name}/{branch}/{path}",
headers=headers,
)
if res.status_code == 404:
raise UnknownObjectException(status=404, data=res.content)
elif res.status_code != 200:
raise ParsonsGitHubError(
f"Error downloading {path} from repo {repo_name}: {res.content}"
)
with open(local_path, "wb") as f:
f.write(res.content)
logger.info(f"Downloaded {path} to {local_path}")
return local_path
[docs]
def download_table(self, repo_name, path, branch=None, local_path=None, delimiter=","):
"""Download a CSV file from a repo by path and branch as a Parsons Table.
Args:
repo_name: str
Full repo name (account/name)
path: str
Path from the repo base directory
branch: Optional[str]
Branch to download file from. Defaults to repo default branch
local_path: Optional[str]
Local file path to download file to. Will create a temp file if not supplied.
delimiter: Optional[str]
The CSV delimiter to use to parse the data. Defaults to ','
Returns:
Parsons Table
See :ref:`parsons-table` for output options.
"""
downloaded_file = self.download_file(repo_name, path, branch, local_path)
return Table(petl.fromcsv(downloaded_file, delimiter=delimiter))