#!/usr/bin/env nix
# vim: set filetype=python:
#!nix develop --impure --expr
#!nix ``
#!nix let flake = builtins.getFlake ("git+file://" + toString ../.);
#!nix     pkgs = flake.inputs.nixpkgs.legacyPackages.${builtins.currentSystem};
#!nix in pkgs.mkShell { nativeBuildInputs = [
#!nix   (pkgs.python3.withPackages (ps: with ps; [ requests ]))
#!nix ]; }
#!nix `` --command python3

# This script lists out the contributors for a given release.
# It must be run from the root of the Nix repository.
#
# Environment variables:
#   GITHUB_TOKEN  optional; sent to the GitHub API when set.
#   VERSION       optional; used instead of the contents of .version.
#
# Progress messages go to stderr; the credits list goes to stdout.

import os
import sys
import json
import subprocess
import requests

github_token = os.environ.get("GITHUB_TOKEN")
if not github_token:
    print("GITHUB_TOKEN is not set. If you hit the rate limit, set it", file=sys.stderr)
    # Might be ok, as we have a cache.
    # raise ValueError("GITHUB_TOKEN must be set")

# Only send credentials when we actually have a token. Formatting an unset
# token into the header would send the literal string "token None" instead of
# making an anonymous request.
github_headers = {"Authorization": f"token {github_token}"} if github_token else {}

# Seconds to wait for the GitHub API before giving up, so that a stalled
# connection cannot hang the script forever.
GITHUB_TIMEOUT = 30


def git(*args):
    """Run `git` with the given arguments and return its stripped stdout.

    The arguments are passed as a list (no shell), so refs derived from
    VERSION / .version cannot be interpreted as shell syntax.  git's stderr is
    passed through to ours.

    Raises subprocess.CalledProcessError if git exits with a non-zero status.
    """
    result = subprocess.run(
        ["git", *args],
        stdout=subprocess.PIPE,
        text=True,
        check=True,
    )
    return result.stdout.strip()


def github_get(url):
    """GET `url` from the GitHub API and return the decoded JSON body.

    Raises requests.HTTPError on a non-2xx response.
    """
    response = requests.get(url, headers=github_headers, timeout=GITHUB_TIMEOUT)
    response.raise_for_status()
    return response.json()


# 1. Read the current version in .version
version = os.environ.get("VERSION")
if not version:
    with open(".version") as f:
        version = f.read().strip()

print(f"Generating release credits for Nix {version}", file=sys.stderr)

# 2. Compute previous version
vcomponents = version.split(".")
if len(vcomponents) < 2:
    raise ValueError(".version must have at least two components")
minor = int(vcomponents[1])
if minor < 1:
    # There is no "X.-1.0" tag to compare against.
    raise ValueError(f"Cannot compute the version before {version}: minor version is {minor}")
prev_version = f"{vcomponents[0]}.{minor - 1}.0"

# For unreleased versions
endref = "HEAD"
# For older releases
# endref = version

# 3. Find the merge base between the current version and the previous version
mergeBase = git("merge-base", prev_version, endref)
print(f"Merge base between {prev_version} and {endref} is {mergeBase}", file=sys.stderr)

# 4. Find the date of the merge base
# %ci starts with YYYY-MM-DD; keep only the date part.
mergeBaseDate = git("show", "-s", "--format=%ci", mergeBase)[0:10]
print(f"Merge base date is {mergeBaseDate}", file=sys.stderr)

# 5. Get the commits between the merge base and the current version

def get_commits():
    """Return the commits in mergeBase..endref, in `git log` order.

    Each commit is a dict with the keys "hash", "author" and "email".
    Returns an empty list when the range contains no commits.
    """
    raw = git("log", "--pretty=format:%H\t%an\t%ae", f"{mergeBase}..{endref}")
    commits = []
    for line in raw.split("\n"):
        if not line:
            # Empty range: git printed nothing at all.
            continue
        items = line.split("\t")
        if len(items) < 3:
            print(f"Skipping unparseable git log line: {line!r}", file=sys.stderr)
            continue
        commits.append({ "hash": items[0], "author": items[1], "email": items[2] })
    return commits

def commits_to_first_commit_by_email(commits):
    """Map each author email to the first commit in `commits` carrying it."""
    by_email = {}
    for commit in commits:
        # setdefault keeps the first commit seen for each email.
        by_email.setdefault(commit["email"], commit)
    return by_email

samples = commits_to_first_commit_by_email(get_commits())

# For quick testing, only pick two samples from the dict
# samples = dict(list(samples.items())[:2])

# Query the GitHub API to get handle
def get_github_commit(commit):
    """Fetch the GitHub API record for `commit` (a dict with a "hash" key)."""
    return github_get(f"https://api.github.com/repos/NixOS/nix/commits/{commit['hash']}")

class Cache:
    """A JSON file backed dict, available as `self.values`.

    filename: path of the JSON file to load from and save to.
    require:  when true (the default), a missing file raises
              FileNotFoundError; otherwise the cache starts out empty.
    """
    def __init__(self, filename, require = True):
        self.filename = filename
        try:
            with open(filename, "r") as f:
                self.values = json.load(f)
        except FileNotFoundError:
            if require:
                raise
            self.values = {}

    def save(self):
        """Write the current values back to the cache file."""
        with open(self.filename, "w") as f:
            json.dump(self.values, f, indent=4)
        print(f"Saved cache to {self.filename}", file=sys.stderr)

# The email to handle cache maps email addresses to either
#  - a handle (string)
#  - None (if no handle was found)
email_to_handle_cache = Cache("maintainers/data/release-credits-email-to-handle.json")

handles = set()
# Authors for whom no GitHub handle was found: email -> author name
emails = {}

for email, s in samples.items():
    if email not in email_to_handle_cache.values:
        print(f"Querying GitHub API for {s['hash']}, to get handle for {s['email']}", file=sys.stderr)
        ghc = get_github_commit(s)
        gha = ghc["author"]
        if gha and gha["login"]:
            handle = gha["login"]
            print(f"Handle: {handle}", file=sys.stderr)
            email_to_handle_cache.values[email] = handle
        else:
            print(f"Found no handle for {s['email']}", file=sys.stderr)
            email_to_handle_cache.values[email] = None
    handle = email_to_handle_cache.values[email]
    if handle is not None:
        handles.add(handle)
    else:
        emails[email] = s["author"]

# print(email_to_handle_cache.values)

email_to_handle_cache.save()

handle_to_name_cache = Cache("maintainers/data/release-credits-handle-to-name.json")

print(f"Found {len(handles)} handles", file=sys.stderr)

for handle in handles:
    if handle not in handle_to_name_cache.values:
        print(f"Querying GitHub API for {handle}, to get name", file=sys.stderr)
        user = github_get(f"https://api.github.com/users/{handle}")
        name = user["name"]
        print(f"Name: {name}", file=sys.stderr)
        handle_to_name_cache.values[handle] = name

handle_to_name_cache.save()

entries = []

def is_bot(handle):
    """Return whether `handle` ends with the "[bot]" suffix."""
    return handle.endswith("[bot]")

for handle in handles:
    name = handle_to_name_cache.values[handle]
    if name is None:
        # This way it looks more regular
        name = handle

    # Note to bots: I'm only doing this because it would distract humans, and
    # therefore delay your creation. I'm sorry; it's for your benefit.
    if not is_bot(handle):
        entries.append(f"- {name} [**(@{handle})**](https://github.com/{handle})")

def shuffle(entries):
    """Return `entries` as a new list, ordered by a hash salted per call."""
    salt = os.urandom(16)
    return sorted(entries, key=lambda x: hash((x, salt)))

# Fair ordering is undecidable
entries = shuffle(entries)

# For a sanity check, we could sort the entries by handle instead.
# entries = sorted(entries)

# NOTE(review): the count below covers only contributors with a GitHub handle;
# the authors listed by name at the end are not included in it. Confirm whether
# that is intended.
print("")
print(f"This release was made possible by the following {len(entries)} contributors:")
print("")

for entry in entries:
    print(entry)

for email in emails:
    print(f"- {emails[email]}")