Skip to content

Open Source

2024 Open Source Report

This is my first yearly report on Open Source! 🎉

I dedicate a lot of my free time to Open Source work, and I would like to share some numbers with you. I hope you find them interesting!

| Project | Downloads/month | Time spent | Releases | Closed issues | Merged PRs | Closed unmerged PRs | Answered discussions |
|---|---|---|---|---|---|---|---|
| Starlette | 57 million | 70 hrs 29 mins | 29 | 76 | 182 | 64 | 93 |
| Uvicorn | 49 million | 48 hrs 3 mins | 20 | 61 | 100 | 65 | 38 |
| Python Multipart | 25 million | 17 hrs 29 mins | 13 | 37 | 87 | 12 | 0 |
| Total | 131 million | 136 hrs 1 min | 62 | 174 | 369 | 141 | 131 |

Most of the time dedicated to maintaining open source projects is not actually spent coding, as most people think. It's mainly spent interacting with people: answering questions, reviewing pull requests, and investigating issues.

Sponsors

I would like to thank all the sponsors that supported me in 2024! ❤

Data Analysis

I got this data from a script I created that queries the GitHub API and WakaTime API.

Click here to see the script...

Most of the script was created with the help of Claude AI, but I had to tweak it a bit to get the data I wanted.

If you want to use it, make sure you have the following environment variables set:

  • WAKATIME_API_KEY: Your WakaTime API key.
  • GH_TOKEN: Your GitHub token.
import os
from datetime import datetime, timedelta, timezone

import httpx
from wakatime_client import WakatimeClient


def main():
    """Print the yearly report numbers for the tracked projects.

    First prints the WakaTime "last_year" time summary for each tracked
    project, then, for every repository, the release count, the issue/PR
    stats and the answered-discussion count, one group per metric separated
    by blank lines.
    """
    tracked_projects = ("starlette", "uvicorn", "python-multipart")
    repositories = (
        ("encode", "starlette"),
        ("encode", "uvicorn"),
        ("Kludex", "python-multipart"),
    )
    # (label printed after the repo name, function producing the value)
    reports = (
        ("releases", count_releases),
        ("stats", get_repo_stats),
        ("activity", get_repo_activity),
    )

    wakatime = WakatimeClient(api_key=os.getenv("WAKATIME_API_KEY"))
    yearly_projects = wakatime.stats(range="last_year")["data"]["projects"]
    for entry in yearly_projects:
        if entry["name"] not in tracked_projects:
            continue
        print(f'{entry["name"]}: {entry["text"]}')
    print()

    for position, (label, fetch) in enumerate(reports):
        # Blank line between metric groups, but not before the first one.
        if position > 0:
            print()
        for owner, repo in repositories:
            print(f"{repo} {label}: {fetch(owner, repo)}")


def count_releases(owner: str, repo: str):
    """Count the releases of a GitHub repository published in the last 365 days.

    Every page of the releases listing is fetched, so repositories with more
    releases than fit on one page are counted correctly.

    Args:
        owner: Account or organization that owns the repository.
        repo: Repository name.

    Returns:
        The number of releases whose ``published_at`` is within the last 365 days.

    Raises:
        httpx.HTTPError: If a request fails or returns an error status.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/releases"
    headers = {"Accept": "application/vnd.github.v3+json", "Authorization": f"Bearer {os.getenv('GH_TOKEN')}"}
    params = {"per_page": 100}

    # GitHub timestamps are UTC ("...Z"), so the cutoff has to be UTC as well.
    one_year_ago = datetime.now(timezone.utc) - timedelta(days=365)
    recent_releases = 0

    with httpx.Client() as client:
        while url:
            response = client.get(url, headers=headers, params=params)
            response.raise_for_status()

            for release in response.json():
                published_at = release["published_at"]
                # Draft releases have no publication date yet.
                if published_at is None:
                    continue
                published = datetime.strptime(published_at, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
                if published > one_year_ago:
                    recent_releases += 1

            # The "next" link already carries the query string, so drop params.
            url = response.links.get("next", {}).get("url")
            params = None

    return recent_releases


def get_repo_stats(owner: str, repo: str):
    """Count issues and pull requests of a repository closed in the last 365 days.

    Args:
        owner: Account or organization that owns the repository.
        repo: Repository name.

    Returns:
        A dict with the keys ``closed_issues``, ``merged_prs`` and
        ``closed_unmerged_prs``, or ``None`` if any request failed (the error
        is printed).
    """
    headers = {"Accept": "application/vnd.github.v3+json", "Authorization": f"Bearer {os.getenv('GH_TOKEN')}"}

    base_url = f"https://api.github.com/repos/{owner}/{repo}"
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    # GitHub timestamps are UTC ("...Z"), so the cutoff has to be UTC as well.
    cutoff = datetime.now(timezone.utc) - timedelta(days=365)

    def parse_timestamp(value: str) -> datetime:
        return datetime.strptime(value, timestamp_format).replace(tzinfo=timezone.utc)

    try:
        with httpx.Client() as client:
            # Get issues (excluding PRs). `since` filters on the last update,
            # so it only narrows the candidates; `closed_at` decides.
            issues_count = 0
            url = f"{base_url}/issues"
            params = {"state": "closed", "since": cutoff.strftime(timestamp_format), "per_page": 100}

            while url:
                response = client.get(url, headers=headers, params=params)
                # Fail loudly on every page instead of silently truncating the count.
                response.raise_for_status()

                for issue in response.json():
                    if "pull_request" in issue:
                        continue
                    closed_at = issue.get("closed_at")
                    if closed_at and parse_timestamp(closed_at) >= cutoff:
                        issues_count += 1

                # The "next" link already carries the query string, so drop params.
                url = response.links.get("next", {}).get("url")
                params = None

            # Get PRs, most recently updated first.
            merged_count = 0
            closed_count = 0
            url = f"{base_url}/pulls"
            params = {"state": "closed", "sort": "updated", "direction": "desc", "per_page": 100}
            reached_cutoff = False

            while url and not reached_cutoff:
                response = client.get(url, headers=headers, params=params)
                response.raise_for_status()

                for pr in response.json():
                    # Results are sorted by update time, so once one PR is older
                    # than the cutoff every remaining PR is too: stop paginating.
                    if parse_timestamp(pr["updated_at"]) < cutoff:
                        reached_cutoff = True
                        break

                    # Updated recently but closed before the window: not counted.
                    closed_at = pr.get("closed_at")
                    if not closed_at or parse_timestamp(closed_at) < cutoff:
                        continue

                    if pr["merged_at"]:
                        merged_count += 1
                    else:
                        closed_count += 1

                url = response.links.get("next", {}).get("url")
                params = None

            return {"closed_issues": issues_count, "merged_prs": merged_count, "closed_unmerged_prs": closed_count}

    except httpx.HTTPError as e:
        print(f"Error fetching repository stats: {e}")
        return None


def get_repo_activity(owner: str, repo: str):
    """Count the discussions of a repository answered in the last 365 days.

    All answered discussions are walked with cursor pagination, 100 per
    request, and those whose answer was chosen within the last 365 days are
    counted.

    Args:
        owner: Account or organization that owns the repository.
        repo: Repository name.

    Returns:
        The number of discussions whose ``answerChosenAt`` is within the last 365 days.

    Raises:
        httpx.HTTPError: If a request fails or returns an error status.
        RuntimeError: If the GraphQL response reports errors.
    """
    headers = {"Accept": "application/vnd.github.v3+json", "Authorization": f"Bearer {os.getenv('GH_TOKEN')}"}

    # GraphQL query for discussions (REST API doesn't support discussions)
    graphql_url = "https://api.github.com/graphql"
    query = """
    query($owner: String!, $repo: String!, $cursor: String) {
      repository(owner: $owner, name: $repo) {
        discussions(first: 100, after: $cursor, answered: true, orderBy: {field: UPDATED_AT, direction: DESC}) {
          pageInfo {
            hasNextPage
            endCursor
          }
          nodes {
            answerChosenAt
          }
        }
      }
    }
    """

    # GitHub timestamps are UTC ("...Z"), so the cutoff has to be UTC as well.
    one_year_ago = datetime.now(timezone.utc) - timedelta(days=365)
    answered_count = 0
    cursor = None  # None requests the first page

    with httpx.Client() as client:
        while True:
            response = client.post(
                graphql_url,
                json={"query": query, "variables": {"owner": owner, "repo": repo, "cursor": cursor}},
                headers=headers,
            )
            response.raise_for_status()

            payload = response.json()
            # GraphQL reports query failures in the body, with HTTP status 200.
            if payload.get("errors"):
                raise RuntimeError(f"GitHub GraphQL query failed: {payload['errors']}")

            discussions = payload["data"]["repository"]["discussions"]
            for discussion in discussions["nodes"]:
                chosen_at = discussion["answerChosenAt"]
                if chosen_at is None:
                    continue
                answered_on = datetime.strptime(chosen_at, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
                if answered_on > one_year_ago:
                    answered_count += 1

            page_info = discussions["pageInfo"]
            if not page_info["hasNextPage"]:
                break
            cursor = page_info["endCursor"]

    return answered_count


# Run the report only when executed as a script, so that importing this
# module does not trigger network requests.
if __name__ == "__main__":
    main()