diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md new file mode 100644 index 0000000..7d1d69f --- /dev/null +++ b/.claude/CLAUDE.md @@ -0,0 +1,339 @@ +# Contributor Network - Developer Guide + +**Start here.** This file provides quick orientation for anyone working with this codebase. + +> **Important for AI agents:** When you make changes to the codebase, update the relevant documentation in `.claude/` and `README.md` to reflect those changes. Keep `README.md` short, concise, and human-readable -- it is the public-facing project overview. This file (`CLAUDE.md`) is the detailed reference for developers and agents. + +## What Is This? + +An interactive D3.js web visualization of Development Seed's contributions to open-source projects. Shows the relationships between team members, repositories, and collaborators. + +**Live**: https://developmentseed.org/contributor-network + +**Repo**: https://github.com/developmentseed/contributor-network + +--- + +## For New Developers + +**First read**: [`PRD.md`](./PRD.md) (5 min) - Understand what this product is and why it exists. 
+ +--- + +## Quick Start + +### Prerequisites +- [uv](https://docs.astral.sh/uv/getting-started/installation/) for Python +- [Node.js](https://nodejs.org/) 18+ for JavaScript +- GitHub personal access token with `public_repo` scope + +### Installation +```bash +uv sync # Install Python dependencies +npm install # Install JavaScript dependencies +``` + +### View Locally +```bash +python -m http.server 8000 +# Open http://localhost:8000/ +``` + +### Fetch Data & Build +```bash +export GITHUB_TOKEN="your_token_here" +uv run contributor-network data # Fetch from GitHub +uv run contributor-network csvs # Generate CSVs +uv run contributor-network build assets/data dist # Build static site +``` + +--- + +## Key Commands + +### Development +```bash +# Run CLI commands +uv run contributor-network data # Fetch contribution data from GitHub +uv run contributor-network csvs # Generate CSVs from JSON +uv run contributor-network build assets/data dist # Build static site to dist/ +uv run contributor-network discover # Find new repositories to track +uv run contributor-network list-contributors # Display all configured contributors + +# JavaScript testing +npm test # Run Vitest +npm run build # Bundle JavaScript +``` + +### Quality Checks +```bash +# Python: as in CI +uv run ruff format --check . +uv run ruff check . +uv run mypy +uv run pytest + +# Auto-fix issues +uv run ruff format . +uv run ruff check --fix . 
+``` + +--- + +## Project Structure + +``` +python/ # Python backend (CLI) + contributor_network/ # Main package + cli.py # Click CLI commands + client.py # GitHub API wrapper + config.py # Pydantic config models + models.py # Data models + tests/ # Python tests + templates/ # Jinja2 HTML templates + +src/js/ # JavaScript frontend (modular) + index.js # Barrel exports (re-exports all modules) + visualization/ + index.js # Main visualization factory + config/ + theme.js # Colors, fonts, layout constants + scales.js # D3 scale factories + data/ + filter.js # Filtering logic + interaction/ + hover.js # Hover event handling + click.js # Click event handling + findNode.js # Node detection via Delaunay + layout/ + resize.js # Canvas resize handling + render/ + canvas.js # Canvas setup + shapes.js # Shape drawing utilities + text.js # Text rendering + tooltip.js # Tooltip rendering + labels.js # Node labels + repoCard.js # Repo details card + simulations/ + ownerSimulation.js # Owner node forces + contributorSimulation.js # Contributor node forces + collaborationSimulation.js # Collaboration link forces + remainingSimulation.js # Remaining/community node forces + state/ + filterState.js # Filter state + interactionState.js # Hover/click state + utils/ + helpers.js # Math utilities + formatters.js # Date/number formatting + validation.js # Data validation + debug.js # Debug logging + __tests__/ # Unit tests + +assets/ + data/ # JSON data files (generated) + css/ # Stylesheets + img/ # Images + +index.html # Main entry point +config.toml # Repository and contributor config +``` + +--- + +## Key Files + +- **`python/contributor_network/cli.py`** - Click-based CLI with 5 subcommands +- **`python/contributor_network/client.py`** - GitHub API client wrapper +- **`python/contributor_network/models.py`** - Pydantic data models (Repo, Link, etc.) 
+- **`src/js/index.js`** - Main visualization orchestrator (still being refactored) +- **`config.toml`** - Configuration: which repos to track, who are contributors +- **`index.html`** - Static HTML that loads the visualization + +--- + +## Code Standards + +### Python +- Type hints required (mypy validates in CI) +- Formatted with `ruff` (not black) +- Pydantic for data validation +- Click for CLI commands +- Docstrings on public functions + +### JavaScript +- ES6 modules (no transpilation) +- Modular architecture: each module <300 lines +- JSDoc comments on exported functions +- Tests with Vitest +- No external build step for development (changes auto-available in browser) + +--- + +## Architecture Notes + +### Data Flow + +``` +GitHub API → Python CLI (client.py) → JSON files → CSV generation → D3.js visualization → Interactive web app +``` + +Inside the Python backend: `CLI (cli.py) → Client (client.py) → Models (models.py) → Config (config.py) → JSON/CSV output` + +### Data Storage + +Data is stored as JSON and CSV files (not a database). This keeps the project as a simple static site with no infrastructure to manage -- files are human-readable, version-controllable, and work offline. If the project grows past ~200 repositories or ~500 contributors, consider migrating to SQLite, then PostgreSQL. See `DATA_EXPANSION_PLAN.md` for details. + +### Visualization Concepts + +**Node types:** +- **Contributors** -- team members, arranged alphabetically in an outer ring, sized by total contributions +- **Repositories** -- GitHub projects, positioned by force simulation, color-coded by ownership type +- **Owners** -- intermediary nodes that visually group repos by their owner + +**Links** connect contributors to repositories (sometimes through owner nodes). Link width reflects commit count; opacity reflects recency of contribution. 
+ +**Simulations**: Four separate D3 force simulations, each tuned for a different repo grouping pattern: +- **ownerSimulation** -- repos owned by the organization +- **contributorSimulation** -- repos with a single DevSeed contributor +- **collaborationSimulation** -- repos shared between multiple DevSeed contributors +- **remainingSimulation** -- community contributors outside the main circle + +### Rendering Pipeline + +The frontend processes data in this order: **Load → Prepare → Filter → Simulate → Render → Interact** + +The visualization uses multiple composited canvas layers for performance: main (nodes + links), tooltip, labels, and hover highlighting. Canvas is used instead of SVG because 200+ nodes and 500+ links would be too slow as DOM elements. + +### Code Patterns + +- All JS modules export **functions, not classes** +- State updates are **immutable** (e.g., `state = setHovered(state, node)`) +- All magic numbers and constants are **centralized** in `config/theme.js` + +### Dependencies + +**Python:** click, pydantic, pygithub, requests, tomli, pytest + +**JavaScript:** d3, vitest + +--- + +## Documentation Structure + +| Document | Purpose | Read When | +|----------|---------|-----------| +| **README.md** (root) | Project overview, CLI reference, full workflows | Understanding the product and CLI usage | +| **PRD.md** | Product requirements and vision | First - understand the *why* | +| **DATA_EXPANSION_PLAN.md** | Data collection phases (1-5) with details | Adding new data fields | + +--- + +## Branding + +Development Seed colors: +- **Grenadier** (#CF3F02): Primary orange accent +- **Aquamarine** (#2E86AB): Secondary blue +- **Base** (#443F3F): Text color + +Configured in `src/js/config/theme.js`. + +--- + +## Common Tasks + +### Add a New Repository to Track +1. Edit `config.toml` - add repo to `[repositories]` section +2. Run `uv run contributor-network data` to fetch GitHub data +3. Run `uv run contributor-network csvs` to generate CSVs +4. 
Run `uv run contributor-network build assets/data dist` to rebuild site + +### Add a New Contributor +1. Edit `config.toml` - add to `[contributors.devseed]` or `[contributors.alumni]` +2. Re-run data fetch and build (above) + +### Making Frontend Changes + +JavaScript files in `src/js/` are auto-available to the browser without a build step during development. + +1. Make changes to files in `src/js/` +2. Refresh http://localhost:8000/ in your browser +3. Run tests to verify: `npm test` + +**Note:** If you modify `src/js/chart.js` (the main visualization), it compiles to `chart.js` in the root. If you add new modules to `src/js/`, export them from `src/js/index.js`. + +### Customizing the Visualization + +- **Colors & Fonts**: Edit `src/js/config/theme.js` +- **Layout Constants**: Edit the `LAYOUT` object in `src/js/config/theme.js` +- **Filters Available**: Check `src/js/state/filterState.js` +- **Data Filtering Logic**: See `src/js/data/filter.js` + +### Debug Visualization Issues +- Open DevTools (F12) +- Look for `debug-contributor-network` flag in console +- Check network tab to see what data was loaded +- See `src/js/utils/debug.js` for debug utilities + +### Debug Python Issues + +```bash +# Run a specific test with verbose output +uv run pytest python/tests/test_file.py -v -s +``` + +The `-s` flag shows print statements and logging output. 
+ +### Run Tests +```bash +# Python +uv run pytest +uv run pytest python/tests/test_config.py::test_function_name # Single test + +# JavaScript +npm test +npm test -- --watch +``` + +--- + +## Troubleshooting + +### "GitHub API rate limit exceeded" +- Make sure you're using a GitHub token: `export GITHUB_TOKEN="your_token"` +- Unauthenticated requests have a much lower limit (60/hour vs 5,000/hour) +- Wait an hour for the limit to reset, or wait for exponential backoff retry logic + +### `uv: command not found` +```bash +# Install uv +curl -LsSf https://astral.sh/uv/install.sh | sh + +# Or on macOS with Homebrew +brew install uv +``` + +### Changes to `src/js/` aren't showing up +1. Make sure you're running `python -m http.server 8000` +2. Hard-refresh your browser: Ctrl+Shift+R (or Cmd+Shift+R on Mac) +3. Check that the file was actually saved + +### Tests are failing +```bash +# Run tests with verbose output +npm test -- --reporter=verbose + +# Or for Python +uv run pytest -v +``` + +--- + +## Need Help? + +- **Project overview and CLI usage?** → `README.md` (root) +- **What are we building next?** → `DATA_EXPANSION_PLAN.md` +- **What's the product for?** → `PRD.md` + +--- + +**Last Updated**: February 2026 diff --git a/.claude/DATA_EXPANSION_PLAN.md b/.claude/DATA_EXPANSION_PLAN.md new file mode 100644 index 0000000..f4407d0 --- /dev/null +++ b/.claude/DATA_EXPANSION_PLAN.md @@ -0,0 +1,449 @@ +# GitHub Data Expansion Plan + +## Progress Tracker + +| Phase | Status | Completed | +|-------|--------|-----------| +| **Phase 1: Quick Wins** | ✅ Complete | Jan 2026 | +| **Phase 2: Community Metrics** | ✅ Complete | Jan 2026 | +| **Phase 3: Timeline Data** | 🔲 Not started | - | +| **Phase 4: PR/Issues** | 🔲 Not started | - | +| **Phase 5: Advanced** | 🔲 Not started | - | + +--- + +## Goals + +1. **Showcase OSS Impact** - Demonstrate the value and reach of repositories DevSeed contributes to +2. 
**Prove Community Effort** - Show that these are truly community-driven projects, not just DevSeed initiatives +3. **Track Contributions Over Time** - Visualize when and how DevSeed has contributed to the ecosystem +4. **Identify Active vs Stale Projects** - Help prioritize where effort is being spent + +## Current State + +### Repository Data Collected +| Field | Source | Purpose | +|-------|--------|---------| +| `repo_stars` | `repo.stargazers_count` | Popularity metric | +| `repo_forks` | `repo.forks_count` | Ecosystem reach | +| `repo_createdAt` | `repo.created_at` | Project age | +| `repo_updatedAt` | `repo.updated_at` | Recent activity indicator | +| `repo_total_commits` | `repo.get_commits().totalCount` | Development effort | +| `repo_languages` | `repo.get_languages()` | Tech stack | +| `repo_description` | `repo.description` | Context | + +### Contributor Link Data Collected +| Field | Source | Purpose | +|-------|--------|---------| +| `commit_count` | `contributor.contributions` | Individual contribution size | +| `commit_sec_min` | First commit timestamp | When contributor started | +| `commit_sec_max` | Last commit timestamp | Most recent activity | + +--- + +## Phase 1: Quick Wins ✅ COMPLETE + +**Effort: Minimal** - Same API calls, just extracting more fields +**Value: High** - Immediately enriches the visualization +**Status: Implemented in `models.py`** + +### Repository Fields + +| Field | PyGithub Property | Value | Notes | +|-------|-------------------|-------|-------| +| `watchers_count` | `repo.subscribers_count` | Shows sustained interest beyond "star and forget" | Free with existing call | +| `open_issues_count` | `repo.open_issues_count` | Indicates active project with ongoing work | Free with existing call | +| `license` | `repo.license.spdx_id` | OSS credibility, helps filtering | Free with existing call | +| `topics` | `repo.get_topics()` | Categorization, ecosystem mapping | 1 extra lightweight call | +| `has_discussions` | 
`repo.has_discussions` | Community engagement indicator | Free with existing call | +| `has_wiki` | `repo.has_wiki` | Documentation investment | Free with existing call | +| `default_branch` | `repo.default_branch` | Useful for links | Free with existing call | +| `archived` | `repo.archived` | Filter out inactive projects | Free with existing call | + +### Contributor Link Fields + +| Field | Derivation | Value | Notes | +|-------|------------|-------|-------| +| `contribution_span_days` | `commit_sec_max - commit_sec_min` | Shows long-term stewardship | Computed from existing data | +| `is_recent_contributor` | `commit_sec_max > (now - 90 days)` | Identifies active vs historical | Computed from existing data | + +### Implementation + +```python +# models.py - Repository additions +repo_watchers: int # repo.subscribers_count +repo_open_issues: int # repo.open_issues_count +repo_license: str | None # repo.license.spdx_id if repo.license else None +repo_topics: str # ",".join(repo.get_topics()) +repo_has_discussions: bool # repo.has_discussions +repo_archived: bool # repo.archived + +# models.py - Link additions (computed) +contribution_span_days: int # (commit_sec_max - commit_sec_min) // 86400 +is_recent_contributor: bool # commit_sec_max > (now - 90 days).timestamp() +``` + +--- + +## Phase 2: Community Metrics ✅ COMPLETE + +**Effort: Low-Medium** - One additional API call per repo +**Value: Very High** - Directly addresses "community effort" goal +**Status: Implemented in `models.py` and `client.py`** + +### Repository Fields + +| Field | PyGithub Property | Value | Notes | +|-------|-------------------|-------|-------| +| `total_contributors` | `repo.get_contributors().totalCount` | Proves community involvement | 1 call per repo | +| `devseed_contributor_count` | Count from existing links | Context for DevSeed's role | Computed | +| `external_contributor_count` | `total - devseed` | Community health metric | Computed | +| `community_ratio` | `external / total` 
| Key "community effort" metric | Computed | + +### Derived Metrics + +| Metric | Calculation | Value | +|--------|-------------|-------| +| **Bus Factor Indicator** | If `devseed_contributor_count == 1` and that person has >80% commits | Risk indicator | +| **Community Health Score** | `(external_contributors / total) * 100` | Higher = more community-driven | + +### Implementation + +```python +# models.py additions +repo_total_contributors: int +repo_devseed_contributors: int +repo_external_contributors: int +repo_community_ratio: float # external / total + +# client.py - new method +def get_contributor_stats(self, repo: Repo, devseed_usernames: set[str]) -> dict: + contributors = list(repo.get_contributors()) + total = len(contributors) + devseed = sum(1 for c in contributors if c.login in devseed_usernames) + return { + "total": total, + "devseed": devseed, + "external": total - devseed, + "ratio": (total - devseed) / total if total > 0 else 0 + } +``` + +--- + +## Phase 3: Timeline Data + +**Effort: Medium** - Uses GitHub Statistics API, may need retry logic +**Value: Very High** - Enables rich temporal visualizations + +### GitHub Statistics API Overview + +GitHub pre-computes repository statistics and caches them. First request may return `202 Accepted` (computing), requiring a retry after a few seconds. 
+ +### Repository Timeline Fields + +| Field | PyGithub Method | Value | Notes | +|-------|-----------------|-------|-------| +| `weekly_commits` | `repo.get_stats_commit_activity()` | Activity heatmap, trend lines | 52 weeks of data | +| `owner_vs_community_weekly` | `repo.get_stats_participation()` | Shows community growth over time | 52 weeks, split by owner | +| `code_frequency` | `repo.get_stats_code_frequency()` | Additions/deletions over time | Shows sustained development | + +### Contributor Timeline Fields + +| Field | PyGithub Method | Value | Notes | +|-------|-----------------|-------|-------| +| `weekly_activity` | `repo.get_stats_contributors()` | Per-contributor commit timeline | Full history | +| `lines_added_total` | Sum from weekly data | Code contribution size | More meaningful than commits | +| `lines_deleted_total` | Sum from weekly data | Refactoring/maintenance work | Shows cleanup effort | +| `active_weeks_count` | Count weeks with commits > 0 | Consistency of contribution | Sustained vs burst | +| `first_contribution_week` | First week with activity | When they joined | More precise than first commit | +| `last_contribution_week` | Last week with activity | Current status | More precise than last commit | + +### Data Structures + +```python +# Weekly commit activity (repo-level) +weekly_commits: list[WeeklyCommit] # stored as JSON string in CSV + +class WeeklyCommit(BaseModel): + week: int # Unix timestamp (start of week) + total: int # Total commits that week + days: list[int] # Commits per day [Sun, Mon, ..., Sat] + +# Participation split (repo-level) +class ParticipationStats(BaseModel): + owner_total: int + community_total: int + owner_weekly: list[int] # 52 weeks + community_weekly: list[int] # 52 weeks + +# Contributor timeline (link-level) +class ContributorWeeklyStats(BaseModel): + week: int # Unix timestamp + commits: int + additions: int + deletions: int +``` + +### Implementation Notes + +```python +# client.py - with retry logic 
for stats API +import time + +def get_stats_with_retry(self, repo: Repo, stat_method: str, max_retries: int = 3): + """GitHub stats API returns 202 while computing. Retry until ready.""" + for attempt in range(max_retries): + result = getattr(repo, stat_method)() + if result is not None: + return result + time.sleep(2 ** attempt) # Exponential backoff: 1s, 2s, 4s + return None +``` + +--- + +## Phase 4: PR and Issue Activity + +**Effort: Medium-High** - Requires additional API calls, potentially many for active repos +**Value: High** - PRs often more meaningful than raw commits + +### Repository Fields + +| Field | PyGithub Method | Value | Notes | +|-------|-----------------|-------|-------| +| `total_prs` | `repo.get_pulls(state='all').totalCount` | Development activity | 1 call | +| `open_prs` | `repo.get_pulls(state='open').totalCount` | Current activity | 1 call | +| `merged_prs_30d` | Search API with date filter | Recent momentum | More expensive | +| `pr_merge_rate` | `merged / total` | Project health | Computed | +| `avg_pr_time_to_merge` | Requires iterating PRs | Maintainer responsiveness | Expensive | + +### Contributor Fields + +| Field | Method | Value | Notes | +|-------|--------|-------|-------| +| `prs_opened` | Search API: `author:{user} type:pr` | Contribution beyond commits | Per-user search | +| `prs_merged` | Search API with `is:merged` | Accepted contributions | Per-user search | +| `issues_opened` | Search API: `author:{user} type:issue` | Community engagement | Per-user search | +| `reviews_given` | GraphQL API | Quality contribution | Complex | + +### API Cost Considerations + +- **Search API**: 30 requests/minute (authenticated) +- **GraphQL**: More efficient for complex queries, 5000 points/hour +- **Recommendation**: Batch contributor queries, cache aggressively + +### Implementation Approach + +```python +# Use search API for contributor PR/issue counts +def get_contributor_activity(self, username: str, repo_full_name: str) -> dict: + # 
PRs authored in this repo + prs = self.github.search_issues( + f"repo:{repo_full_name} author:{username} type:pr" + ) + + # Issues authored in this repo + issues = self.github.search_issues( + f"repo:{repo_full_name} author:{username} type:issue" + ) + + return { + "prs_total": prs.totalCount, + "issues_total": issues.totalCount, + } +``` + +--- + +## Phase 5: Advanced Metrics + +**Effort: High** - Complex calculations, GraphQL, or external data +**Value: Medium-High** - Nice-to-have polish + +### Repository Health Metrics + +| Metric | Calculation | Value | +|--------|-------------|-------| +| **Release Frequency** | `repo.get_releases()` + date analysis | Project maturity | +| **Issue Response Time** | Avg time from issue open to first comment | Maintainer engagement | +| **PR Review Turnaround** | Avg time from PR open to first review | Community responsiveness | +| **Documentation Score** | Check for README length, CONTRIBUTING, CODE_OF_CONDUCT | Project professionalism | + +### Contributor Impact Metrics + +| Metric | Calculation | Value | +|--------|-------------|-------| +| **Code Review Activity** | GraphQL: `pullRequestReviews` | Quality contribution | +| **Cross-Repo Presence** | Count repos contributed to | Ecosystem influence | +| **Mentorship Indicator** | Reviews given vs commits made | Senior contributor signal | + +### External Data Sources + +| Source | Data Available | Integration | +|--------|----------------|-------------| +| **npm/PyPI downloads** | Package popularity | API calls to registries | +| **GitHub Sponsors** | Funding status | GraphQL API | +| **Dependent repos** | `repo.get_network_count()` | Shows downstream impact | + +--- + +## Implementation Phases Summary + +| Phase | New Fields | API Calls Added | Effort | Value | +|-------|------------|-----------------|--------|-------| +| **1: Quick Wins** | 8 repo, 2 link | ~1 per repo | 1-2 hours | High | +| **2: Community** | 4 repo | 1 per repo | 2-3 hours | Very High | +| **3: 
Timeline** | 3 repo, 6 link | 3 per repo (with retry) | 4-6 hours | Very High | +| **4: PR/Issues** | 5 repo, 4 link | 2-4 per repo + per contributor | 1-2 days | High | +| **5: Advanced** | Variable | Variable | 2-3 days | Medium | + +--- + +## Rate Limit Considerations + +| API Type | Limit (Authenticated) | Mitigation | +|----------|----------------------|------------| +| REST API | 5,000/hour | Cache responses, batch where possible | +| Search API | 30/minute | Queue searches, respect rate limits | +| GraphQL | 5,000 points/hour | Use for complex queries only | +| Statistics API | Subject to REST limit | Implement retry logic for 202 responses | + +### Recommendations + +1. **Cache aggressively** - Repository data changes slowly, cache for 24h minimum +2. **Incremental updates** - Only fetch new data since last run +3. **Batch operations** - Group API calls, respect rate limits +4. **Store raw responses** - Keep JSON files for debugging and re-processing + +--- + +## Visualization Opportunities + +With expanded data, new visualization options become possible: + +| Data | Visualization | Goal Addressed | +|------|---------------|----------------| +| Timeline data | Stacked area chart of commits over time | Show sustained contribution | +| Community ratio | Pie/donut chart per repo | Prove community effort | +| Contributor spans | Gantt-style timeline | Show long-term stewardship | +| Cross-repo activity | Network graph thickness | Show ecosystem presence | +| Activity heatmap | Calendar view | Identify active periods | + +--- + +## Next Steps + +1. ~~**Phase 1**: Add quick-win fields to models, update CLI~~ ✅ +2. ~~**Phase 2**: Add contributor counting, compute community ratios~~ ✅ +3. **Phase 3**: Integrate statistics API with retry logic +4. Review visualization needs before Phase 4+ + +--- + +## When to Consider a Database + +The current architecture uses JSON files for individual records and CSVs for aggregated output. 
This works well for the current scale but has tradeoffs worth considering. + +### Current Approach: JSON + CSV Files + +**Pros:** +- Simple, no infrastructure to maintain +- Easy to debug (human-readable files) +- Version controllable with git +- No setup required for new contributors +- Works offline + +**Cons:** +- No query capability (must load all data into memory) +- No relationships between entities +- Duplicate API calls if data isn't cached properly +- File I/O overhead scales linearly with data size + +### When to Switch: Decision Matrix + +| Trigger | Threshold | Recommendation | +|---------|-----------|----------------| +| **Number of repositories** | > 200 | Consider SQLite | +| **Number of contributors** | > 500 | Consider SQLite | +| **Timeline data (Phase 3)** | 52 weeks × repos × contributors | Likely need SQLite | +| **Query complexity** | Need JOINs, aggregations, filtering | Need a database | +| **Multiple consumers** | Dashboard + API + reports | Consider PostgreSQL | +| **Real-time updates** | Live data refresh | Consider PostgreSQL + cache | + +### Recommended Migration Path + +**Stage 1: SQLite (Local, Single-file)** +- When: Phase 3 implementation or >100 repos +- Why: Still simple, no server, but enables SQL queries +- Schema: Normalize repos, contributors, links, weekly_stats tables +- Effort: ~1 day to migrate + +**Stage 2: PostgreSQL (If needed)** +- When: Multiple users/services need access, or need advanced features +- Why: Concurrent access, better JSON support, full-text search +- Effort: ~2-3 days + infrastructure + +### Suggested SQLite Schema (for reference) + +```sql +-- Core entities +CREATE TABLE repositories ( + id INTEGER PRIMARY KEY, + full_name TEXT UNIQUE NOT NULL, + stars INTEGER, + forks INTEGER, + total_contributors INTEGER, + community_ratio REAL, + -- ... 
other fields + fetched_at TIMESTAMP +); + +CREATE TABLE contributors ( + id INTEGER PRIMARY KEY, + login TEXT UNIQUE NOT NULL, + display_name TEXT, + is_devseed BOOLEAN +); + +-- Relationships +CREATE TABLE contributions ( + id INTEGER PRIMARY KEY, + repo_id INTEGER REFERENCES repositories(id), + contributor_id INTEGER REFERENCES contributors(id), + commit_count INTEGER, + first_commit_at TIMESTAMP, + last_commit_at TIMESTAMP, + contribution_span_days INTEGER, + UNIQUE(repo_id, contributor_id) +); + +-- Timeline data (Phase 3) +CREATE TABLE weekly_stats ( + id INTEGER PRIMARY KEY, + repo_id INTEGER REFERENCES repositories(id), + contributor_id INTEGER REFERENCES contributors(id), -- NULL for repo-level + week_start TIMESTAMP, + commits INTEGER, + additions INTEGER, + deletions INTEGER +); + +-- Indexes for common queries +CREATE INDEX idx_contributions_repo ON contributions(repo_id); +CREATE INDEX idx_weekly_stats_repo_week ON weekly_stats(repo_id, week_start); +``` + +### Current Recommendation + +**Stay with JSON/CSV for now.** The current ~50 repositories and ~30 contributors are well within the file-based approach's sweet spot. Re-evaluate when: + +1. You implement Phase 3 (timeline data significantly increases data volume) +2. You want to query data in new ways (e.g., "show me all repos where DevSeed contribution dropped in the last 6 months") +3. Build times become noticeably slow (>5 minutes for full refresh) + +--- + +*Document created: January 2026* +*Last updated: January 2026* +*For: Development Seed Contributor Network Tool* diff --git a/.claude/DATE_RANGE_IMPLEMENTATION_PLAN.md b/.claude/DATE_RANGE_IMPLEMENTATION_PLAN.md new file mode 100644 index 0000000..b7244a5 --- /dev/null +++ b/.claude/DATE_RANGE_IMPLEMENTATION_PLAN.md @@ -0,0 +1,318 @@ +# Date Range Implementation Plan + +Granular commit counts over time, enabling dynamic link/flow sizing based on commit activity within a selected date window. 
+ +--- + +## Goal + +When a user selects a time range, the visualization should reflect the actual commit count within that window — not just whether activity overlapped. Link widths, contributor node sizes, and flow visuals should all scale to the filtered commit counts. This also lays the groundwork for a future animation feature that steps through time. + +--- + +## Current State + +### Data Pipeline + +The Python CLI (`python/contributor_network/cli.py`) fetches commit data via PyGithub. The `data` command calls `Link.from_github()` for each contributor-repo pair, which: + +1. Calls `repo.get_commits(author=contributor.login)` to get all commits +2. Extracts only three values: first commit timestamp, last commit timestamp, total count +3. Discards individual commit dates + +The raw temporal data passes through the code but is not stored. + +### Link Model (`python/contributor_network/models.py`) + +``` +author_name: str +repo: str +commit_count: int +commit_sec_min: int # Unix timestamp of first commit +commit_sec_max: int # Unix timestamp of last commit +contribution_span_days: int +is_recent_contributor: bool +``` + +### CSV Output (`links.csv`) + +One row per contributor-repo pair. 369 rows for the current dataset. Contains the aggregate `commit_count` with no temporal breakdown. + +### JavaScript Consumption + +`d3.csv()` loads `links.csv`. `prepare.js` parses `commit_count` as a number and uses it to scale link widths via `scale_link_width`. Contributor radii are scaled by `total_commits` (sum of their link commit counts). + +--- + +## Changes Required + +### 1. Python: Collect Monthly Commit Histograms + +**File:** `python/contributor_network/models.py` + +Add a new field to the `Link` model: + +```python +commit_histogram: dict[str, int] = {} # {"2024-01": 5, "2024-02": 12, ...} +``` + +**File:** `python/contributor_network/cli.py` (inside `Link.from_github()` or `update_links()`) + +The method already iterates all commits to compute the total count. 
During that iteration, extract each commit's `author.date`, format as `YYYY-MM`, and bucket: + +```python +from collections import defaultdict + +histogram = defaultdict(int) +for commit in repo.get_commits(author=login): + date = commit.commit.author.date # datetime object + month_key = date.strftime("%Y-%m") + histogram[month_key] += 1 + +link.commit_histogram = dict(histogram) +``` + +This adds zero extra API calls. The commit objects are already being fetched. + +**Existing fields are unchanged.** `commit_count`, `commit_sec_min`, `commit_sec_max`, `contribution_span_days`, and `is_recent_contributor` remain as-is for backward compatibility. + +--- + +### 2. New CSV: `commit_activity.csv` + +**File:** `python/contributor_network/cli.py` (inside the `csvs` command) + +Create a fourth CSV file with one row per contributor-repo-month: + +``` +author_name,repo,month,commit_count +Vincent Sarago,developmentseed/titiler,2024-01,12 +Vincent Sarago,developmentseed/titiler,2024-02,8 +Vincent Sarago,developmentseed/titiler,2024-03,15 +Kyle Barron,stac-utils/rustac,2024-01,22 +... 
+``` + +**Columns:** + +| Column | Type | Description | +|--------|------|-------------| +| `author_name` | string | Contributor display name (matches `links.csv`) | +| `repo` | string | Full repo name `owner/repo` (matches `links.csv`) | +| `month` | string | `YYYY-MM` format | +| `commit_count` | int | Number of commits in that month | + +**Generation logic:** + +```python +# Inside the csvs command, after writing links.csv: +activity_rows = [] +for link_file in links_dir.glob("*.json"): + link = Link.model_validate_json(link_file.read_text()) + for month, count in link.commit_histogram.items(): + activity_rows.append({ + "author_name": link.author_name, + "repo": link.repo, + "month": month, + "commit_count": count, + }) + +# Sort for readability +activity_rows.sort(key=lambda r: (r["author_name"], r["repo"], r["month"])) + +# Write CSV +with open(output_dir / "commit_activity.csv", "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=["author_name", "repo", "month", "commit_count"]) + writer.writeheader() + writer.writerows(activity_rows) +``` + +**Expected size:** ~369 links × ~30 months average = ~11,000 rows. Well within browser limits. + +**Also update the `build` command** to copy `commit_activity.csv` to `assets/data/` alongside the other CSVs. + +--- + +### 3. JavaScript: Load and Index the Activity Data + +**File:** `index.html` + +Add the fourth CSV to the existing `Promise.all`: + +```javascript +const promises = [ + d3.csv('assets/data/top_contributors.csv'), + d3.csv('assets/data/repositories.csv'), + d3.csv('assets/data/links.csv'), + d3.csv('assets/data/commit_activity.csv') // NEW +]; +``` + +Pass `values[3]` through to the chart constructor. 
+ +**File:** `js/chart.js` + +Build a lookup map during initialization, keyed by `author_name~repo`: + +```javascript +// Build activity index: "author_name~repo" → Map +const activityIndex = new Map(); +activityData.forEach(row => { + const key = `${row.author_name}~${row.repo}`; + if (!activityIndex.has(key)) { + activityIndex.set(key, new Map()); + } + activityIndex.get(key).set(row.month, +row.commit_count); +}); +``` + +**File:** `js/data/prepare.js` + +During link normalization, attach the histogram to each link: + +```javascript +// After parsing commit_count, commit_sec_min, commit_sec_max: +const linkKey = `${d.contributor_name}~${d.repo}`; +d.commit_histogram = activityIndex.get(linkKey) || new Map(); +``` + +--- + +### 4. Time Range Filtering with Accurate Counts + +**File:** `js/chart.js` (inside `applyFilters()`) + +Replace the overlap-based time filter described in the roadmap (Feature 4) with a count-based filter: + +```javascript +if (activeFilters.timeRangeMin !== null || activeFilters.timeRangeMax !== null) { + visibleLinks = visibleLinks.map(link => { + // Sum commits only within the selected month range + let filteredCount = 0; + for (const [month, count] of link.commit_histogram.entries()) { + // Parse "YYYY-MM" to a comparable date (first of month) + const monthDate = new Date(month + "-01"); + if (activeFilters.timeRangeMin && monthDate < activeFilters.timeRangeMin) continue; + if (activeFilters.timeRangeMax && monthDate > activeFilters.timeRangeMax) continue; + filteredCount += count; + } + // Return a copy with the filtered count + return { ...link, commit_count: filteredCount }; + }).filter(link => link.commit_count > 0); // Remove links with zero commits in range + + // Re-derive visible repos from remaining links + const repoIdsFromLinks = new Set(visibleLinks.map(l => l.repo)); + visibleRepos = visibleRepos.filter(r => repoIdsFromLinks.has(r.repo)); +} +``` + +The existing cascade (Steps 3-4 in `applyFilters()`) then handles filtering 
contributors and re-filtering links. + +--- + +### 5. Dynamic Link Width Scaling + +**File:** `js/data/prepare.js` (inside scale domain calculation) + +The `scale_link_width` domain is currently set once from the global max `commit_count`. After time-range filtering replaces commit counts, the domain must update. + +**Recommended approach — fixed domain with context:** + +Keep the global max as the scale domain so link widths are always relative to the full dataset. A link that had 100 commits total but only 10 in the selected range will appear proportionally thinner. This makes comparisons across time ranges meaningful. + +```javascript +// During prepareData(), the domain is set from current (possibly filtered) data: +const maxCommitCount = d3.max(links, d => d.commit_count); +scale_link_width.domain([1, 10, maxCommitCount]); +``` + +Note that as written, this rescales to the *filtered* max: `prepareData()` runs during `chart.rebuild()`, after `commit_count` values have been replaced by the filtered counts. To implement the recommended fixed global domain, store `originalMaxCommitCount` before any filtering is applied and use that as the domain ceiling instead of the per-rebuild `maxCommitCount`. + +--- + +### 6. Dynamic Contributor Node Sizing + +**File:** `js/data/prepare.js` + +Contributor radius is scaled by `total_commits`, which is calculated as the sum of their link `commit_count` values. Since links now carry filtered counts after Step 4, this recalculation happens naturally during `prepareData()`: + +```javascript +// Already exists in prepare.js — recalculates from current link data: +contributors.forEach(d => { + d.total_commits = d3.sum( + d.links_original.filter(l => /* link is visible */), + l => l.commit_count + ); +}); +``` + +Verify that `links_original` on each contributor is updated to reference the filtered links (not the pre-filter originals). 
If `links_original` still points to unfiltered data, either update it during `applyFilters()` or compute `total_commits` from `visibleLinks` instead. + +--- + +### 7. Owner Node Aggregation + +**File:** `js/data/prepare.js` + +Owner nodes aggregate stats from their child repos. When link counts change, the owner-level aggregations (total commits through that owner) should reflect the filtered values. The existing owner link deduplication and aggregation in `prepareData()` (lines 364-436) already sums `commit_count` from the current links, so this should work automatically. Verify by checking that owner link widths shrink when the time range narrows. + +--- + +## Animation Hook (Future) + +With monthly histograms on each link, animation becomes a UI controller problem: + +```javascript +// Pseudocode for animation controller +const months = getAllMonthsSorted(); // ["2019-06", "2019-07", ...] +let frameIndex = 0; + +function animateFrame() { + const currentMonth = months[frameIndex]; + chart.setTimeRange( + new Date(months[0] + "-01"), // Cumulative: from start + new Date(currentMonth + "-01") // Up to current frame + ); + frameIndex++; + if (frameIndex < months.length) { + requestAnimationFrame(animateFrame); + } +} +``` + +Nodes appear when their first commit month is reached. Links grow as commit counts accumulate. The ring fills up over time. No additional data pipeline changes are needed — only a UI play/pause/scrub control and the animation loop. + +--- + +## Implementation Order + +1. **Python model change** — add `commit_histogram` field to Link +2. **Python CLI change** — bucket commits by month during `Link.from_github()` +3. **CSV generation** — write `commit_activity.csv` in the `csvs` command +4. **Build command** — copy new CSV to `assets/data/` +5. **Re-run data collection** — `uv run contributor-network data` then `csvs` then `build` +6. **JS data loading** — load fourth CSV, build activity index +7. 
**JS data preparation** — attach histograms to link objects +8. **Time range filtering** — implement count-based filtering in `applyFilters()` +9. **Scale updates** — verify link widths and contributor radii update correctly +10. **UI slider** — build the time range control + +Steps 1-5 are Python/data work (~half day). Steps 6-10 are JS/visualization work (~1-2 days), mostly layered on top of the Feature 4 time range slider from the roadmap. + +--- + +## Risks and Considerations + +**API rate limits:** No additional API calls are needed. The commit data is already being fetched — we're just retaining the dates instead of discarding them. + +**Data freshness:** Existing JSON files in `links/` won't have `commit_histogram` until re-fetched. The field defaults to `{}` so old data won't break, but will produce empty histograms. A full re-fetch is needed for complete temporal data. + +**Month boundary precision:** Commits are bucketed by calendar month (UTC). A commit at 11:59pm on Jan 31 and one at 12:01am on Feb 1 land in different buckets. This is acceptable for the visualization's granularity. + +**Scale behavior:** When the time range is very narrow (e.g., one month), most links will have small counts and the scale domain shrinks. This can make thin links appear thick. Consider setting a minimum domain ceiling (e.g., 10) to prevent scale distortion on narrow ranges. + +**Backward compatibility:** `links.csv` is unchanged. The new `commit_activity.csv` is additive. If the JS can't find it, fall back to overlap-based filtering. + +--- + +**Last Updated**: February 2026 diff --git a/.claude/PRD.md b/.claude/PRD.md new file mode 100644 index 0000000..9c5483d --- /dev/null +++ b/.claude/PRD.md @@ -0,0 +1,290 @@ +# Product Requirements Document: Contributor Network Visualization + +## Executive Summary + +**Contributor Network** is an interactive web visualization that showcases Development Seed's contributions to open-source projects. 
It displays the relationships between Development Seed team members, the repositories they contribute to, and the broader ecosystem of collaborators on those projects. + +The tool serves three core purposes: +1. **Showcase OSS Impact** - Demonstrate the value and reach of repositories DevSeed contributes to +2. **Prove Community Effort** - Show that these are truly community-driven projects, not just DevSeed initiatives +3. **Track Contributions Over Time** - Visualize when and how DevSeed has contributed to the ecosystem + +--- + +## Product Overview + +### What It Is + +A D3.js-based interactive network visualization that: +- Displays DevSeed contributors as nodes arranged in a circle +- Shows repositories as nodes positioned based on collaboration patterns +- Visualizes connections (links) between contributors and repos +- Provides filtering by organization and repository metrics +- Offers rich tooltips showing contributor and repository statistics +- Enables interactive exploration through hover and click interactions + +### Live Demo + +https://developmentseed.org/contributor-network + +### Core Features + +#### 1. **Network Visualization** +- Contributors arranged alphabetically around a central ring +- Repositories grouped by ownership pattern (single owner, multiple contributors, collaborations) +- Force-directed layout creates natural clustering of related projects +- Visual flows show which contributors have worked on which repos + +#### 2. **Filtering & Discovery** +- Filter by organization (e.g., show only repos where "Conservation Labs" contributed) +- Filter repositories by: + - Minimum stars + - Minimum forks + - Minimum watchers + - Programming language +- Clear all filters with one click + +#### 3. 
**Interactive Exploration** +- **Hover**: Highlight a contributor or repo to see its connections +- **Click**: Select a contributor to see detailed stats about their contributions +- **Hover + Click**: When a contributor is selected, hover over repos to see the specific link details (commits, dates) + +#### 4. **Rich Information Display** +- **Contributor Tooltips**: Name, organization, contribution count, date range +- **Repository Tooltips**: Name, stars, forks, watchers, languages, open issues, community metrics +- **Statistics**: Shows commit counts, contribution spans, community involvement ratios + +#### 5. **Visual Design** +- Uses Development Seed brand colors +- Clear typography with readable labels +- Responsive canvas that adapts to window size +- Optimized for both desktop exploration and presentation use + +--- + +## Technical Stack + +### Backend (Python) +- **Language**: Python 3.10+ +- **Package Management**: `uv` (fast Python package installer) +- **CLI Framework**: Click (for command-line interface) +- **Data Validation**: Pydantic +- **GitHub API Client**: PyGithub +- **Data Format**: TOML config, JSON data files, CSV exports + +### Frontend (JavaScript) +- **Visualization**: D3.js (v7) +- **Canvas Rendering**: HTML5 Canvas (for performance) +- **Bundler**: esbuild (via npm scripts) +- **Testing**: Vitest +- **Architecture**: Modular ES6 modules + +### Deployment +- **Hosting**: Static site (GitHub Pages or CDN) +- **Build**: GitHub Actions workflow +- **Source**: GitHub repository (`developmentseed/contributor-network`) + +--- + +## Data Flow + +``` +GitHub API + ↓ +Python CLI (client.py) + ↓ +JSON Files (assets/data/) + ↓ +CSV Generation (csvs command) + ↓ +Configuration (config.toml) + ↓ +D3.js Visualization (index.html) +``` + +### Data Collection Process + +1. 
**Configuration** (`config.toml`): + - Specify repositories to track: `owner/repo` format + - Define contributors: Current DevSeed team, alumni, external collaborators + - Tag teams/organizations for filtering + +2. **Data Fetching** (`uv run contributor-network data`): + - Queries GitHub API for each configured repo + - Collects: commits, contributors, stars, forks, languages, topics, etc. + - Stores raw JSON in `assets/data/` + +3. **CSV Generation** (`uv run contributor-network csvs`): + - Converts JSON to CSV format for web consumption + - Creates three main files: + - `top_contributors.csv` - Contributor metadata + - `repositories.csv` - Repo metadata and metrics + - `links.csv` - Contributor-to-repo relationships and commit details + +4. **Site Build** (`uv run contributor-network build`): + - Bundles JavaScript modules + - Generates static HTML + - Outputs to `dist/` for deployment + +--- + +## Current Capabilities + +### What Works Today + +#### Configuration-Driven +- Edit `config.toml` to add/remove repositories and contributors +- Auto-discover new repositories where multiple DevSeed members contributed +- Support for team/organization grouping + +#### Data Collection +- Fetches commit counts, dates, and contributor lists +- Collects repository metrics (stars, forks, watchers, languages, topics, etc.) 
+- Calculates community health metrics: + - Total contributors per repo + - DevSeed vs external contributor split + - Community contribution ratio + +#### Visualization +- Force-directed layout with optimized positioning +- Color-coded by contributor type and repository ownership +- Responsive to window resizing +- Hover states with gradient highlighting +- Click to select for detailed stats +- Smooth animations and transitions + +#### Filtering +- Organization-based filtering +- Repository metrics filters (stars, forks, watchers) +- Language filters +- Clear filters button + +--- + +## Target Users & Use Cases + +### Primary Users +- **Development Seed Team**: Show impact and community involvement to stakeholders +- **Potential Clients/Partners**: Demonstrate expertise in open-source ecosystems +- **Community Members**: Discover how to contribute to funded projects +- **Media/Press**: Visual story about DevSeed's open-source commitment + +### Key Use Cases + +1. **Impact Storytelling** + - "DevSeed contributed to 50+ repositories with 300+ external collaborators" + - Show the breadth of ecosystem impact + +2. **Team Highlights** + - Interactive way to showcase team member contributions + - Identify cross-project collaboration patterns + +3. **Community Health Assessment** + - Visualize which projects have active external communities + - Identify projects that need more community investment + +4. 
**Contribution Discovery** + - Help new community members find where to contribute + - See which projects align with their interests + +--- + +## Success Metrics + +### Technical Metrics +- **Load Time**: < 3 seconds on typical broadband +- **Interaction Responsiveness**: < 100ms on hover/click +- **Accessibility**: WCAG 2.1 AA compliance +- **Performance**: 60 FPS on modern browsers + +### Business/Product Metrics +- **Engagement**: Avg session duration on the visualization page +- **Discovery**: Click-through rate to individual repositories +- **Reach**: Views per month, geographic distribution +- **Feedback**: User comments/shares on social media + +--- + +## Technical Constraints & Considerations + +### Rate Limiting +- GitHub API: 5,000 requests/hour (authenticated) +- Search API: 30 requests/minute +- Statistics API: Subject to REST rate limits with 202 retry behavior +- **Solution**: Aggressive caching, incremental updates, batch operations + +### Data Volume +- Current: ~50 repositories, ~30 contributors +- Current approach (JSON/CSV files) works well at this scale +- Migration to SQLite recommended if: + - > 200 repositories + - > 500 contributors + - Need Phase 3+ timeline data + +### Browser Compatibility +- Modern browsers (Chrome, Firefox, Safari, Edge) +- Canvas support required +- ES6 module support required (no transpilation) +- Not optimized for mobile (desktop-first design) + +--- + +## Configuration & Customization + +### Repository Configuration (`config.toml`) +```toml +[repositories] +"owner/repo-name" = "Display Name" + +[contributors.devseed] +github_username = "Display Name" + +[contributors.alumni] +github_username = "Display Name" +``` + +### Visualization Customization +- **Colors**: Defined in `src/js/config/theme.js` +- **Layout**: Force simulation parameters, collision detection +- **Font sizes**: Theme configuration (currently under refactoring to increase) +- **Filters**: Defined in filter state management modules + +--- + +## 
Project Status & Roadmap + +See [`CLAUDE.md`](./CLAUDE.md) for current project status and developer orientation. + +--- + +## Development Guidelines + +### Code Quality Standards +- **Python**: Typed with mypy, formatted with ruff, tested with pytest +- **JavaScript**: Modular architecture, unit tests with Vitest, <300 lines per file (target) +- **Both**: Clear separation of concerns, single responsibility principle + +### Git Workflow +- Main branch: Always deployable +- Feature branches: Descriptive names +- PRs required for all changes +- CI/CD validates tests, linting, type checking before merge + +### Documentation +- Code comments for complex logic +- Docstrings for all public functions (Python) +- JSDoc comments for exported functions (JavaScript) +- Runbooks for common operations + +--- + +## License & Attribution + +**License**: Mozilla Public License (MPL) 2.0 +**Original Work**: [ORCA top-contributor-network](https://github.com/nbremer/ORCA/tree/main/top-contributor-network) by Nadieh Bremer +**Modifications**: Development Seed (2025) + +--- + +*Document Version: 1.0* +*Last Updated: February 2026* +*Maintained by: Development Seed Team* diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index 93b448e..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1,82 +0,0 @@ -# CLAUDE.md - -This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. - -## Project Overview - -This is a Python CLI tool with a D3.js frontend that generates interactive network visualizations of GitHub contributors. The CLI fetches data from GitHub, generates CSVs, and builds a static site for deployment. 
- -## Commands - -### Development - -```bash -# Install dependencies -uv sync - -# Run CLI commands -uv run contributor-network data # Fetch contribution data from GitHub -uv run contributor-network csvs # Generate CSVs from JSON -uv run contributor-network build # Build static site to dist/ -uv run contributor-network discover # Find new repositories -uv run contributor-network list-contributors # Display all contributors -``` - -### Quality Checks - -```bash -# Run all checks (as in CI) -uv run ruff format --check . -uv run ruff check . -uv run mypy - -# Auto-fix formatting and lint issues -uv run ruff format . -uv run ruff check --fix . - -# Run tests -uv run pytest -uv run pytest python/tests/test_config.py::test_function_name # Single test -``` - -## Architecture - -**Data flow**: GitHub API → Python CLI → JSON files → CSV files → D3.js visualization - -### Folder Structure - -``` -python/ # Python backend - contributor_network/ # CLI package - tests/ # Python tests - templates/ # Jinja2 HTML templates -js/ # JavaScript frontend - chart.js # Main D3.js visualization - config/ # Theme and scale configuration - data/ # Data preparation and filtering - render/ # Canvas rendering modules - simulations/ # D3 force simulations - ... 
-assets/ # Static assets - css/ # Stylesheets - data/ # CSV data files - img/ # Images - lib/ # Vendored D3 libraries -``` - -### Key Files - -- `python/contributor_network/cli.py` - Click-based CLI with 5 subcommands -- `python/contributor_network/config.py` - Pydantic models for TOML configuration -- `python/contributor_network/models.py` - Data models (Link, Repository) -- `python/contributor_network/client.py` - GitHub API client wrapper -- `js/chart.js` - D3.js visualization entry point -- `python/templates/` - Jinja2 HTML templates -- `config.toml`, `veda.toml` - Repository and contributor configuration - -## Code Style - -- Use `ruff` for linting and formatting -- Type hints required (mypy runs in CI) -- Pydantic for data validation -- Click for CLI commands diff --git a/assets/css/style.css b/assets/css/style.css index 9f80868..9ea2aad 100644 --- a/assets/css/style.css +++ b/assets/css/style.css @@ -508,7 +508,9 @@ li#item-remaining::before { margin: 0; } -#org-select { +#org-select, +#stars-select, +#forks-select { padding: 8px 12px; font-size: 14px; border: 1px solid #ddd; @@ -519,11 +521,15 @@ li#item-remaining::before { transition: border-color 0.2s ease; } -#org-select:hover { +#org-select:hover, +#stars-select:hover, +#forks-select:hover { border-color: #CF3F02; } -#org-select:focus { +#org-select:focus, +#stars-select:focus, +#forks-select:focus { outline: none; border-color: #CF3F02; box-shadow: 0 0 0 2px rgba(207, 63, 2, 0.1); @@ -545,7 +551,9 @@ li#item-remaining::before { gap: 10px; } - #org-select { + #org-select, + #stars-select, + #forks-select { width: 100%; max-width: 300px; } diff --git a/index.html b/index.html index b7851d9..530edbc 100644 --- a/index.html +++ b/index.html @@ -44,10 +44,31 @@

...

- + + + + + + + +

@@ -167,12 +188,40 @@

...

updateFilterStats(); }); + // Stars filter + const starsSelect = document.getElementById("stars-select"); + starsSelect.addEventListener("change", function () { + const value = this.value === "" ? null : parseInt(this.value, 10); + contributorNetworkVisual.setRepoFilter("starsMin", value); + updateFilterStats(); + }); + + // Forks filter + const forksSelect = document.getElementById("forks-select"); + forksSelect.addEventListener("change", function () { + const value = this.value === "" ? null : parseInt(this.value, 10); + contributorNetworkVisual.setRepoFilter("forksMin", value); + updateFilterStats(); + }); + function updateFilterStats() { const statsElement = document.getElementById("filter-stats"); - if (currentSelectedOrg === null) { + const parts = []; + + if (currentSelectedOrg !== null) { + parts.push(`org: ${currentSelectedOrg}`); + } + if (starsSelect.value !== "") { + parts.push(`stars: ${starsSelect.value}+`); + } + if (forksSelect.value !== "") { + parts.push(`forks: ${forksSelect.value}+`); + } + + if (parts.length === 0) { statsElement.textContent = `Showing all ${sortedOrgs.length} organizations`; } else { - statsElement.textContent = `Filtered to: ${currentSelectedOrg}`; + statsElement.textContent = `Filtered by ${parts.join(", ")}`; } } updateFilterStats(); diff --git a/js/__tests__/filter.test.js b/js/__tests__/filter.test.js index 6b557f9..0413151 100644 --- a/js/__tests__/filter.test.js +++ b/js/__tests__/filter.test.js @@ -10,6 +10,8 @@ import { deepClone, getRepoOwner, filterReposByOrganization, + filterReposByStars, + filterReposByForks, filterLinksByRepos, filterLinksByContributors, filterContributorsByLinks, @@ -19,11 +21,11 @@ import { // Sample test data const sampleRepos = [ - { repo: 'developmentseed/titiler', stars: 100 }, - { repo: 'developmentseed/rio-cogeo', stars: 50 }, - { repo: 'stac-utils/stac-fastapi', stars: 200 }, - { repo: 'radiantearth/stac-spec', stars: 300 }, - { repo: 'DevSeed Team', stars: 0 } // Central pseudo-repo + 
{ repo: 'developmentseed/titiler', stars: 100, repo_stars: '1036', repo_forks: '216' }, + { repo: 'developmentseed/rio-cogeo', stars: 50, repo_stars: '50', repo_forks: '10' }, + { repo: 'stac-utils/stac-fastapi', stars: 200, repo_stars: '304', repo_forks: '116' }, + { repo: 'radiantearth/stac-spec', stars: 300, repo_stars: '875', repo_forks: '188' }, + { repo: 'DevSeed Team', stars: 0, repo_stars: '0', repo_forks: '0' } // Central pseudo-repo ]; const sampleContributors = [ @@ -151,6 +153,56 @@ describe('filterContributorsByLinks', () => { }); }); +describe('filterReposByStars', () => { + it('should filter repos below the star threshold', () => { + const result = filterReposByStars(sampleRepos, 100); + + // titiler (1036), stac-fastapi (304), stac-spec (875) pass + expect(result).toHaveLength(3); + expect(result.every(r => +r.repo_stars >= 100)).toBe(true); + }); + + it('should return all repos when threshold is 0', () => { + const result = filterReposByStars(sampleRepos, 0); + expect(result).toHaveLength(sampleRepos.length); + }); + + it('should return empty array when threshold exceeds all repos', () => { + const result = filterReposByStars(sampleRepos, 5000); + expect(result).toHaveLength(0); + }); + + it('should handle string repo_stars values from CSV', () => { + const repos = [ + { repo: 'test/a', repo_stars: '150' }, + { repo: 'test/b', repo_stars: '50' } + ]; + const result = filterReposByStars(repos, 100); + expect(result).toHaveLength(1); + expect(result[0].repo).toBe('test/a'); + }); +}); + +describe('filterReposByForks', () => { + it('should filter repos below the fork threshold', () => { + const result = filterReposByForks(sampleRepos, 100); + + // titiler (216), stac-fastapi (116), stac-spec (188) pass + expect(result).toHaveLength(3); + expect(result.every(r => +r.repo_forks >= 100)).toBe(true); + }); + + it('should return all repos when threshold is 0', () => { + const result = filterReposByForks(sampleRepos, 0); + 
expect(result).toHaveLength(sampleRepos.length); + }); + + it('should return empty array when threshold exceeds all repos', () => { + const result = filterReposByForks(sampleRepos, 5000); + expect(result).toHaveLength(0); + }); +}); + describe('applyFilters', () => { let originalData; @@ -220,6 +272,34 @@ describe('applyFilters', () => { expect(result.links).toEqual([]); }); + it('should filter by minimum stars', () => { + const result = applyFilters(originalData, { organizations: [], starsMin: 500, forksMin: null }); + + // titiler (1036) and stac-spec (875) pass; stac-fastapi (304) and rio-cogeo (50) don't + expect(result.repos.length).toBeLessThan(sampleRepos.length); + expect(result.repos.every(r => +r.repo_stars >= 500)).toBe(true); + }); + + it('should filter by minimum forks', () => { + const result = applyFilters(originalData, { organizations: [], starsMin: null, forksMin: 100 }); + + // titiler (216), stac-fastapi (116), stac-spec (188) pass + expect(result.repos).toHaveLength(3); + expect(result.repos.every(r => +r.repo_forks >= 100)).toBe(true); + }); + + it('should compose organization and metric filters', () => { + const result = applyFilters(originalData, { + organizations: ['developmentseed'], + starsMin: 100, + forksMin: null + }); + + // Only developmentseed repos with 100+ stars: titiler (1036) passes, rio-cogeo (50) doesn't + expect(result.repos).toHaveLength(1); + expect(result.repos[0].repo).toBe('developmentseed/titiler'); + }); + it('should correctly chain filters (repos → links → contributors → links)', () => { // Filter to radiantearth only const result = applyFilters(originalData, { organizations: ['radiantearth'] }); @@ -240,7 +320,7 @@ describe('applyFilters', () => { describe('createFilterManager', () => { it('should start with empty filters', () => { const manager = createFilterManager(); - expect(manager.getFilters()).toEqual({ organizations: [] }); + expect(manager.getFilters()).toEqual({ organizations: [], starsMin: null, forksMin: 
null }); }); it('should add organization when setOrganization called with true', () => { @@ -301,4 +381,65 @@ describe('createFilterManager', () => { manager.clearOrganizations(); expect(manager.hasActiveFilters()).toBe(false); }); + + it('should set metric filters', () => { + const manager = createFilterManager(); + manager.setMetricFilter('starsMin', 100); + + expect(manager.getFilters().starsMin).toBe(100); + expect(manager.hasActiveFilters()).toBe(true); + }); + + it('should clear metric filters with null', () => { + const manager = createFilterManager(); + manager.setMetricFilter('starsMin', 100); + manager.setMetricFilter('starsMin', null); + + expect(manager.getFilters().starsMin).toBeNull(); + expect(manager.hasActiveFilters()).toBe(false); + }); + + it('should report hasActiveFilters for metric filters', () => { + const manager = createFilterManager(); + + manager.setMetricFilter('forksMin', 50); + expect(manager.hasActiveFilters()).toBe(true); + + manager.setMetricFilter('forksMin', null); + expect(manager.hasActiveFilters()).toBe(false); + }); + + it('should clear all filters including metrics', () => { + const manager = createFilterManager(); + manager.setOrganization('developmentseed', true); + manager.setMetricFilter('starsMin', 100); + manager.setMetricFilter('forksMin', 50); + manager.clearAll(); + + expect(manager.getFilters()).toEqual({ + organizations: [], + starsMin: null, + forksMin: null + }); + expect(manager.hasActiveFilters()).toBe(false); + }); + + it('should call onChange when metric filter changes', () => { + let lastFilters = null; + const manager = createFilterManager((filters) => { + lastFilters = filters; + }); + + manager.setMetricFilter('starsMin', 500); + + expect(lastFilters.starsMin).toBe(500); + }); + + it('should ignore invalid metric names', () => { + const manager = createFilterManager(); + manager.setMetricFilter('invalidMetric', 100); + + const filters = manager.getFilters(); + 
expect(filters.invalidMetric).toBeUndefined(); + }); }); diff --git a/js/chart.js b/js/chart.js index b9325c7..1b4646b 100644 --- a/js/chart.js +++ b/js/chart.js @@ -64,7 +64,8 @@ import { removeOrganization, clearFilters, hasOrganization, - hasActiveFilters + hasActiveFilters, + setMetricFilter } from './state/filterState.js'; import { prepareData } from './data/prepare.js'; import { positionContributorNodes } from './layout/positioning.js'; @@ -609,7 +610,6 @@ const createContributorNetworkVisual = ( // NOTE: Pure filter logic has been extracted to src/js/data/filter.js // This function handles integration with the visualization's mutable state. // For new features (e.g., blog charts), import { applyFilters } from './data/filter.js' - // See ARCHITECTURE_RECOMMENDATIONS.md for migration guide. function applyFilters() { // Guard against uninitialized data if (!originalRepos || !originalLinks || !originalContributors) { @@ -623,13 +623,27 @@ const createContributorNetworkVisual = ( visibleRepos = JSON.parse(JSON.stringify(originalRepos)); // If organizations are selected, filter to those organizations - if (hasActiveFilters(activeFilters)) { + if (activeFilters.organizations.length > 0) { visibleRepos = visibleRepos.filter((repo) => { const owner = repo.repo.substring(0, repo.repo.indexOf("/")); return hasOrganization(activeFilters, owner); }); } + // Apply minimum stars filter + if (activeFilters.starsMin !== null) { + visibleRepos = visibleRepos.filter( + (repo) => +repo.repo_stars >= activeFilters.starsMin + ); + } + + // Apply minimum forks filter + if (activeFilters.forksMin !== null) { + visibleRepos = visibleRepos.filter( + (repo) => +repo.repo_forks >= activeFilters.forksMin + ); + } + // Get visible repo names for quick lookup const visibleRepoNames = new Set(visibleRepos.map((r) => r.repo)); @@ -668,7 +682,8 @@ const createContributorNetworkVisual = ( // Debug: Log filtering results (enable via localStorage) if 
(localStorage.getItem('debug-contributor-network') === 'true') { console.debug('=== APPLY FILTERS ==='); - console.debug(`Filters applied: ${activeFilters.organizations.join(", ") || "none"}`); + console.debug(`Org filters: ${activeFilters.organizations.join(", ") || "none"}`); + console.debug(`Stars min: ${activeFilters.starsMin ?? "none"}, Forks min: ${activeFilters.forksMin ?? "none"}`); console.debug(`Data before: ${originalContributors.length} contributors, ${originalRepos.length} repos, ${originalLinks.length} links`); console.debug(`Data after: ${visibleContributors.length} contributors, ${visibleRepos.length} repos, ${visibleLinks.length} links`); console.debug('Visible repos:', visibleRepos.map(r => r.repo)); @@ -1255,6 +1270,13 @@ const createContributorNetworkVisual = ( ); RADIUS_CONTRIBUTOR = positioningResult.RADIUS_CONTRIBUTOR; CONTRIBUTOR_RING_WIDTH = positioningResult.CONTRIBUTOR_RING_WIDTH; + + // Pre-compute SF now that RADIUS_CONTRIBUTOR is known. + // This ensures SF is consistent before any downstream code references it. + // resize() will re-derive the same value, but setting it early prevents + // any intermediate code from seeing a stale SF. + SF = calculateScaleFactor(WIDTH, DEFAULT_SIZE, RADIUS_CONTRIBUTOR, CONTRIBUTOR_RING_WIDTH); + nodes_central = runCollaborationSimulation( nodes, links, @@ -1339,14 +1361,19 @@ const createContributorNetworkVisual = ( } }); - // Re-setup interaction handlers + // Calculate SF early so it's finalized before interaction handlers capture it. + // positionContributorNodes() determines RADIUS_CONTRIBUTOR and CONTRIBUTOR_RING_WIDTH, + // which resize() uses to compute SF. By calling resize() first, we ensure setupHover() + // and setupClick() capture the final SF value — preventing the hit-detection offset bug + // that occurred when SF changed between setupHover() and resize(). + // This matches the order in the initial chart() function (resize before interactions). 
+ chart.resize(); + + // Re-setup interaction handlers AFTER resize so they have correct WIDTH/HEIGHT/SF values setupHover(); setupClick(); setupZoom(); - // Redraw with new scale factors - chart.resize(); - return chart; }; @@ -1367,6 +1394,18 @@ const createContributorNetworkVisual = ( return chart; }; + /** + * Updates a metric-based repo filter and rebuilds the chart + * @param {string} metric - Metric name ('starsMin' or 'forksMin') + * @param {number|null} value - Minimum threshold value, or null to clear + * @returns {Object} - The chart instance + */ + chart.setRepoFilter = function (metric, value) { + setMetricFilter(activeFilters, metric, value); + chart.rebuild(); + return chart; + }; + chart.getActiveFilters = function () { return { ...activeFilters }; }; diff --git a/js/config/theme.js b/js/config/theme.js index 71be337..9ae2fb7 100644 --- a/js/config/theme.js +++ b/js/config/theme.js @@ -45,9 +45,9 @@ export const FONTS = { bold: 700, // Default sizes (scaled dynamically in visualization) - baseSizeContributor: 11, - baseSizeRepo: 10, - baseSizeOwner: 12 + baseSizeContributor: 14, + baseSizeRepo: 13, + baseSizeOwner: 15 }; /** @@ -76,7 +76,7 @@ export const LAYOUT = { // Contributor ring positioning contributorPadding: 20, // Default, overridden by config - maxContributorWidth: 55, // The maximum width (at SF = 1) of the contributor name before it gets wrapped + maxContributorWidth: 70, // The maximum width (at SF = 1) of the contributor name before it gets wrapped // Canvas sizing defaultSize: 1500, // Default canvas size @@ -86,7 +86,7 @@ export const LAYOUT = { linkWidthExponent: 0.75, // Collision detection - bboxPadding: 2 + bboxPadding: 4 }; /** diff --git a/js/data/filter.js b/js/data/filter.js index 50fa847..e7228b4 100644 --- a/js/data/filter.js +++ b/js/data/filter.js @@ -52,6 +52,28 @@ export function filterReposByOrganization(repos, organizations, centralRepo = nu }); } +/** + * Filter repositories by minimum star count. 
+ * + * @param {Array} repos - Array of repository objects with 'repo_stars' property + * @param {number} minStars - Minimum star count threshold + * @returns {Array} Filtered repositories + */ +export function filterReposByStars(repos, minStars) { + return repos.filter(repo => +repo.repo_stars >= minStars); +} + +/** + * Filter repositories by minimum fork count. + * + * @param {Array} repos - Array of repository objects with 'repo_forks' property + * @param {number} minForks - Minimum fork count threshold + * @returns {Array} Filtered repositories + */ +export function filterReposByForks(repos, minForks) { + return repos.filter(repo => +repo.repo_forks >= minForks); +} + /** * Filter links to only those pointing to visible repositories. * @@ -120,6 +142,14 @@ export function applyFilters(originalData, activeFilters, options = {}) { visibleRepos = filterReposByOrganization(visibleRepos, activeFilters.organizations, centralRepo); } + // Apply metric filters + if (activeFilters.starsMin != null) { + visibleRepos = filterReposByStars(visibleRepos, activeFilters.starsMin); + } + if (activeFilters.forksMin != null) { + visibleRepos = filterReposByForks(visibleRepos, activeFilters.forksMin); + } + // Build set of visible repo names for quick lookup const visibleRepoNames = new Set(visibleRepos.map(r => r.repo)); @@ -162,7 +192,9 @@ export function applyFilters(originalData, activeFilters, options = {}) { */ export function createFilterManager(onChange) { let activeFilters = { - organizations: [] + organizations: [], + starsMin: null, + forksMin: null }; return { @@ -195,6 +227,21 @@ export function createFilterManager(onChange) { } }, + /** + * Set a metric filter (starsMin, forksMin) + * @param {string} metric - Metric name + * @param {number|null} value - Minimum threshold, or null to clear + */ + setMetricFilter(metric, value) { + if (metric === 'starsMin' || metric === 'forksMin') { + activeFilters[metric] = value; + } + + if (onChange) { + 
onChange(this.getFilters()); + } + }, + /** * Clear all organization filters */ @@ -206,12 +253,29 @@ export function createFilterManager(onChange) { } }, + /** + * Clear all filters (organizations and metrics) + */ + clearAll() { + activeFilters.organizations = []; + activeFilters.starsMin = null; + activeFilters.forksMin = null; + + if (onChange) { + onChange(this.getFilters()); + } + }, + /** * Check if any filters are active * @returns {boolean} True if any filters are active */ hasActiveFilters() { - return activeFilters.organizations.length > 0; + return ( + activeFilters.organizations.length > 0 || + activeFilters.starsMin !== null || + activeFilters.forksMin !== null + ); } }; } diff --git a/js/render/repoCard.js b/js/render/repoCard.js index 1b53617..588d161 100644 --- a/js/render/repoCard.js +++ b/js/render/repoCard.js @@ -16,10 +16,10 @@ import { min } from '../utils/helpers.js'; */ export const REPO_CARD_CONFIG = { lineHeight: 1.4, - sectionSpacing: 20, // Balanced spacing (was 24, reduced to 18, now 20 for better readability) - labelFontSize: 11, - valueFontSize: 11.5, - headerFontSize: 12, + sectionSpacing: 24, // Balanced spacing (scaled up for larger font sizes) + labelFontSize: 14, + valueFontSize: 14, + headerFontSize: 15, labelOpacity: 0.6, valueOpacity: 0.9, warningOpacity: 0.7, diff --git a/js/render/shapes.js b/js/render/shapes.js index 03f1290..4cf8dc2 100644 --- a/js/render/shapes.js +++ b/js/render/shapes.js @@ -284,12 +284,12 @@ export function drawContributorRing(context, SF, RADIUS_CONTRIBUTOR, CONTRIBUTOR const center_x = 0; const center_y = 0; - // Small offset for visual refinement (matches original ORCA implementation) - const O = 4; + // Position the ring so contributor dots sit at 1/3 from the inner edge + // (more ring space on the outer/name side, less empty space inside the dots) const LW = CONTRIBUTOR_RING_WIDTH; - const radius_inner = (RADIUS_CONTRIBUTOR - LW / 2 + O) * SF; - const radius_outer = (RADIUS_CONTRIBUTOR + LW / 2) 
* SF; + const radius_inner = (RADIUS_CONTRIBUTOR - LW / 3) * SF; + const radius_outer = (RADIUS_CONTRIBUTOR + 2 * LW / 3) * SF; context.save(); diff --git a/js/render/text.js b/js/render/text.js index 151acd3..02b08e1 100644 --- a/js/render/text.js +++ b/js/render/text.js @@ -39,7 +39,7 @@ export function setFont(context, fontSize, fontWeight, fontStyle = 'normal', fon * @param {number} SF - Scale factor * @param {number} fontSize - Base font size */ -export function setRepoFont(context, SF = 1, fontSize = 12) { +export function setRepoFont(context, SF = 1, fontSize = 15) { setFont(context, fontSize * SF, 400, 'normal'); } @@ -61,7 +61,7 @@ export function setCentralRepoFont(context, SF = 1, fontSize = 15) { * @param {number} SF - Scale factor * @param {number} fontSize - Base font size */ -export function setOwnerFont(context, SF = 1, fontSize = 12) { +export function setOwnerFont(context, SF = 1, fontSize = 15) { setFont(context, fontSize * SF, 700, 'normal'); } @@ -72,7 +72,7 @@ export function setOwnerFont(context, SF = 1, fontSize = 12) { * @param {number} SF - Scale factor * @param {number} fontSize - Base font size */ -export function setContributorFont(context, SF = 1, fontSize = 13) { +export function setContributorFont(context, SF = 1, fontSize = 16) { setFont(context, fontSize * SF, 700, 'italic'); } diff --git a/js/render/tooltip.js b/js/render/tooltip.js index eaea19c..2782fb2 100644 --- a/js/render/tooltip.js +++ b/js/render/tooltip.js @@ -31,22 +31,22 @@ function calculateRepoTooltipHeight(d, interactionState, SF, formatDate, formatD let height = 0; // Header section - height += 18; // Top padding (balanced) - height += 12 * line_height; // "Repository" label (12px font * 1.2 line height = 14.4px) - height += 18; // Spacing (balanced) + height += 22; // Top padding (balanced) + height += 15 * line_height; // "Repository" label (15px font * 1.2 line height = 18px) + height += 22; // Spacing (balanced) // Title section (owner/name) - two lines - height 
+= 15 * line_height; // Owner line (15px font * 1.2 = 18px) - height += 15 * line_height; // Name line (15px font * 1.2 = 18px) - height += 42; // Spacing to dates (matches render: y += 42 accounts for name at y+18 plus padding) + height += 19 * line_height; // Owner line (19px font * 1.2 = 22.8px) + height += 19 * line_height; // Name line (19px font * 1.2 = 22.8px) + height += 50; // Spacing to dates (matches render: y += 50 accounts for name at y+22.8 plus padding) // Dates section - height += 11 * line_height; // Created date (11px font * 1.2 = 13.2px) - height += 11 * line_height; // Updated date (11px font * 1.2 = 13.2px) - height += 20; // Spacing before stats (balanced) + height += 14 * line_height; // Created date (14px font * 1.2 = 16.8px) + height += 14 * line_height; // Updated date (14px font * 1.2 = 16.8px) + height += 24; // Spacing before stats (balanced) // Stats line - height += config.headerFontSize * line_height; // Stats line (12px font * 1.2 = 14.4px) + height += config.headerFontSize * line_height; // Stats line (15px font * 1.2 = 18px) // Note: renderLanguages will add its own sectionSpacing (24px) before it // Languages section (if present) @@ -90,12 +90,12 @@ function calculateRepoTooltipHeight(d, interactionState, SF, formatDate, formatD if (interactionState.clickActive && interactionState.clickedNode && interactionState.clickedNode.type === "contributor") { const link = interactionState.clickedNode.data.links_original?.find((l) => l.repo === d.id); if (link) { - height += 28; // Spacing - height += 11 * line_height; // "X commits by" line (11px font * 1.2 = 13.2px) - height += 16; // Spacing - height += 11.5 * line_height; // Contributor name (11.5px font * 1.2 = 13.8px) - height += 18; // Spacing - height += 11 * line_height; // Date range line (11px font * 1.2 = 13.2px) + height += 34; // Spacing + height += 14 * line_height; // "X commits by" line (14px font * 1.2 = 16.8px) + height += 20; // Spacing + height += 14 * line_height; // 
Contributor name (14px font * 1.2 = 16.8px) + height += 22; // Spacing + height += 14 * line_height; // Date range line (14px font * 1.2 = 16.8px) } } @@ -121,14 +121,14 @@ function calculateRepoTooltipWidth(context, d, interactionState, SF, formatDate, let maxWidth = 0; // Measure title text - setFont(context, 14 * SF, 700, "normal"); + setFont(context, 18 * SF, 700, "normal"); let width = context.measureText(d.data.owner).width * 1.25; if (width > maxWidth) maxWidth = width; width = context.measureText(d.data.name).width * 1.25; if (width > maxWidth) maxWidth = width; // Measure date text - setFont(context, 11 * SF, 400, "normal"); + setFont(context, 14 * SF, 400, "normal"); width = context.measureText(`Created in ${formatDate(d.data.createdAt)}`).width * 1.25; if (width > maxWidth) maxWidth = width; width = context.measureText(`Last updated in ${formatDate(d.data.updatedAt)}`).width * 1.25; @@ -197,16 +197,16 @@ function calculateRepoTooltipWidth(context, d, interactionState, SF, formatDate, if (interactionState.clickActive && interactionState.clickedNode && interactionState.clickedNode.type === "contributor") { const link = interactionState.clickedNode.data.links_original?.find((l) => l.repo === d.id); if (link) { - setFont(context, 11 * SF, 400, "italic"); + setFont(context, 14 * SF, 400, "italic"); const commitText = link.commit_count === 1 ? 
'1 commit by' : `${link.commit_count} commits by`; width = context.measureText(commitText).width * 1.25; if (width > maxWidth) maxWidth = width; - setFont(context, 11.5 * SF, 700, "normal"); + setFont(context, 14 * SF, 700, "normal"); width = context.measureText(interactionState.clickedNode.data.contributor_name).width * 1.25; if (width > maxWidth) maxWidth = width; - setFont(context, 11 * SF, 400, "normal"); + setFont(context, 14 * SF, 400, "normal"); let dateText = ''; if (formatDateExact(link.commit_sec_min) === formatDateExact(link.commit_sec_max)) { dateText = `On ${formatDateExact(link.commit_sec_max)}`; @@ -224,7 +224,7 @@ function calculateRepoTooltipWidth(context, d, interactionState, SF, formatDate, maxWidth = maxWidth / SF + 80; // Ensure minimum width - return Math.max(maxWidth, 280); + return Math.max(maxWidth, 320); } /** @@ -263,12 +263,12 @@ export function drawTooltip(context, d, config, interactionState, central_repo, if (d.type === "contributor") { // Contributor tooltip - H = 80; - W = 280; + H = 100; + W = 320; } else if (d.type === "owner") { // Owner tooltip - keep existing logic for now - H = 93; - W = 280; + H = 116; + W = 320; } else if (d.type === "repo") { // Repository tooltip - use dynamic calculations // Calculate height dynamically based on all content @@ -276,13 +276,13 @@ export function drawTooltip(context, d, config, interactionState, central_repo, // Calculate width dynamically based on all text content W = calculateRepoTooltipWidth(context, d, interactionState, SF, formatDate, formatDateExact, formatDigit); } else { - H = 93; - W = 280; + H = 116; + W = 320; } // Write all the repos for the "owner" nodes, but make sure they are not wider than the box and save each line to write out if (d.type === "owner") { - font_size = 11.5; + font_size = 14; setFont(context, font_size * SF, 400, "normal"); d.text_lines = []; text = ""; @@ -308,10 +308,10 @@ export function drawTooltip(context, d, config, interactionState, central_repo, // 
Recalculate width for owner tooltips based on text lines let tW = 0; - setFont(context, 15 * SF, 700, "normal"); + setFont(context, 20 * SF, 700, "normal"); tW = context.measureText(d.data.owner).width * 1.25; // Check if any of the "repo lines" are longer than the owner's name - setFont(context, 11.5 * SF, 400, "normal"); + setFont(context, 14 * SF, 400, "normal"); d.text_lines.forEach((t) => { let line_width = context.measureText(t).width * 1.25; if (line_width > tW) tW = line_width; @@ -320,7 +320,7 @@ export function drawTooltip(context, d, config, interactionState, central_repo, if (tW + 40 * SF > W * SF) W = tW / SF + 40; } else if (d.type === "contributor") { // Recalculate width for contributor tooltips - setFont(context, 15 * SF, 700, "normal"); + setFont(context, 20 * SF, 700, "normal"); text = d.data ? d.data.contributor_name : d.author_name; let tW = context.measureText(text).width * 1.25; // Update the max width if the text is wider @@ -365,8 +365,8 @@ export function drawTooltip(context, d, config, interactionState, central_repo, context.textBaseline = "middle"; // Contributor, owner or repo - y = 18; // Balanced - font_size = 12; + y = 22; // Balanced + font_size = 15; setFont(context, font_size * SF, 400, "italic"); context.fillStyle = COL; text = ""; @@ -376,29 +376,29 @@ export function drawTooltip(context, d, config, interactionState, central_repo, renderText(context, text, x * SF, y * SF, 2.5 * SF); context.fillStyle = COLOR_TEXT; - y += 18; // Balanced + y += 22; // Balanced if (d.type === "contributor") { // The contributor's name - font_size = 16; + font_size = 20; setFont(context, font_size * SF, 700, "normal"); text = d.data ? 
d.data.contributor_name : d.author_name; renderText(context, text, x * SF, y * SF, 1.25 * SF); } else if (d.type === "owner") { // The name - font_size = 16; + font_size = 20; setFont(context, font_size * SF, 700, "normal"); renderText(context, d.data.owner, x * SF, y * SF, 1.25 * SF); // Which repos fall under this owner in this visual - y += 28; - font_size = 11; + y += 34; + font_size = 14; context.globalAlpha = 0.6; setFont(context, font_size * SF, 400, "italic"); renderText(context, "Included repositories", x * SF, y * SF, 2 * SF); // Write out all the repositories - font_size = 11.5; + font_size = 14; y += font_size * line_height + 4; context.globalAlpha = 0.9; setFont(context, font_size * SF, 400, "normal"); @@ -408,7 +408,7 @@ export function drawTooltip(context, d, config, interactionState, central_repo, }); // forEach } else if (d.type === "repo") { // The repo's name and owner - font_size = 15; + font_size = 19; setFont(context, font_size * SF, 700, "normal"); renderText(context, `${d.data.owner}/`, x * SF, y * SF, 1.25 * SF); renderText( @@ -420,9 +420,9 @@ export function drawTooltip(context, d, config, interactionState, central_repo, ); // The creation date - // Note: name was rendered at y + 18, so we need to move past it (18) plus add spacing (24) = 42 - y += 42; - font_size = 11; + // Note: name was rendered at y + line_height*font_size, so we need to move past it plus add spacing + y += 50; + font_size = 14; context.globalAlpha = 0.7; setFont(context, font_size * SF, 400, "normal"); renderText( @@ -447,7 +447,7 @@ export function drawTooltip(context, d, config, interactionState, central_repo, // ============================================================ // Stats line: stars, forks, watchers - y += 20; // Balanced + y += 24; // Balanced renderStatsLine(context, d.data, x, y, SF, formatDigit); // Languages section @@ -476,15 +476,15 @@ export function drawTooltip(context, d, config, interactionState, central_repo, if (!link) return; let 
num_commits = link.commit_count; - y += 20; // Reduced from 28 - font_size = 11; + y += 24; // Spacing before clicked section + font_size = 14; context.globalAlpha = 0.6; setFont(context, font_size * SF, 400, "italic"); text = num_commits === 1 ? "1 commit by" : `${num_commits} commits by`; renderText(context, text, x * SF, y * SF, 2 * SF); - y += 12; // Reduced from 16 - font_size = 11.5; + y += 15; // Spacing to contributor name + font_size = 14; context.globalAlpha = 0.9; setFont(context, font_size * SF, 700, "normal"); renderText( @@ -495,8 +495,8 @@ export function drawTooltip(context, d, config, interactionState, central_repo, 1.25 * SF, ); - y += 14; // Reduced from 18 - font_size = 11; + y += 17; // Spacing to date range + font_size = 14; context.globalAlpha = 0.6; setFont(context, font_size * SF, 400, "normal"); if ( diff --git a/js/simulations/collaborationSimulation.js b/js/simulations/collaborationSimulation.js index d4d6c45..a4e8e04 100644 --- a/js/simulations/collaborationSimulation.js +++ b/js/simulations/collaborationSimulation.js @@ -104,7 +104,7 @@ export function runCollaborationSimulation( let r = d.type === "owner" ? 
d.max_radius : d.r; let top = max(r, d.r + text_height); - let w = max(r * 2, text_size.width * 1.25) + 10; + let w = max(r * 2, text_size.width * 1.25) + 14; d.bbox = [ [-w / 2, -top], diff --git a/js/state/filterState.js b/js/state/filterState.js index 28bc233..15e1648 100644 --- a/js/state/filterState.js +++ b/js/state/filterState.js @@ -5,11 +5,13 @@ /** * Creates a new filter state object - * @returns {Object} Filter state with organizations array + * @returns {Object} Filter state with organizations array and metric thresholds */ export function createFilterState() { return { organizations: [], // e.g., ["developmentseed", "stac-utils"] + starsMin: null, // Minimum stars threshold (null = no filter) + forksMin: null, // Minimum forks threshold (null = no filter) }; } @@ -37,13 +39,29 @@ export function removeOrganization(state, org) { return state; } +/** + * Sets a numeric metric filter (e.g., starsMin, forksMin) + * @param {Object} state - The filter state object + * @param {string} metric - Metric name ('starsMin' or 'forksMin') + * @param {number|null} value - Minimum threshold value, or null to clear + * @returns {Object} Updated filter state + */ +export function setMetricFilter(state, metric, value) { + if (metric === 'starsMin' || metric === 'forksMin') { + state[metric] = value; + } + return state; +} + /** * Clears all active filters * @param {Object} state - The filter state object - * @returns {Object} Updated filter state with empty organizations array + * @returns {Object} Updated filter state with empty organizations array and null metrics */ export function clearFilters(state) { state.organizations = []; + state.starsMin = null; + state.forksMin = null; return state; } @@ -63,5 +81,9 @@ export function hasOrganization(state, org) { * @returns {boolean} True if any filters are active */ export function hasActiveFilters(state) { - return state.organizations.length > 0; + return ( + state.organizations.length > 0 || + state.starsMin !== null || + 
state.forksMin !== null + ); }