Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f3fe407
Reword and reprioritize main.py TODO list
robertsamples Jun 29, 2026
9feb9ec
Add search-tab run-check, clean up main.py's dead imports
robertsamples Jun 29, 2026
b5a804c
Add GroupSetModel.move() for groupset reordering (model layer only)
robertsamples Jun 29, 2026
504410f
Clarify stale PCA TODO note
robertsamples Jun 29, 2026
f06a971
Merge remote-tracking branch 'origin/main' into todo-cleanup
robertsamples Jun 29, 2026
e110fa1
Merge branch 'main' into todo-cleanup
robertsamples Jun 29, 2026
c0c160c
Add Qt-free multivariate ordination backend (PCA/NMDS/PLS-DA)
robertsamples Jun 29, 2026
9c15631
Rework the mislabeled "PCA" plot into a multivariate ordination tab
robertsamples Jun 29, 2026
c7b6a01
Fix ordination feedback: scaling, axis limits, NMDS %explained, bar s…
robertsamples Jun 29, 2026
ede1968
Add dendrogram purity coloring: technical/biological replicate QC view
robertsamples Jun 29, 2026
573cbfa
Dendrogram: polyphyletic branches in red, add a no-coloring option
robertsamples Jun 29, 2026
bab713d
Replace treemap/upset PNG round-trip with real canvas plots
robertsamples Jun 29, 2026
3be44dc
Dendrogram: bridge-only red coloring; move bootstrap/collapse checkbo…
robertsamples Jun 29, 2026
5ca9baf
Fix dendrogram coloring: red = proven non-monophyly (label-set overlap)
robertsamples Jun 29, 2026
66b3e02
Dendrogram: add Use Sample/Group Names labels; fix AU/BP label scaling
robertsamples Jun 29, 2026
9697aa3
Docs: update mkdocs guide for ordination rework and dendrogram improv…
robertsamples Jun 29, 2026
52c1d37
correlation matrix control improvements
robertsamples Jun 30, 2026
6a28902
Update tests.yml
robertsamples Jun 30, 2026
03e6d22
bugfixes, docs update
robertsamples Jun 30, 2026
6a65373
implement npatlas fetch, bug report, update mgmt
robertsamples Jun 30, 2026
698af6f
efficiency updates
robertsamples Jun 30, 2026
0caf396
Merge branch 'main' into dev
robertsamples Jun 30, 2026
5f9a119
resolve ci errors on python 3.11 and 3.9 macos
robertsamples Jun 30, 2026
d9857e6
Merge branch 'dev' of https://github.com/robertsamples/mpact into dev
robertsamples Jun 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
448 changes: 1 addition & 447 deletions .gitignore

Large diffs are not rendered by default.

14 changes: 9 additions & 5 deletions code/MSFaST.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from groupsets import normalize_graphfilters
from datetime import datetime
import time
from pathlib import Path

#---Classes---

Expand Down Expand Up @@ -187,6 +186,11 @@ def run_MSFaST(params):
# Filtering and error propagation
print('Filtering data')
ionfilters = {}
# Initialise here (not only inside `if analysis_params.grpave:`) so the
# unconditional groupionlists[...] writes further down (and the blank
# filter, which reads it) can't raise NameError if grpave is ever off.
# The GUI currently forces grpave=True, but loaded sessions/tests need not.
groupionlists = {}
if analysis_params.relfil:
ionfilters = filter.relationalfilter(analysis_params, ionfilters)
if analysis_params.merge:
Expand Down Expand Up @@ -254,7 +258,7 @@ def run_MSFaST(params):
msdata_filtered = pd.read_csv(analysis_params.outputdir / (analysis_params.filename.stem + '_filtered.csv'), sep = ',', header = [0, 1, 2], index_col = [0, 1, 2])
analysisrec = open(analysis_params.outputdir / 'analysisinfo.txt',"w")
analysisrec.writelines(['Analysis Date: ' + str(datetime.now()) + '\n',
'Runetime: ' + str(round(runtime, 2)) + ' seconds\n',
'Runtime: ' + str(round(runtime, 2)) + ' seconds\n',
'Input file: ' + str(analysis_params.filename) + '\n',
'Sample list: ' + str(analysis_params.samplelistfilename) + '\n',
'Extract metadata file: ' + str(analysis_params.extractmetadatafilename) + '\n',
Expand All @@ -280,10 +284,10 @@ def run_MSFaST(params):
text = ''
if analysis_params.relfil:
text += 'Features failing peak correction filtering: ' + str(len(ionfilters['relfil'].ions)) + '/' + str(len(msdata_unformatted.index)) + ' ' + str(round(100 * len(ionfilters['relfil'].ions) / len(msdata_unformatted.index), 2)) + '%\n'
if analysis_params.blnkfltr: #FIX THIS REF TO "BLANKS"
if analysis_params.blnkfltr:
text += 'Features failing blank filtering: ' + str(len(groupionlists[analysis_params.blnkgrp])) + '/' + str(len(msdata_unformatted.index)) + ' ' + str(round(100 * len(groupionlists[analysis_params.blnkgrp]) / len(msdata_unformatted.index), 2)) + '%\n'
if analysis_params.decon:
text += 'Features failing blank filtering: ' + str(len(ionfilters['insource'].ions)) + '/' + str(len(msdata_unformatted.index)) + ' ' + str(round(100 * len(ionfilters['insource'].ions) / len(msdata_unformatted.index), 2)) + '%\n'
text += 'Features failing in-source/deconvolution filtering: ' + str(len(ionfilters['insource'].ions)) + '/' + str(len(msdata_unformatted.index)) + ' ' + str(round(100 * len(ionfilters['insource'].ions) / len(msdata_unformatted.index), 2)) + '%\n'
if analysis_params.CVfil:
text += 'Features failing CV filtering: ' + str(len(ionfilters['cv'].ions)) + '/' + str(len(msdata_unformatted.index)) + ' ' + str(round(100 * len(ionfilters['cv'].ions) / len(msdata_unformatted.index), 2)) + '%\n'
text += 'Features failing any filters: ' + str(len(msdata_unformatted.index) - len(msdata_filtered.index)) + '/' + str(len(msdata_unformatted.index)) + ' ' + str(round(100 * (len(msdata_unformatted.index) - len(msdata_filtered.index)) / len(msdata_unformatted.index), 2)) + '%\n'
Expand All @@ -310,7 +314,7 @@ def run_MSFaST(params):
'RT/mz/FC: ' + str(analysis_params.FC3Dplt) + ' ' + str(analysis_params.statstgrps) + '\n',
'KMD/mz ' + str(analysis_params.KMD) + '\n',
#'KMD/mz/RT ' + str(analysis_params.___) + '\n',
'PCA unfitlered: ' + str(analysis_params.PCA) + '\n',
'PCA unfiltered: ' + str(analysis_params.PCA) + '\n',
'PCA filtered: ' + str(analysis_params.PCA) + '\n',
'Dendrogram (ward) unfiltered: ' + str(analysis_params.Dendrogram) + '\n',
'Dendrogram (ward) Filtered: ' + str(analysis_params.Dendrogram) + '\n',
Expand Down
185 changes: 185 additions & 0 deletions code/crashreport.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
"""
MPACT
Copyright 2022, Robert M. Samples, Sara P. Puckett, and Marcy J. Balunas

Qt-free crash/error reporting. Installs a ``sys.excepthook`` that, on any
otherwise-unhandled exception:

1. formats a full report (traceback + environment: MPACT/Python/platform
versions, timestamp, optional context such as the tail of the run log),
2. writes it to a timestamped file under a crash-log directory (so there's a
durable record even if the user dismisses the dialog), and
3. hands the report to a GUI callback that asks the user whether to send it.

The "send" path is deliberately backend-free: it builds a pre-filled GitHub
*new issue* URL (title + body) for the MPACT repo, so reporting is one click
in the browser and nothing leaves the user's machine until they choose to
submit it. That satisfies "prompt the user before sending" without any cloud
egress, DSN, or account.

Why not Sentry (the obvious off-the-shelf option): ``sentry-sdk`` is excellent
for hosted/web services but (a) sends events to a Sentry project by default --
exactly the silent-egress this tool should avoid for a desktop research app,
(b) needs a DSN/account to be provisioned, and (c) still needs a custom
``before_send`` hook + dialog to honour "ask first." For a single-user desktop
tool the local-log + pre-filled-GitHub-issue flow gives the same practical
benefit (a complete traceback in the maintainer's hands) with no infrastructure
and no privacy surprise. If MPACT ever ships to many non-technical users and a
central error feed becomes worth it, Sentry with ``before_send`` gating is the
documented upgrade path.

This module is Qt-free and unit-tested (see ``tests/test_crashreport.py``); the
GUI dialog is injected as a plain callback.
"""

import os
import platform
import sys
import time
import traceback
import urllib.parse

DEFAULT_REPO = 'robertsamples/mpact'
# GitHub rejects extremely long issue URLs; keep the prefilled body well under
# the practical limit so the link always opens (the full report is always in
# the log file regardless).
_MAX_ISSUE_BODY = 6000


def _app_version():
try:
from mpactupdate import __version__
return __version__
except Exception:
return 'unknown'


def format_report(exc_type, exc_value, exc_tb, context=None, now=None):
    """Render an exception and its environment as a plain-text crash report.

    Args:
        exc_type/exc_value/exc_tb: the ``sys.exc_info()``-style triple.
        context: optional extra text; when truthy it is appended under a
            "Context" heading (e.g. the tail of the run log).
        now: epoch seconds used for the "Time" line; defaults to the current
            time (injectable for tests).

    Returns:
        The report as one newline-terminated string: a header, the
        time/version/Python/platform lines, the formatted traceback and,
        if supplied, the context section.
    """
    if now is None:
        now = time.time()
    timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(now))
    formatted_tb = traceback.format_exception(exc_type, exc_value, exc_tb)
    trace = ''.join(formatted_tb).rstrip()

    sections = [
        'MPACT crash report',
        '==================',
        'Time: ' + timestamp,
        'MPACT version: ' + _app_version(),
        'Python: ' + sys.version.split()[0],
        'Platform: ' + platform.platform(),
        '',
        'Traceback:',
        trace,
    ]
    if context:
        sections.extend(('', 'Context:', str(context).rstrip()))
    # A trailing empty element makes the joined text end with a newline.
    sections.append('')
    return '\n'.join(sections)


def write_log(report, log_dir, now=None):
    """Write ``report`` to a timestamped file under ``log_dir``.

    Creates ``log_dir`` if needed. The file is named
    ``mpact_crash_YYYYMMDD_HHMMSS.log``. Because that stamp only has
    one-second resolution, the file is created exclusively and, if the name is
    already taken (a second crash in the same second), ``_1``, ``_2``, ... is
    appended before the extension so an earlier report is never overwritten.

    Args:
        report: the report text to write.
        log_dir: directory that receives the log file.
        now: epoch seconds for the filename stamp; defaults to the current
            time (injectable for tests).

    Returns:
        The path written, or ``None`` if the write failed (reporting must
        never raise from inside an excepthook).
    """
    now = time.time() if now is None else now
    max_attempts = 1000  # bound the collision search so it can never spin
    try:
        os.makedirs(log_dir, exist_ok=True)
        stem = 'mpact_crash_' + time.strftime('%Y%m%d_%H%M%S', time.localtime(now))
        for attempt in range(max_attempts):
            suffix = '_' + str(attempt) if attempt else ''
            path = os.path.join(log_dir, stem + suffix + '.log')
            try:
                # 'x' fails instead of truncating when the file already exists.
                with open(path, 'x', encoding='utf-8', errors='replace') as handle:
                    handle.write(report)
            except FileExistsError:
                continue
            return path
        return None
    except Exception:
        # Deliberately broad: a failure to log must not mask the real crash.
        return None


def one_line_summary(exc_type, exc_value):
    """Return a concise ``TypeName: message`` for use as an issue title.

    Only the first line of the exception's message is used. When the message
    is empty or whitespace-only, or when ``str(exc_value)`` itself raises
    (an exception class with a broken ``__str__``), just the type name is
    returned so that building the title can never abort crash reporting.

    Args:
        exc_type: the exception class (its ``__name__`` is used when present,
            otherwise ``str(exc_type)``).
        exc_value: the exception instance (or any object convertible with
            ``str``).

    Returns:
        ``'TypeName: first line of message'`` or ``'TypeName'``.
    """
    name = getattr(exc_type, '__name__', str(exc_type))
    try:
        text = str(exc_value).strip()
    except Exception:
        # A failing __str__ must not take the whole report down with it.
        text = ''
    if not text:
        return name
    message = text.splitlines()[0]
    # Trailing whitespace/colons are trimmed so the title never ends in ':'.
    return (name + ': ' + message).strip().rstrip(':').strip()


def build_issue_url(report, title, repo=DEFAULT_REPO):
    """Return a GitHub 'new issue' URL with the title and body prefilled.

    The body is a short prompt for the user followed by the report inside a
    code fence. Reports longer than :data:`_MAX_ISSUE_BODY` characters are cut
    to that length and a truncation marker is appended, so the URL stays
    openable; the untruncated report is expected to live in the on-disk log.

    Args:
        report: the crash report text.
        title: the issue title.
        repo: ``owner/name`` of the GitHub repository.

    Returns:
        The ``https://github.com/<repo>/issues/new?...`` URL.
    """
    if len(report) > _MAX_ISSUE_BODY:
        excerpt = report[:_MAX_ISSUE_BODY] + '\n...\n[truncated -- see attached crash log]'
    else:
        excerpt = report
    markdown = ''.join((
        '**Describe what you were doing when this happened:**\n\n\n',
        '---\n```\n',
        excerpt,
        '\n```\n',
    ))
    params = urllib.parse.urlencode([('title', title), ('body', markdown)])
    return 'https://github.com/' + repo + '/issues/new?' + params


def make_excepthook(report_handler, log_dir=None, repo=DEFAULT_REPO,
                    context_provider=None, prev_hook=None):
    """Build (but don't install) an excepthook.

    The returned hook first chains to ``prev_hook`` so the traceback still
    reaches the console, then formats a report, optionally writes it to
    ``log_dir``, builds a prefilled issue URL and passes all three to
    ``report_handler``.

    ``KeyboardInterrupt`` is only chained to ``prev_hook`` and is not logged or
    reported: the interpreter routes every uncaught exception except
    ``SystemExit`` through ``sys.excepthook``, and a user pressing Ctrl-C is
    not a crash worth filing an issue about.

    Args:
        report_handler: callable ``handler(report, log_path, issue_url)`` that
            shows the user the report and offers to send it. Exceptions raised
            by the handler are swallowed (an excepthook must not itself raise).
            May be ``None`` to skip the callback.
        log_dir: directory for crash logs (skipped if falsy).
        repo: GitHub repo for the prefilled issue URL.
        context_provider: optional zero-arg callable returning extra context
            text to embed (called defensively; failure is ignored).
        prev_hook: a previous excepthook to chain to (defaults to the standard
            ``sys.__excepthook__`` so the traceback still reaches the console).

    Returns:
        A function with the ``(exc_type, exc_value, exc_tb)`` signature.
    """
    prev_hook = prev_hook if prev_hook is not None else sys.__excepthook__

    def _hook(exc_type, exc_value, exc_tb):
        # Always let the default hook print to stderr first (and never let our
        # own reporting suppress that or raise over it).
        try:
            prev_hook(exc_type, exc_value, exc_tb)
        except Exception:
            pass

        # A user interrupt is not a crash: no log file, no "send report" prompt.
        try:
            interrupted = issubclass(exc_type, KeyboardInterrupt)
        except TypeError:
            # exc_type was not a class; treat it as an ordinary failure.
            interrupted = False
        if interrupted:
            return

        try:
            context = None
            if context_provider is not None:
                try:
                    context = context_provider()
                except Exception:
                    context = None
            report = format_report(exc_type, exc_value, exc_tb, context=context)
            log_path = write_log(report, log_dir) if log_dir else None
            title = 'Crash: ' + one_line_summary(exc_type, exc_value)
            issue_url = build_issue_url(report, title, repo=repo)
            if report_handler is not None:
                report_handler(report, log_path, issue_url)
        except Exception:
            # Reporting failed -- the default hook already printed the real
            # traceback, so just give up quietly rather than masking it.
            pass

    return _hook


def install_excepthook(report_handler, log_dir=None, repo=DEFAULT_REPO,
                       context_provider=None):
    """Replace ``sys.excepthook`` with the crash-reporting hook.

    The hook that was installed beforehand is passed on as ``prev_hook`` so it
    is still invoked, and is returned so callers can restore it later.
    """
    previous_hook = sys.excepthook
    crash_hook = make_excepthook(
        report_handler,
        log_dir=log_dir,
        repo=repo,
        context_provider=context_provider,
        prev_hook=previous_hook,
    )
    sys.excepthook = crash_hook
    return previous_hook
59 changes: 47 additions & 12 deletions code/dbsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
"""

import numpy as np
import pandas as pd

from csvcache import cached_read_csv, invalidate

Expand Down Expand Up @@ -39,18 +38,54 @@ def search_npatlas(outputdir, filename_stem, atlas, ppm_threshold):
msdata = cached_read_csv(outputdir / (filename_stem + '_filtered.csv'),
sep=',', header=[2], index_col=None).iloc[:, :3]

for _, mrow in msdata.iterrows():
# Iterates over iondict, filters DB matches within window.
# Repeats for adducts, uses length of concat DF for feature hits
mass = mrow['m/z']
hits_h = atlas[abs(1000000 * (atlas['compound_m_plus_h'] - mass) / atlas['compound_m_plus_h']) < ppm_threshold].copy()
hits_h['ppm'] = abs(1000000 * (hits_h['compound_m_plus_h'] - mass) / hits_h['compound_m_plus_h'])
hits_na = atlas[abs(1000000 * (atlas['compound_m_plus_na'] - mass) / atlas['compound_m_plus_na']) < ppm_threshold].copy()
hits_na['ppm'] = abs(1000000 * (hits_na['compound_m_plus_na'] - mass) / hits_na['compound_m_plus_na'])
hits = pd.concat([hits_h, hits_na])
# Pre-sort the two adduct-mass columns once so each feature only tests a
# tiny m/z window (via searchsorted) instead of scanning all ~36k atlas
# rows twice -- the old per-feature ``atlas[boolean_mask]`` over the whole
# table was O(features x atlas_rows). The exact original ppm test
# (``abs(1e6*(atlas_mz - mass)/atlas_mz) < ppm_threshold``) is re-applied to
# the windowed candidates, so the matched set is bit-for-bit identical; the
# window (mass*(1 +/- 2*t)) is a safe superset of the true ppm window for
# the small tolerances used here. Verified output-identical (hitdb frames,
# incl. row order + ppm, and the iondict 'hits' column) against the old
# implementation on the real example dataset (~5x faster there).
mph = atlas['compound_m_plus_h'].to_numpy(dtype=float)
mna = atlas['compound_m_plus_na'].to_numpy(dtype=float)
order_h = np.argsort(mph, kind='stable'); sorted_h = mph[order_h]
order_na = np.argsort(mna, kind='stable'); sorted_na = mna[order_na]
t = ppm_threshold / 1e6

def _match(mass, sorted_vals, order, col_vals):
# Atlas positions whose ppm error vs `mass` is below threshold, in
# ascending atlas-position (i.e. original boolean-mask) order, plus
# their ppm values.
lo = np.searchsorted(sorted_vals, mass * (1 - 2 * t), side='left')
hi = np.searchsorted(sorted_vals, mass * (1 + 2 * t), side='right')
cand = order[lo:hi]
if cand.size == 0:
return cand, cand.astype(float)
cv = col_vals[cand]
sel = np.sort(cand[np.abs(1e6 * (cv - mass) / cv) < ppm_threshold])
sel_cv = col_vals[sel]
return sel, np.abs(1e6 * (sel_cv - mass) / sel_cv)

masses = msdata['m/z'].to_numpy(dtype=float)
compounds = msdata.iloc[:, 0].to_numpy()
counts = np.empty(len(masses), dtype=float)
for i in range(len(masses)):
mass = masses[i]
# m+h matches then m+na matches, concatenated in that order (matching
# the old ``pd.concat([hits_h, hits_na])``) and slicing the atlas once.
pos_h, ppm_h = _match(mass, sorted_h, order_h, mph)
pos_na, ppm_na = _match(mass, sorted_na, order_na, mna)
positions = np.concatenate([pos_h, pos_na])
hits = atlas.iloc[positions].copy()
hits['ppm'] = np.concatenate([ppm_h, ppm_na])
hits = hits.sort_values(by=['ppm'])
hitdb[mrow['Compound']] = hits
iondict.loc[mrow['Compound'], 'hits'] = hits.shape[0]
hitdb[compounds[i]] = hits
counts[i] = positions.size
# One vectorised column assignment instead of a per-feature ``.loc`` scalar
# set (msdata's Compound ids are unique, a subset of iondict's index).
iondict.loc[compounds, 'hits'] = counts

iondict.to_csv(outputdir / 'iondict.csv', header=True, index=True)
# iondict.csv just changed on disk (gained/updated the 'hits' column) --
Expand Down
Loading
Loading