sccn · arnodelorme · Jun 29, 2026 · Jun 27, 2026 · Jun 27, 2026 · Jun 27, 2026
diff --git a/src/eegprep/functions/miscfunc/eeg_ms2f.py b/src/eegprep/functions/miscfunc/eeg_ms2f.py
@@ -0,0 +1,40 @@
+"""EEGLAB-compatible conversion of epoch latency (ms) to nearest epoch frame."""
+
+from __future__ import annotations
+
+import math
+from typing import Any
+
+
+def eeg_ms2f(EEG: dict[str, Any], ms: float) -> int:
+    """Convert an epoch latency in milliseconds to the nearest epoch frame.
+
+    Ports EEGLAB's eeg_ms2f.m. The returned frame number is **1-based** to match
+    EEGLAB; convert to a 0-based index at the point of use.
+
+    Args:
+        EEG: EEG structure with ``xmin``/``xmax`` (epoch limits, seconds) and
+            ``pnts`` (samples per epoch).
+        ms: Epoch latency in milliseconds.
+
+    Returns:
+        The nearest 1-based epoch frame number.
+
+    Raises:
+        ValueError: If the latency falls outside ``[xmin, xmax]``.
+    """
+    seconds = ms / 1000.0
+    xmin = EEG["xmin"]
+    xmax = EEG["xmax"]
+    if seconds < xmin or seconds > xmax:
+        raise ValueError("time out of range")
+    frac = (EEG["pnts"] - 1) * (seconds - xmin) / (xmax - xmin)
+    return 1 + _round_half_away_from_zero(frac)
+
+
+def _round_half_away_from_zero(value: float) -> int:
+    """Round to the nearest integer with ties going away from zero (MATLAB ``round``)."""
+    return math.floor(value + 0.5) if value >= 0 else math.ceil(value - 0.5)
+
+
+__all__ = ["eeg_ms2f"]
diff --git a/src/eegprep/functions/miscfunc/numdim.py b/src/eegprep/functions/miscfunc/numdim.py
@@ -0,0 +1,34 @@
+"""EEGLAB-compatible estimate of the effective number of sources (numdim)."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import numpy as np
+
+
+def numdim(data: Any) -> float:
+    """Estimate a lower bound on the number of discrete sources in the data.
+
+    Ports EEGLAB's ``numdim`` (Wackermann's measure): the effective
+    dimensionality is the exponential of the Shannon entropy of the normalized
+    eigenvalue spectrum of the channel second-order matrix ``data @ data.T / 100``.
+    The measure is invariant to overall scaling of the data.
+
+    Args:
+        data: 2-D array shaped ``(channels, samples)``.
+
+    Returns:
+        The estimated effective number of sources, a real scalar.
+    """
+    a = np.asarray(data, dtype=float).T  # MATLAB: a = a';
+    b = a.T @ a / 100.0  # MATLAB: b = a'*a/100;
+    eigenvalues = np.linalg.eigvals(b)  # MATLAB: [v d] = eig(b);
+    weights = (eigenvalues / np.sum(eigenvalues)).astype(complex)
+    # Complex log mirrors MATLAB's real(exp(...)): tiny/negative eigenvalues
+    # from finite precision contribute ~0 instead of producing NaN.
+    lambda_ = np.exp(-np.sum(weights * np.log(weights)))
+    return float(np.real(lambda_))
+
+
+__all__ = ["numdim"]
diff --git a/src/eegprep/functions/sigprocfunc/spher.py b/src/eegprep/functions/sigprocfunc/spher.py
@@ -0,0 +1,34 @@
+"""EEGLAB-compatible sphering (whitening) matrix (spher)."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import numpy as np
+from scipy.linalg import sqrtm
+
+
+def spher(data: Any) -> np.ndarray:
+    """Return the sphering (whitening) matrix for the given data.
+
+    Ports EEGLAB's spher.m: ``2 * inv(sqrtm(cov(data.T)))``. The result is a
+    symmetric matrix ``S`` that whitens the channel covariance ``C = cov(data.T)``
+    so that ``S @ C @ S.T == 4 * I``.
+
+    Args:
+        data: 2-D array shaped ``(channels, samples)``.
+
+    Returns:
+        The ``(channels, channels)`` sphering matrix.
+    """
+    array = np.asarray(data, dtype=float)
+    # np.cov (rowvar=True, ddof=1) matches MATLAB cov(data'); atleast_2d keeps
+    # the single-channel case as a 1x1 matrix.
+    covariance = np.atleast_2d(np.cov(array))
+    sphere = 2.0 * np.linalg.inv(sqrtm(covariance))
+    # sqrtm returns a complex dtype with ~0 imaginary part for real PSD input;
+    # MATLAB's result is real, so drop the negligible imaginary component.
+    return np.real_if_close(sphere)
+
+
+__all__ = ["spher"]
diff --git a/tests/matlab/test_eeg_ms2f.m b/tests/matlab/test_eeg_ms2f.m
@@ -0,0 +1,66 @@
+function tests = test_eeg_ms2f
+% TEST_EEG_MS2F  Tests for eeg_ms2f.m (epoch latency in ms -> nearest 1-based
+% epoch frame). outf = 1 + round((pnts-1)*(ms/1000 - xmin)/(xmax - xmin)).
+% Closed-form, release-invariant expectations.
+tests = functiontests(localfunctions);
+end
+
+function eeg = makeEEG(xmin, xmax, pnts)
+eeg = struct('xmin', xmin, 'xmax', xmax, 'pnts', pnts);
+end
+
+function testFirstFrameAtXmin(testCase)
+% ms at xmin -> first frame (1-based).
+EEG = makeEEG(0, 1, 1001);
+verifyEqual(testCase, eeg_ms2f(EEG, 0), 1);
+end
+
+function testLastFrameAtXmax(testCase)
+% ms at xmax -> last frame == pnts.
+EEG = makeEEG(0, 1, 1001);
+verifyEqual(testCase, eeg_ms2f(EEG, 1000), 1001);
+end
+
+function testMidpoint(testCase)
+% 500 ms -> 0.5 s -> frame 1 + 1000*0.5 = 501.
+EEG = makeEEG(0, 1, 1001);
+verifyEqual(testCase, eeg_ms2f(EEG, 500), 501);
+end
+
+function testRoundsToNearest(testCase)
+% 499.4 ms -> 1 + round(499.4) = 500.
+EEG = makeEEG(0, 1, 1001);
+verifyEqual(testCase, eeg_ms2f(EEG, 499.4), 500);
+end
+
+function testEpochCenterNegativeXmin(testCase)
+% Epoched data xmin<0: ms=0 -> centre frame.
+EEG = makeEEG(-1, 1, 2001);
+verifyEqual(testCase, eeg_ms2f(EEG, 0), 1001);
+end
+
+function testBelowRangeErrors(testCase)
+% ms below xmin -> error('time out of range').
+EEG = makeEEG(0, 1, 1001);
+threw = false;
+try
+    eeg_ms2f(EEG, -1);
+catch ME
+    threw = true;
+    verifyTrue(testCase, contains(ME.message, 'out of range'));
+end
+verifyTrue(testCase, threw);
+end
+
+function testAboveRangeErrors(testCase)
+% ms above xmax -> error('time out of range').
+EEG = makeEEG(0, 1, 1001);
+threw = false;
+try
+    eeg_ms2f(EEG, 2000);
+catch ME
+    threw = true;
+    verifyTrue(testCase, contains(ME.message, 'out of range'));
+end
+verifyTrue(testCase, threw);
+end
diff --git a/tests/matlab/test_numdim.m b/tests/matlab/test_numdim.m
@@ -0,0 +1,52 @@
+function tests = test_numdim
+% TEST_NUMDIM  Tests for numdim.m (effective number of sources via
+% eigenvalue-entropy of the channel second-order matrix A*A'/100).
+%
+% Expectations are closed-form / release-invariant:
+%   - single channel             -> lambda == 1
+%   - orthogonal equal-energy    -> lambda == nchan (uniform eigenvalues)
+%   - identical channels (rank-1) -> lambda == 1 (one dominant eigenvalue)
+%   - full-rank                  -> real, 1 < lambda < nchan
+tests = functiontests(localfunctions);
+end
+
+function testSingleChannel(testCase)
+% One channel -> one (normalized) eigenvalue == 1 -> entropy 0 -> lambda 1.
+a = rand(1, 50) + 0.5;            % nonzero energy
+verifyEqual(testCase, numdim(a), 1, 'AbsTol', 1e-10);
+end
+
+function testTwoChannelOrthogonal(testCase)
+% A*A' = 2*I -> equal eigenvalues -> lambda == nchan == 2.
+A = [1 1; 1 -1];
+verifyEqual(testCase, numdim(A), 2, 'AbsTol', 1e-10);
+end
+
+function testOrthogonalEqualEnergy(testCase)
+% Hadamard rows are orthogonal with equal norm -> A*A' = 4*I -> lambda == 4.
+A = hadamard(4);
+verifyEqual(testCase, numdim(A), 4, 'AbsTol', 1e-9);
+end
+
+function testRankDeficientApproxOne(testCase)
+% Identical channels -> rank-1 -> one dominant eigenvalue -> ~1 effective dim
+% (eig yields tiny nonzero eigenvalues, so the result is finite, not NaN).
+A = ones(3, 10);
+verifyEqual(testCase, numdim(A), 1, 'AbsTol', 1e-6);
+end
+
+function testFullRankBounds(testCase)
+% Full-rank random data: real scalar strictly between 1 and nchan.
+rng(42);
+A = randn(5, 200);
+v = numdim(A);
+verifyTrue(testCase, isreal(v));
+verifyGreaterThan(testCase, v, 1);
+verifyLessThan(testCase, v, 5);
+end
+
+function testNoArgShowsHelp(testCase)
+% nargin<1 branch: prints help and returns without error.
+out = evalc('numdim()');
+verifyTrue(testCase, ~isempty(out));
+end
diff --git a/tests/matlab/test_spher.m b/tests/matlab/test_spher.m
@@ -0,0 +1,34 @@
+function tests = test_spher
+% TEST_SPHER  Tests for spher.m (sphering matrix: 2*inv(sqrtm(cov(data')))).
+% Properties: symmetric; whitens the channel covariance so that
+% S*C*S' = 4*I (since the factor is 2). Single channel has a closed form.
+tests = functiontests(localfunctions);
+end
+
+function testSingleChannelClosedForm(testCase)
+% 1 channel: cov(data') = var(data) (N-1 normalized) = 2.5 -> 2/sqrt(2.5).
+data = [1 2 3 4 5];
+expected = 2 / sqrt(2.5);
+verifyEqual(testCase, spher(data), expected, 'RelTol', 1e-10);
+end
+
+function testSymmetric(testCase)
+% Sphering matrix of a symmetric PSD covariance is symmetric.
+data = [2 1 0 1 3 2; 0 2 1 3 1 0; 1 0 3 1 2 1];
+S = spher(data);
+verifyLessThan(testCase, norm(S - S.'), 1e-9);
+end
+
+function testWhitensCovarianceToFourI(testCase)
+% S = 2*inv(sqrtm(C)) -> S*C*S' = 4*I.
+data = [2 1 0 1 3 2; 0 2 1 3 1 0; 1 0 3 1 2 1];
+S = spher(data);
+C = cov(data');
+verifyLessThan(testCase, norm(S * C * S.' - 4 * eye(3)), 1e-6);
+end
+
+function testNoArgShowsHelp(testCase)
+% nargin<1 branch: prints help and returns without error.
+out = evalc('spher()');
+verifyTrue(testCase, ~isempty(out));
+end
diff --git a/tests/test_eeg_ms2f.py b/tests/test_eeg_ms2f.py
@@ -0,0 +1,53 @@
+"""Parity tests for eeg_ms2f - epoch latency (ms) to nearest epoch frame.
+
+Ports EEGLAB eeg_ms2f.m: ``outf = 1 + round((pnts-1)*(ms/1000 - xmin)/(xmax - xmin))``
+with an out-of-range error. Frame numbers are 1-based to match EEGLAB. Expected
+values are closed-form and mirror tests/matlab/test_eeg_ms2f.m.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from eegprep.functions.miscfunc.eeg_ms2f import eeg_ms2f
+
+pytestmark = pytest.mark.parity
+
+
+def _eeg(xmin, xmax, pnts):
+    return {"xmin": xmin, "xmax": xmax, "pnts": pnts}
+
+
+def test_first_frame_at_xmin():
+    # ms at xmin -> first frame (1-based).
+    assert eeg_ms2f(_eeg(0, 1, 1001), 0) == 1
+
+
+def test_last_frame_at_xmax():
+    # ms at xmax -> last frame == pnts.
+    assert eeg_ms2f(_eeg(0, 1, 1001), 1000) == 1001
+
+
+def test_midpoint():
+    # 500 ms -> 0.5 s -> frame 1 + 1000*0.5 = 501.
+    assert eeg_ms2f(_eeg(0, 1, 1001), 500) == 501
+
+
+def test_rounds_to_nearest():
+    # 499.4 ms -> 1 + round(499.4) = 500.
+    assert eeg_ms2f(_eeg(0, 1, 1001), 499.4) == 500
+
+
+def test_epoch_center_negative_xmin():
+    # Epoched data xmin<0: ms=0 -> centre frame.
+    assert eeg_ms2f(_eeg(-1, 1, 2001), 0) == 1001
+
+
+def test_below_range_raises():
+    with pytest.raises(ValueError, match="out of range"):
+        eeg_ms2f(_eeg(0, 1, 1001), -1)
+
+
+def test_above_range_raises():
+    with pytest.raises(ValueError, match="out of range"):
+        eeg_ms2f(_eeg(0, 1, 1001), 2000)
diff --git a/tests/test_numdim.py b/tests/test_numdim.py
@@ -0,0 +1,69 @@
+"""Parity tests for numdim - effective number of sources (EEGLAB numdim.m).
+
+numdim estimates a lower bound on the number of discrete sources via the
+eigenvalue entropy of the channel second-order matrix ``A @ A.T / 100``. The
+measure is scale-invariant (eigenvalues are normalized), so expectations below
+are closed-form / property-based.
+
+Expected values match the MATLAB reference tests in tests/matlab/test_numdim.m
+(confirmed on MATLAB R2025a). Regenerate ground truth with
+``matlab -batch "addpath('.../functions/miscfunc'); numdim(<input>)"``.
+"""
+
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from eegprep.functions.miscfunc.numdim import numdim
+
+pytestmark = pytest.mark.parity
+
+
+def test_single_channel_is_one():
+    # One channel -> one normalized eigenvalue -> entropy 0 -> lambda == 1.
+    rng = np.random.default_rng(0)
+    a = rng.random((1, 50)) + 0.5
+    np.testing.assert_allclose(numdim(a), 1.0, atol=1e-10)
+
+
+def test_two_channel_orthogonal_is_nchan():
+    # A @ A.T = 2*I -> equal eigenvalues -> lambda == nchan == 2.
+    a = np.array([[1.0, 1.0], [1.0, -1.0]])
+    np.testing.assert_allclose(numdim(a), 2.0, atol=1e-10)
+
+
+def test_hadamard_equal_energy_is_nchan():
+    # Hadamard(4): orthogonal equal-norm rows -> A @ A.T = 4*I -> lambda == 4.
+    h4 = np.array(
+        [
+            [1, 1, 1, 1],
+            [1, -1, 1, -1],
+            [1, 1, -1, -1],
+            [1, -1, -1, 1],
+        ],
+        dtype=float,
+    )
+    np.testing.assert_allclose(numdim(h4), 4.0, atol=1e-9)
+
+
+def test_rank_deficient_is_approx_one():
+    # Identical channels -> rank-1 -> one dominant eigenvalue -> ~1 effective dim.
+    a = np.ones((3, 10))
+    np.testing.assert_allclose(numdim(a), 1.0, atol=1e-6)
+
+
+def test_full_rank_matches_matlab():
+    # Full-rank deterministic integer matrix: identical in numpy and MATLAB, so
+    # pin to the exact MATLAB R2025a value (confirmed via matlab -batch). The
+    # result is real and strictly between 1 and nchan (=3).
+    a = np.array(
+        [
+            [2.0, 1.0, 0.0, 1.0],
+            [1.0, 3.0, 1.0, 0.0],
+            [0.0, 1.0, 2.0, 1.0],
+        ]
+    )
+    v = numdim(a)
+    assert np.isreal(v)
+    np.testing.assert_allclose(v, 2.147746217856, rtol=1e-6)