#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Raman Substrate Analysis GUI — control + paired modes (v1.4)
=====================================================

Mode 1: Control-only (legacy v1.1 behavior)
Paste:
    x    baseline    substrate1    substrate2 ...

Mode 2: Paired analysis
Paste a header row with triplets:
    x    PairA__baseline    PairA__control    PairA__sample    PairB__baseline    PairB__control    PairB__sample ...

The script auto-detects the mode from the headers.

Outputs:
- TXT report
- TSV export
  * Control mode: residual columns
  * Paired mode: d_control, d_sample, delta, smooth_bg, peaks for each pair
"""

import configparser
import io
import math
import os
import re
from pathlib import Path
import tkinter as tk
from tkinter import ttk, messagebox, filedialog, scrolledtext, font as tkfont
import numpy as np

# Window title used by the GUI (passed to the Tk root window's title()).
APP_TITLE = "Raman Substrate Analysis GUI v1.4"

# Multiplier k of the PI metric: fit_one() reports the fraction of residual
# points with |d| > PI_K * sigma_ref.
PI_K = 3.0
# Upper limit on the number of substrate columns read in control-only mode.
# NOTE(review): the identifier is misspelled ("SUBPPRATES"); it is kept as-is
# because parse_control_mode() refers to it by this exact name.
MAX_SUBPPRATES = 20

# Built-in classification thresholds. They seed load_thresholds() and define
# the set of keys that the load/save/apply helpers iterate over.
DEFAULT_THRESHOLDS = {
    # CLEAN requires RMS_d / sigma_ref <= this value ...
    "clean_rms_ratio_max": 1.20,
    # ... AND PI (a fraction, not a percentage) <= this value.
    "clean_pi_max": 0.005,
    # INTRINSIC SIGNAL PRESENT when RMS_d / sigma_ref >= this value ...
    "intrinsic_rms_ratio_min": 1.90,
    # ... OR PI (a fraction, not a percentage) >= this value.
    "intrinsic_pi_min": 0.025,
}

# Maps each threshold key to a (short label, explanatory text) pair.
# NOTE(review): presumably displayed by the Threshold settings dialog, whose
# code is outside this excerpt - confirm against the caller.
THRESHOLD_LABELS = {
    "clean_rms_ratio_max": (
        "CLEAN: maximum RMS_d / sigma_ref",
        "Spectrum is classified as CLEAN only when RMS_d / sigma_ref is at or below this value."
    ),
    "clean_pi_max": (
        "CLEAN: maximum PI fraction",
        "Spectrum is classified as CLEAN only when PI is at or below this fraction. Example: 0.005 = 0.5%."
    ),
    "intrinsic_rms_ratio_min": (
        "INTRINSIC: minimum RMS_d / sigma_ref",
        "Spectrum is classified as INTRINSIC SIGNAL PRESENT when RMS_d / sigma_ref is at or above this value."
    ),
    "intrinsic_pi_min": (
        "INTRINSIC: minimum PI fraction",
        "Spectrum is classified as INTRINSIC SIGNAL PRESENT when PI is at or above this fraction. Example: 0.025 = 2.5%."
    ),
}

# Directory name and file name of the per-user settings file; combined into a
# full path by get_config_path().
CONFIG_DIR_NAME = "RamanSubstrateAnalysisGUI"
CONFIG_FILE_NAME = "settings.ini"

def get_config_path() -> Path:
    """Locate the per-user settings file.

    Returns:
        ``<APPDATA>/<CONFIG_DIR_NAME>/<CONFIG_FILE_NAME>`` when the ``APPDATA``
        environment variable is set to a non-empty value; otherwise a
        dot-prefixed directory of the same name below the user's home
        directory.
    """
    roaming_root = os.environ.get("APPDATA")
    if not roaming_root:
        # No APPDATA: fall back to a hidden directory in the home folder.
        return Path.home().joinpath(f".{CONFIG_DIR_NAME}", CONFIG_FILE_NAME)
    return Path(roaming_root).joinpath(CONFIG_DIR_NAME, CONFIG_FILE_NAME)

# Settings-file path, resolved once at import time; read by load_thresholds()
# and written by save_thresholds().
CONFIG_PATH = get_config_path()

def load_thresholds() -> dict:
    """Return the classification thresholds, overlaying saved values on defaults.

    The result always contains every key of ``DEFAULT_THRESHOLDS``. Values from
    the ``[thresholds]`` section of the settings file at ``CONFIG_PATH`` replace
    the defaults one key at a time: a missing, unparsable or non-finite entry
    only leaves *that* key at its default instead of discarding every other
    valid setting in the file.

    Returns:
        dict: threshold name -> float.

    The function never raises for a missing, unreadable or damaged settings
    file; it deliberately falls back to defaults so the GUI stays usable.
    """
    thresholds = DEFAULT_THRESHOLDS.copy()
    config = configparser.ConfigParser()
    try:
        if not CONFIG_PATH.exists():
            return thresholds
        config.read(CONFIG_PATH, encoding="utf-8")
    except (OSError, ValueError, configparser.Error):
        # ValueError covers UnicodeDecodeError; configparser.Error covers
        # structural damage (missing section header, duplicates, ...).
        return thresholds

    if not config.has_section("thresholds"):
        return thresholds

    for key in DEFAULT_THRESHOLDS:
        try:
            value = config.getfloat("thresholds", key)
        except (ValueError, configparser.Error):
            # Option absent (NoOptionError) or not a number: keep the default.
            continue
        if math.isfinite(value):
            thresholds[key] = float(value)
    return thresholds

def save_thresholds(thresholds: dict) -> None:
    """Write the given thresholds to the settings file at ``CONFIG_PATH``.

    Every key of ``DEFAULT_THRESHOLDS`` is read from *thresholds*, converted to
    ``float`` and stored via ``repr`` in a ``[thresholds]`` section. The parent
    directory is created when it does not exist yet.

    Raises:
        KeyError: *thresholds* lacks one of the expected keys.
        ValueError / TypeError: a value cannot be converted to ``float``.
        OSError: the directory or file cannot be written.
    """
    config = configparser.ConfigParser()
    section = {}
    for key in DEFAULT_THRESHOLDS:
        section[key] = repr(float(thresholds[key]))
    config["thresholds"] = section

    settings_dir = CONFIG_PATH.parent
    settings_dir.mkdir(parents=True, exist_ok=True)
    with CONFIG_PATH.open("w", encoding="utf-8") as handle:
        config.write(handle)

def apply_thresholds(new_thresholds: dict) -> None:
    """Replace the contents of the module-level ``THRESHOLDS`` dict in place.

    The new values are converted and validated *before* the shared dict is
    touched. If a key is missing or a value is not convertible to ``float``,
    the exception propagates and ``THRESHOLDS`` keeps its previous contents
    instead of being left empty. This also makes it safe to pass
    ``THRESHOLDS`` itself as the argument.

    Args:
        new_thresholds: mapping that provides every key of
            ``DEFAULT_THRESHOLDS``; extra keys are ignored.

    Raises:
        KeyError: a required key is missing.
        ValueError / TypeError: a value cannot be converted to ``float``.
    """
    validated = {key: float(new_thresholds[key]) for key in DEFAULT_THRESHOLDS}
    # Mutate in place so every holder of a reference sees the new values.
    THRESHOLDS.clear()
    THRESHOLDS.update(validated)

# Active thresholds, loaded at import time. apply_thresholds() mutates this
# dict in place; fit_one() and make_control_report() read it.
THRESHOLDS = load_thresholds()

USER_MANUAL = "*****************************************************************************************************************************************\n***********************User manual for Raman Substrate Analysis GUI v1.4  (filename:RamanSubstrateAnalysisGUI_v1_4)**********************\n*****************************************************************************************************************************************\n1. Purpose of the script\n\nThis script is intended for Raman substrate evaluation in two different workflows:\n\n\tA. Control-only mode (COM)\n\tUsed for direct assessment of substrate-intrinsic background by comparing each substrate spectrum against a baseline spectrum.\n\n\tB. Paired signal-to-parasite ratio mode (SPR mode)\n\tUsed for practical sample-on-substrate measurements, where each dataset contains a matched baseline, control, and sample \t\t\n\tspectrum. In this mode, the script calculates residual components and the Signal-to-Parasite Ratio (SPR).\n \n2. General workflow\n\nAfter the script is launched, the user pastes a table of spectral data into the upper text field labeled “Paste your table here:”. The program then detects the input format automatically, performs the appropriate analysis after pressing Analyze, shows a text summary in the lower “Results / report preview:” field, and enables export of the numerical results as TSV and the report as TXT. The User manual can be shown or hidden as a right-side panel; it is hidden by default to leave more working space. The Hints panel above the input area is shown by default and can also be hidden. The Threshold settings button opens editable classification parameters.\n\n3. Input requirements common to both modes\n\nThe script expects a plain text table pasted into the GUI. It can interpret columns separated by tab, comma, semicolon, vertical bar, or whitespace. At least two rows must be present, and at least three numeric rows are required for successful analysis. 
Non-numeric rows that do not match the expected table structure are ignored during parsing. Decimal commas are accepted and internally converted to decimal points.\n\nThe first column must always contain the Raman shift axis (x). The remaining columns depend on the selected mode, which is determined automatically from the column headers. If the first row contains headers with suffixes such as __baseline, __control, or __sample, the script switches to paired mode; otherwise it uses control-only mode.\n\n4. Control-only mode (COM)\n\n4.1 When to use this mode\n\nUse control-only mode when the goal is to quantify intrinsic substrate background or parasitic signal without an analyte-containing sample. In this workflow, one baseline spectrum is compared against one or more substrate spectra.\n\n4.2 Required input format\n\nPaste a table in the following layout:\n\nx    baseline    substrate1    substrate2    substrate3 ...\n\nA header row is optional. If present, the first column should be the Raman shift axis, the second column should be the baseline spectrum, and all following columns should be substrate spectra. If no valid headers are present, the program assigns default names such as Substrate_1, Substrate_2, etc. 
The script accepts up to 20 substrate columns in this mode.\n\n4.3 Step-by-step procedure\n• Launch the script.\n• Copy the spectral table from Excel, a spreadsheet, or a text editor.\n• Paste the table into the upper input field.\n• Check that the first column contains Raman shift values, the second column contains the baseline spectrum, and the remaining columns \t\tcontain substrate spectra.\n• Click Analyze.\n• Inspect the report shown in the lower preview field.\n• To save the numerical residual spectra, click Export TSV.\n• To save the text summary, click Save Report (TXT).\n• To start a new analysis, click Clear.\n\n4.4 What the script computes in COM\n\nFor each substrate spectrum, the script fits the model:\n\n\ts ≈ α·b + β·1 + γ·x\n\nwhere b is the baseline spectrum and x is the Raman shift axis. It then calculates the residual spectrum d = s − (α·b + β + γ·x), estimates the baseline/reference noise using MAD-based robust statistics, and reports the following metrics:\n\n• α, β, γ\n• σ_b\n• σ_ref\n• RMS_d\n• RMS_d / σ_ref\n• L1_d\n• PI\n• classification as CLEAN, MINOR PARASITICS, or INTRINSIC SIGNAL PRESENT\n\n4.5 Output files in COM\n\nIn control-only mode, Export TSV produces a file named by default control_residuals.tsv. The first column is x, and each following column contains one residual spectrum labeled as residual_<substrate_name>. The text report can be saved as control_report.txt.\n\n5. Paired Signal-to-Parasite Ratio mode (SPR mode)\n\n5.1 When to use this mode\n\nUse SPR mode when each analyte measurement is accompanied by a matched baseline and control measurement. This is the appropriate mode for practical sample-on-substrate comparisons where the goal is to separate useful Raman signal from smooth background and substrate-derived parasitic contributions.\n\n5.2 Required input format\n\nPaired mode requires a header row with named triplets. 
The format is:\n\nx    PairA__baseline    PairA__control    PairA__sample    PairB__baseline    PairB__control    PairB__sample ...\n\nEach triplet must share the same prefix, such as PairA, and must contain exactly the suffixes __baseline, __control, and __sample. The program groups columns by this prefix and analyzes each complete triplet as one pair. If no valid triplets are found, the script returns a parse error.\n\n5.3 Step-by-step procedure\n\n• Launch the script.\n• Prepare a table with a header row.\n• Put the Raman shift axis in the first column (x).\n• For each matched dataset, create three columns named PairName__baseline, PairName__control, and PairName__sample.\n• Paste the full table into the upper input field.\n• Click Analyze.\n• Read the paired-analysis report in the lower preview field.\n• Click Export TSV to save the numerical residual components.\n• Click Save Report (TXT) to save the textual summary.\n• Click Clear before starting a new dataset.\n\n5.4 What the script computes in SPR mode\n\nFor each pair, the control and sample spectra are first baseline-projected separately using the same underlying fitting routine as in control-only mode. 
The paired residual is then defined as:\n\n\tΔ = d_sample − d_control\n\nThis paired residual is decomposed into:\n\na smooth background term obtained by moving-average filtering\na peak-rich term defined as Δ − smooth_bg\n\nFrom these components, the script reports:\n\n• α_control\n• α_sample\n• control classification\n• Useful RMS\n• Background RMS\n• Substrate RMS\n• Parasite RSS\n• SPR\n• Useful L1\n\nThe built-in paired-mode summary explicitly states that:\n\nuseful Raman signal is quantified as RMS(peaks)\nparasitic contribution is quantified as RSS(RMS(background), RMS(control residual))\nSPR = Useful RMS / Parasite RSS\nlarger SPR values indicate stronger dominance of structured sample signal over smooth background and substrate-induced parasitics\n\n5.5 Output files in SPR mode\n\nIn paired mode, Export TSV produces a file named by default paired_analysis.tsv. For each pair, the output includes five columns:\n\nPairName__d_control\nPairName__d_sample\nPairName__delta\nPairName__smooth_bg\nPairName__peaks\n\nThe text report can be saved as paired_report.txt.\n\n6. Practical notes for preparing input tables\n\n6.1 Copying from Excel\n\nThe most reliable workflow is to prepare the data in Excel or another spreadsheet program and copy the relevant columns directly into the script. Tab-delimited clipboard data is handled naturally by the parser.\n\n6.2 Consistent x-axis\n\nAll spectra within one pasted table must correspond row-by-row to the same Raman shift axis in the first column. The script does not perform interpolation or axis matching internally; it assumes that the input rows are already aligned. This follows from the fact that the code directly reads all numeric rows into one array and uses the first column as a common x axis for all subsequent calculations.\n\n6.3 Header naming in paired mode\n\nThe paired mode is sensitive to the exact naming pattern. 
The suffixes must be written as:\n\n__baseline\n__control\n__sample\n\nThe detection is case-insensitive for these suffixes, but the double underscore structure is required.\n\n7. Threshold settings\n\n7.1 Purpose of threshold settings\n\nThe Threshold settings dialog allows the user to tune the classification criteria used in control-only mode and in the control part of paired SPR mode. These thresholds determine whether a residual spectrum is classified as CLEAN, MINOR PARASITICS, or INTRINSIC SIGNAL PRESENT.\n\n7.2 Opening the settings\n\nClick the Threshold settings button in the upper toolbar. A separate dialog opens with four editable numerical fields. The currently active values are loaded automatically when the program starts.\n\n7.3 Editable threshold values\n\nThe following values can be changed:\n\n• clean_rms_ratio_max\n  Maximum RMS_d / σ_ref value still allowed for CLEAN classification.\n\n• clean_pi_max\n  Maximum PI fraction still allowed for CLEAN classification. PI is entered as a fraction, not as percent. For example, 0.005 means 0.5%.\n\n• intrinsic_rms_ratio_min\n  Minimum RMS_d / σ_ref value that triggers INTRINSIC SIGNAL PRESENT classification.\n\n• intrinsic_pi_min\n  Minimum PI fraction that triggers INTRINSIC SIGNAL PRESENT classification. PI is entered as a fraction, not as percent. For example, 0.025 means 2.5%.\n\n7.4 Saving and persistence\n\nAfter changing the threshold values, click Apply and save or Save and close. The values are saved into a user-specific settings.ini file and are automatically loaded during the next program launch. 
On Windows, the file is normally stored under the user's AppData/Roaming directory in a RamanSubstrateAnalysisGUI subfolder.\n\n7.5 Resetting thresholds\n\nClick Reset thresholds to default to restore the original built-in values:\n\n• clean_rms_ratio_max = 1.20\n• clean_pi_max = 0.005\n• intrinsic_rms_ratio_min = 1.90\n• intrinsic_pi_min = 0.025\n\nThe reset values are saved to the settings file immediately. If an analysis report is already displayed, re-run Analyze after changing thresholds so that the report and classifications are recalculated with the new values.\n\n8. Troubleshooting\n\n8.1 “Please paste your table first.”\n\nThis message appears when Analyze is pressed with an empty input field. Paste the dataset before running the analysis.\n\n8.2 “Please paste at least two rows.”\n\nThis error indicates that the pasted content is too short to be interpreted as a table. Add the full dataset, including multiple numeric rows.\n\n8.3 “Not enough numeric rows found.”\n\nThis means the parser could not extract enough valid numeric rows for control-only mode. Check whether the pasted table contains text in numeric areas, missing values, or incompatible formatting.\n\n8.4 “Not enough numeric rows found for paired mode.”\n\nThis means the paired table was detected, but too few valid numeric rows could be read. Verify that the body of the table contains properly formatted numbers.\n\n8.5 “No valid paired triplets found.”\n\nThis occurs when the headers do not contain complete prefix__baseline, prefix__control, prefix__sample triplets. Check spelling, double underscores, and whether all three columns are present for each pair.\n\n8.6 Buttons for export remain inactive\n\nThe Export TSV and Save Report (TXT) buttons are enabled only after a successful analysis. If they remain disabled, the analysis did not complete successfully.\n\n9. 
Minimal input examples\n\nExample A: control-only mode\nx\tbaseline\tGlass\tQuartz\tCaF2\n324\t12.1\t12.8\t12.4\t12.0\n325\t11.9\t12.7\t12.5\t12.1\n326\t12.0\t12.6\t12.4\t12.0\n...\nExample B: paired SPR mode\nx\tS1__baseline\tS1__control\tS1__sample\tS2__baseline\tS2__control\tS2__sample\n324\t10.2\t12.1\t18.4\t11.0\t13.2\t16.8\n325\t10.1\t12.0\t18.1\t10.9\t13.0\t16.5\n326\t10.3\t12.2\t18.6\t11.1\t13.1\t16.7\n...\n\nThese examples follow exactly the formats described in the script header and parsing logic.\n\n10. Short summary\n\nThe script provides two complementary evaluation modes in one interface. Control-only mode quantifies substrate-intrinsic residual background after projection against a baseline spectrum, whereas paired mode compares matched control and sample spectra to estimate useful Raman signal, parasitic background, and the resulting Signal-to-Parasite Ratio (SPR). The program accepts pasted tabular input, automatically detects the mode from column headers, displays a text summary, and exports the processed residual data for further inspection.\n"

# Short usage hints for preparing and pasting the input table.
# NOTE(review): presumably shown in the GUI's Hints panel; the code that
# displays it is outside this excerpt - confirm against the caller.
INPUT_HELP_TEXT = (
    "Basic workflow: paste a plain text table copied from Excel/spreadsheet, then click Analyze.\n"
    "Control-only mode: columns should be x, baseline, substrate1, substrate2, ... Header row is optional.\n"
    "Paired SPR mode: use headers such as x, PairA__baseline, PairA__control, PairA__sample. The double underscores are required.\n"
    "Decimal commas are accepted. Export TSV and Save Report become active after successful analysis."
)

def robust_std_mad(y: np.ndarray) -> float:
    """Estimate a standard deviation from the median absolute deviation (MAD).

    Args:
        y: array-like of numbers; converted to a float array.

    Returns:
        ``1.4826 * median(|y - median(y)|)``.
    """
    values = np.asarray(y, dtype=float)
    deviations = np.abs(values - np.median(values))
    return 1.4826 * np.median(deviations)

def detect_delimiter(line: str) -> str:
    """Guess the column delimiter of one table line.

    Priority:
      1. a tab, whenever the line contains one;
      2. semicolon or vertical bar, whichever occurs more often (semicolon on
         a tie);
      3. comma - unless the line is a run of whitespace-separated numbers in
         which the commas are decimal separators (e.g. ``"1,5 2,5 3,5"``),
         in which case whitespace is the delimiter;
      4. whitespace when none of the above is present.

    The comma is deliberately ranked below ``;`` and ``|`` because decimal
    commas are accepted in the data: counting a decimal comma as a delimiter
    would split ``"1,5;2,5;3,5"`` in the wrong places.

    Args:
        line: a single line of the pasted table (normally the first one).

    Returns:
        One of ``'\\t'``, ``';'``, ``'|'``, ``','`` or ``' '`` (a single space
        stands for "any whitespace").
    """
    if '\t' in line:
        return '\t'

    counts = {d: line.count(d) for d in (';', '|')}
    best = max(counts, key=counts.get)
    if counts[best] > 0:
        return best

    if ',' in line:
        # A number with an optional decimal part written with '.' or ','.
        number = r"[+-]?\d+(?:[.,]\d+)?(?:[eE][+-]?\d+)?"
        tokens = line.split()
        if len(tokens) >= 2 and all(re.fullmatch(number, tok) for tok in tokens):
            return ' '
        return ','
    return ' '

def split_line(line: str, delim: str):
    """Split one table line into stripped fields.

    Args:
        line: the raw line.
        delim: delimiter to split on; a single space means "any run of
            whitespace".

    Returns:
        list[str]: the fields. With an explicit delimiter, empty fields are
        kept; if that split yields fewer than two fields, the line is instead
        split on any run of commas, tabs, semicolons, bars or spaces and empty
        pieces are dropped.
    """
    trimmed = line.strip()
    if delim != ' ':
        fields = [piece.strip() for piece in line.split(delim)]
        if len(fields) >= 2:
            return fields
        # The chosen delimiter does not occur on this line: fall back to a
        # permissive split on every supported separator.
        return [tok for tok in re.split(r"[,\t;| ]+", trimmed) if tok]
    return [tok for tok in re.split(r"\s+", trimmed) if tok]

def is_floatish(s: str) -> bool:
    """Tell whether *s* reads as a number once decimal commas become points.

    Args:
        s: the token to test; it is passed through ``str()`` first, so
            non-string values are accepted as well.

    Returns:
        ``True`` when ``float()`` accepts the converted text, else ``False``.
    """
    try:
        float(str(s).replace(",", "."))
    except Exception:
        return False
    else:
        return True

def moving_average(y, window=51):
    """Smooth a 1-D sequence with a centred moving average.

    The window is forced to an odd size of at least 5. When the input is
    shorter than the window, the window shrinks to ``max(3, 2*(len(y)//2)+1)``.
    The ends are padded by repeating the edge values, so the result always has
    the same length as the input.

    Args:
        y: array-like of numbers (converted to a float array).
        window: requested window length in samples.

    Returns:
        numpy.ndarray: smoothed values, same length as *y*. An empty input
        yields an empty array instead of failing inside ``np.pad``, which
        cannot edge-pad an empty axis.
    """
    y = np.asarray(y, dtype=float)
    if y.size == 0:
        return y.copy()
    w = max(5, int(window))
    if w % 2 == 0:
        w += 1
    if len(y) < w:
        w = max(3, (len(y) // 2) * 2 + 1)
    pad = w // 2
    # Edge padding of w//2 on both sides makes the "valid" convolution return
    # exactly len(y) samples.
    yp = np.pad(y, pad_width=pad, mode="edge")
    kernel = np.ones(w) / w
    return np.convolve(yp, kernel, mode="valid")

def fit_one(x: np.ndarray, b: np.ndarray, s: np.ndarray):
    """Project spectrum *s* onto the baseline *b* and score the residual.

    A least-squares fit of ``s ≈ alpha*b + beta + gamma*x`` is computed and the
    residual ``d = s - fit`` is summarised.

    Args:
        x: axis values (first column of the table).
        b: baseline spectrum.
        s: spectrum to be explained by the baseline.

    Returns:
        dict with the fit coefficients (``alpha``, ``beta``, ``gamma``), the
        ``residual`` array, the noise estimates ``sigma_b`` and ``sigma_ref``,
        the metrics ``rms_d``, ``rms_ratio`` (= rms_d / sigma_ref), ``l1_d``
        and ``pi`` (fraction of points with ``|d| > PI_K * sigma_ref``), and a
        ``classification`` string derived from the module-level ``THRESHOLDS``.
    """
    x = np.asarray(x, dtype=float)
    b = np.asarray(b, dtype=float)
    s = np.asarray(s, dtype=float)

    # Design matrix columns: baseline, constant offset, linear slope in x.
    design = np.column_stack([b, np.ones(len(x)), x])
    coeffs = np.linalg.lstsq(design, s, rcond=None)[0]
    alpha, beta, gamma = coeffs.tolist()
    residual = s - design @ coeffs
    abs_residual = np.abs(residual)

    # Noise reference: the larger of the baseline MAD and the MAD of the
    # residual after discarding points beyond 3 sigma of a first estimate.
    sigma_b = robust_std_mad(b)
    sigma_raw = robust_std_mad(residual)
    clip_limit = 3.0 * max(sigma_b, sigma_raw, 1e-12)
    inliers = abs_residual <= clip_limit
    if np.any(inliers):
        sigma_clipped = robust_std_mad(residual[inliers])
    else:
        sigma_clipped = sigma_raw
    sigma_ref = max(sigma_b, sigma_clipped, 1e-12)

    rms_d = float(np.sqrt(np.mean(residual ** 2)))
    l1_d = float(np.mean(abs_residual))
    pi = float(np.mean(abs_residual > PI_K * sigma_ref))
    rms_ratio = rms_d / sigma_ref if sigma_ref > 0 else float("inf")

    classification = "MINOR PARASITICS"
    if rms_ratio <= THRESHOLDS["clean_rms_ratio_max"] and pi <= THRESHOLDS["clean_pi_max"]:
        classification = "CLEAN"
    elif rms_ratio >= THRESHOLDS["intrinsic_rms_ratio_min"] or pi >= THRESHOLDS["intrinsic_pi_min"]:
        classification = "INTRINSIC SIGNAL PRESENT"

    return {
        "alpha": float(alpha),
        "beta": float(beta),
        "gamma": float(gamma),
        "residual": residual,
        "sigma_b": float(sigma_b),
        "sigma_ref": float(sigma_ref),
        "rms_d": float(rms_d),
        "rms_ratio": float(rms_ratio),
        "l1_d": float(l1_d),
        "pi": float(pi),
        "classification": classification,
    }

def paired_one(x, baseline, control, sample, window=51):
    """Analyse one baseline/control/sample triplet.

    Control and sample are each fitted against the baseline with ``fit_one``.
    The difference of the two residuals (``delta``) is split into a smooth
    part (moving average over *window* samples) and the remainder (``peaks``).

    Args:
        x: axis values.
        baseline: baseline spectrum.
        control: control spectrum.
        sample: sample spectrum.
        window: moving-average window passed to ``moving_average``.

    Returns:
        dict with both fit results (``control_fit``, ``sample_fit``), the
        arrays ``delta``, ``smooth_bg`` and ``peaks``, and the scalars
        ``useful_rms``, ``useful_l1``, ``background_rms``, ``substrate_rms``,
        ``parasite_rss`` (root-sum-square of background and substrate RMS) and
        ``spr`` (useful_rms / parasite_rss, or ``inf`` when the denominator is
        not positive).
    """
    control_fit = fit_one(x, baseline, control)
    sample_fit = fit_one(x, baseline, sample)

    delta = sample_fit["residual"] - control_fit["residual"]
    smooth_bg = moving_average(delta, window=window)
    peaks = delta - smooth_bg

    def _rms(values):
        return float(np.sqrt(np.mean(values ** 2)))

    useful_rms = _rms(peaks)
    useful_l1 = float(np.mean(np.abs(peaks)))
    background_rms = _rms(smooth_bg)
    substrate_rms = float(control_fit["rms_d"])
    parasite_rss = float(np.sqrt(background_rms ** 2 + substrate_rms ** 2))

    if parasite_rss > 0:
        spr = useful_rms / parasite_rss
    else:
        spr = float("inf")

    summary = {"control_fit": control_fit, "sample_fit": sample_fit}
    summary["delta"] = delta
    summary["smooth_bg"] = smooth_bg
    summary["peaks"] = peaks
    summary["useful_rms"] = useful_rms
    summary["useful_l1"] = useful_l1
    summary["background_rms"] = background_rms
    summary["substrate_rms"] = substrate_rms
    summary["parasite_rss"] = parasite_rss
    summary["spr"] = spr
    return summary

def parse_text_table(text: str):
    """Turn pasted text into a list of token rows.

    Blank lines are dropped. The delimiter is detected from the first
    remaining line and then applied to every line.

    Args:
        text: the raw pasted table.

    Returns:
        list[list[str]]: one list of fields per non-blank line.

    Raises:
        ValueError: fewer than two non-blank lines were supplied.
    """
    non_blank = []
    for raw in text.strip().splitlines():
        if raw.strip():
            non_blank.append(raw)
    if len(non_blank) < 2:
        raise ValueError("Please paste at least two rows.")

    delimiter = detect_delimiter(non_blank[0])
    return [split_line(raw, delimiter) for raw in non_blank]

def parse_control_mode(rows):
    """Build the control-mode data set from tokenised table rows.

    Layout: column 0 is the x axis, column 1 the baseline, and every further
    column (at most ``MAX_SUBPPRATES`` of them) a substrate spectrum.

    Row handling:
      * The first row is treated as a header when any non-empty token from the
        third column onward is not a number.
      * Trailing empty cells (e.g. from trailing delimiters) are ignored.
      * A data row is used only if it has at least three cells and every cell
        is a finite number (decimal commas accepted).
      * The table width is the most common width among those numeric rows, so
        a stray longer or shorter line no longer invalidates the whole table.

    Substrate names come from the header when it holds non-numeric text for
    that column, otherwise ``Substrate_<n>``. Names are whitespace-normalised,
    cut to 30 characters and made unique with a ``_2``, ``_3`` ... suffix,
    because the report functions receive the results as a name-keyed mapping.

    Args:
        rows: list of token lists, e.g. as produced by ``parse_text_table``.

    Returns:
        dict: ``{"mode": "control", "x": array, "b": array, "subs": [...]}``
        where each entry of ``subs`` is ``{"name": str, "values": array}``.

    Raises:
        ValueError: fewer than three usable numeric rows were found.
    """
    if not rows:
        raise ValueError("Not enough numeric rows found.")

    # Empty cells are not header text; only real non-numeric tokens count.
    header_present = any(
        str(tok).strip() and not is_floatish(tok) for tok in rows[0][2:]
    )
    headers = rows[0] if header_present else None
    data_rows = rows[1:] if header_present else rows

    parsed = []
    for r in data_rows:
        tokens = [str(v).strip() for v in r]
        while tokens and not tokens[-1]:
            tokens.pop()
        if len(tokens) < 3:
            continue
        try:
            values = [float(v.replace(",", ".")) for v in tokens]
        except ValueError:
            continue
        # "nan"/"inf" parse as floats but cannot take part in a fit.
        if all(math.isfinite(v) for v in values):
            parsed.append(values)
    if len(parsed) < 3:
        raise ValueError("Not enough numeric rows found.")

    # Use the dominant row width (the wider one on a tie).
    lengths = [len(v) for v in parsed]
    n_cols = max(set(lengths), key=lambda n: (lengths.count(n), n))
    numeric = [v for v in parsed if len(v) == n_cols]
    if len(numeric) < 3:
        raise ValueError("Not enough numeric rows found.")

    arr = np.asarray(numeric, dtype=float)
    x = arr[:, 0]
    b = arr[:, 1]
    subs = []
    seen = set()
    for j in range(2, min(arr.shape[1], 2 + MAX_SUBPPRATES)):
        header = str(headers[j]).strip() if headers and j < len(headers) else ""
        if header and not is_floatish(header):
            name = header
        else:
            name = f"Substrate_{j-1}"
        name = re.sub(r"\s+", " ", name).strip()[:30]
        base, k = name, 2
        while name in seen:
            suffix = f"_{k}"
            name = base[:30 - len(suffix)] + suffix
            k += 1
        seen.add(name)
        subs.append({"name": name, "values": arr[:, j]})
    return {"mode": "control", "x": x, "b": b, "subs": subs}

def parse_paired_mode(rows):
    """Build the paired-mode data set from tokenised table rows.

    The first row is the header. Column 0 is the x axis; the other columns are
    grouped by the text before the last ``__`` of their header, and a group is
    used when it has ``baseline``, ``control`` and ``sample`` members (suffix
    compared case-insensitively).

    Row handling:
      * Trailing empty cells (e.g. from trailing delimiters) are ignored.
      * A data row is used only if every cell is a finite number (decimal
        commas accepted).
      * The table width is the most common width among those numeric rows, so
        a stray longer or shorter line no longer invalidates the whole table.
      * Header columns that have no data column behind them are ignored
        instead of causing an ``IndexError``.

    Args:
        rows: list of token lists, e.g. as produced by ``parse_text_table``.

    Returns:
        dict: ``{"mode": "paired", "x": array, "pairs": [...]}`` where each
        pair is ``{"name", "baseline", "control", "sample"}``.

    Raises:
        ValueError: fewer than three usable numeric rows, or no complete
            triplet, were found.
    """
    if len(rows) < 2:
        raise ValueError("Not enough numeric rows found for paired mode.")
    headers = rows[0]

    parsed = []
    for r in rows[1:]:
        tokens = [str(v).strip() for v in r]
        while tokens and not tokens[-1]:
            tokens.pop()
        if not tokens:
            continue
        try:
            values = [float(v.replace(",", ".")) for v in tokens]
        except ValueError:
            continue
        # "nan"/"inf" parse as floats but cannot take part in a fit.
        if all(math.isfinite(v) for v in values):
            parsed.append(values)
    if len(parsed) < 3:
        raise ValueError("Not enough numeric rows found for paired mode.")

    # Use the dominant row width (the wider one on a tie).
    lengths = [len(v) for v in parsed]
    n_cols = max(set(lengths), key=lambda n: (lengths.count(n), n))
    numeric = [v for v in parsed if len(v) == n_cols]
    if len(numeric) < 3:
        raise ValueError("Not enough numeric rows found for paired mode.")
    arr = np.asarray(numeric, dtype=float)

    # Find triplets based on header suffixes.
    groups = {}
    for j, h in enumerate(headers):
        # Column 0 is always the x axis; columns beyond the data width have
        # no values to offer.
        if j == 0 or j >= n_cols:
            continue
        hh = str(h).strip()
        if "__" not in hh:
            continue
        prefix, suffix = hh.rsplit("__", 1)
        groups.setdefault(prefix.strip(), {})[suffix.strip().lower()] = j

    pairs = []
    for prefix, mapping in groups.items():
        if all(k in mapping for k in ("baseline", "control", "sample")):
            pairs.append({
                "name": prefix,
                "baseline": arr[:, mapping["baseline"]],
                "control": arr[:, mapping["control"]],
                "sample": arr[:, mapping["sample"]],
            })
    if not pairs:
        raise ValueError("No valid paired triplets found. Expected headers like Pair__baseline / Pair__control / Pair__sample.")
    x = arr[:, 0]
    return {"mode": "paired", "x": x, "pairs": pairs}

def detect_mode(rows):
    """Choose the analysis mode from the first table row.

    Args:
        rows: list of token lists; only ``rows[0]`` is inspected.

    Returns:
        ``"paired"`` when any token of the first row contains ``__baseline``,
        ``__control`` or ``__sample`` (case-insensitive), else ``"control"``.
    """
    markers = ("__baseline", "__control", "__sample")
    for token in rows[0]:
        lowered = str(token).lower()
        for marker in markers:
            if marker in lowered:
                return "paired"
    return "control"

def make_control_report(results):
    """Render the control-mode results as a fixed-width text report.

    Args:
        results: mapping of substrate name -> result dict with the keys
            ``alpha``, ``beta``, ``gamma``, ``sigma_b``, ``sigma_ref``,
            ``rms_d``, ``rms_ratio``, ``l1_d``, ``pi`` and ``classification``.
            May be empty, in which case only the header and the methodology
            summary are produced.

    Returns:
        str: the report (title, table, methodology summary).

    Each column keeps its preset minimum width and is widened only when a
    header or value would otherwise touch or overrun the next column. (The PI
    header, for example, is longer than its preset width.) Header and data
    rows therefore always line up.
    """
    cols = ["Substrate","α","β","γ","σ_b","σ_ref","RMS_d","RMS_d/σ_ref","L1_d",f"PI(>|{PI_K}·σ_ref|) %","Class"]

    table = []
    for name, res in results.items():
        table.append([
            str(name), f"{res['alpha']:.6g}", f"{res['beta']:.6g}", f"{res['gamma']:.6g}",
            f"{res['sigma_b']:.6g}", f"{res['sigma_ref']:.6g}", f"{res['rms_d']:.6g}",
            f"{res['rms_ratio']:.4g}", f"{res['l1_d']:.6g}", f"{100*res['pi']:.3f}",
            res["classification"],
        ])

    longest_name = max((len(row[0]) for row in table), default=0)
    min_widths = [max(11, min(30, longest_name + 2)), 10,10,10,10,10,10,14,10,16,24]

    # Widen a column when its longest cell needs it; every column except the
    # last keeps at least one separating space.
    last = len(cols) - 1
    widths = []
    for i, (minimum, cells) in enumerate(zip(min_widths, zip(cols, *table))):
        gap = 0 if i == last else 1
        widths.append(max(minimum, max(len(cell) for cell in cells) + gap))

    lines = []
    lines.append("Raman baseline-projection report")
    lines.append("=" * 78)
    header = "".join(c.ljust(w) for c, w in zip(cols, widths))
    lines.append(header)
    lines.append("-" * len(header))
    for row in table:
        lines.append("".join(v.ljust(w) for v, w in zip(row, widths)))
    lines.append("")
    lines.append("Methodology Summary:")
    lines.append("  Fit s ≈ α·b + β·1 + γ·x (least squares) per substrate; residual d = s − (α·b + β + γ·x).")
    lines.append("  σ_b from MAD(b). Stabilized σ_ref = max(MAD(b), MAD(d after 3σ clip)).")
    # Report the PI multiplier that is actually in effect.
    lines.append(f"  Metrics: RMS_d, RMS_d/σ_ref, L1_d, PI (fraction of |d| > {PI_K:g}·σ_ref).")
    lines.append(f"  Classification: CLEAN if (RMS_d/σ_ref ≤ {THRESHOLDS['clean_rms_ratio_max']:.6g}) "
                 f"AND (PI ≤ {100*THRESHOLDS['clean_pi_max']:.6g}%);")
    lines.append(f"                 INTRINSIC if (RMS_d/σ_ref ≥ {THRESHOLDS['intrinsic_rms_ratio_min']:.6g}) "
                 f"OR (PI ≥ {100*THRESHOLDS['intrinsic_pi_min']:.6g}%);")
    lines.append("                 otherwise MINOR PARASITICS.")
    return "\n".join(lines)

def make_paired_report(results):
    """Render the paired-mode results as a fixed-width text report.

    Args:
        results: mapping of pair name -> result dict with ``control_fit``
            (providing ``alpha`` and ``classification``), ``sample_fit``
            (providing ``alpha``) and the scalars ``useful_rms``,
            ``background_rms``, ``substrate_rms``, ``parasite_rss``, ``spr``
            and ``useful_l1``. May be empty, in which case only the header and
            the methodology summary are produced.

    Returns:
        str: the report (title, table, methodology summary).

    Each column keeps its preset minimum width and is widened only when a
    header or value would otherwise touch or overrun the next column. (The
    classification "INTRINSIC SIGNAL PRESENT", for example, is longer than
    the preset class column.) Header and data rows therefore always line up.
    """
    cols = ["Pair","α_control","α_sample","Control class","Useful RMS","Background RMS","Substrate RMS","Parasite RSS","SPR","Useful L1"]

    table = []
    for name, res in results.items():
        table.append([
            str(name), f"{res['control_fit']['alpha']:.6g}", f"{res['sample_fit']['alpha']:.6g}",
            res['control_fit']['classification'], f"{res['useful_rms']:.6g}", f"{res['background_rms']:.6g}",
            f"{res['substrate_rms']:.6g}", f"{res['parasite_rss']:.6g}", f"{res['spr']:.4g}", f"{res['useful_l1']:.6g}",
        ])

    longest_name = max((len(row[0]) for row in table), default=0)
    min_widths = [max(12, min(34, longest_name + 2)),10,10,18,12,15,14,14,10,12]

    # Widen a column when its longest cell needs it; every column except the
    # last keeps at least one separating space.
    last = len(cols) - 1
    widths = []
    for i, (minimum, cells) in enumerate(zip(min_widths, zip(cols, *table))):
        gap = 0 if i == last else 1
        widths.append(max(minimum, max(len(cell) for cell in cells) + gap))

    lines = []
    lines.append("Raman paired-analysis report")
    lines.append("=" * 78)
    header = "".join(c.ljust(w) for c, w in zip(cols, widths))
    lines.append(header)
    lines.append("-" * len(header))
    for row in table:
        lines.append("".join(v.ljust(w) for v, w in zip(row, widths)))
    lines.append("")
    lines.append("Methodology Summary:")
    lines.append("  For each pair, the control and sample spectra are baseline-projected separately against the supplied baseline.")
    lines.append("  The paired residual is Δ = d_sample − d_control. Δ is then decomposed into a smooth background term")
    lines.append("  (moving average) and a peak-rich component. Useful Raman signal is quantified as RMS(peaks).")
    lines.append("  Parasitic contribution is quantified as RSS(RMS(background), RMS(control residual)).")
    lines.append("  SPR = Useful RMS / Parasite RSS. Larger SPR indicates that the structured sample contribution dominates")
    lines.append("  over smooth background and substrate-induced parasitics.")
    return "\n".join(lines)


class App(tk.Tk):
    """Main application window.

    The window has a toolbar, a text area where the user pastes a table, a
    results/report preview, an optional user-manual pane on the right, and a
    status bar. The outcome of the most recent successful analysis is cached
    on the instance so it can be exported as TSV or saved as a TXT report.
    """

    def __init__(self):
        """Create the window and initialise the cached analysis state."""
        super().__init__()
        self.title(APP_TITLE)
        self.geometry("1400x840")
        self.minsize(1080, 680)
        # State of the last successful analysis (all None until Analyze succeeds).
        self.last_mode = None
        self.last_results = None
        self.last_data = None
        # Report text exactly as shown in the preview; saved verbatim by on_report().
        self.last_report = None
        self.manual_visible = False
        self.hints_visible = True
        self.threshold_window = None
        self._build_ui()

    def _build_ui(self):
        """Create and lay out all widgets of the main window."""
        top = ttk.Frame(self, padding=8)
        top.pack(side=tk.TOP, fill=tk.X)
        ttk.Button(top, text="Analyze", command=self.on_analyze).pack(side=tk.LEFT)
        # Export/report buttons stay disabled until an analysis has succeeded.
        self.btn_export = ttk.Button(top, text="Export TSV", command=self.on_export, state=tk.DISABLED)
        self.btn_export.pack(side=tk.LEFT, padx=(8,0))
        self.btn_report = ttk.Button(top, text="Save Report (TXT)", command=self.on_report, state=tk.DISABLED)
        self.btn_report.pack(side=tk.LEFT, padx=(8,0))
        ttk.Button(top, text="Clear", command=self.on_clear).pack(side=tk.LEFT, padx=(8,0))
        ttk.Separator(top, orient=tk.VERTICAL).pack(side=tk.LEFT, fill=tk.Y, padx=10)
        self.btn_hints = ttk.Button(top, text="Hide Hints", command=self.toggle_hints)
        self.btn_hints.pack(side=tk.LEFT)
        self.btn_manual = ttk.Button(top, text="Show User manual", command=self.toggle_manual)
        self.btn_manual.pack(side=tk.LEFT, padx=(8,0))
        ttk.Button(top, text="Threshold settings", command=self.open_threshold_settings).pack(side=tk.LEFT, padx=(8,0))

        # Main horizontal split: analysis workspace on the left, readable manual on the right.
        self.main_pane = tk.PanedWindow(self, orient=tk.HORIZONTAL, sashwidth=6, sashrelief=tk.RAISED, bd=0)
        self.main_pane.pack(side=tk.TOP, fill=tk.BOTH, expand=True, padx=8, pady=(0,8))

        self.work_frame = ttk.Frame(self.main_pane)
        self.manual_frame = ttk.LabelFrame(self.main_pane, text="User manual", padding=8)

        self.main_pane.add(self.work_frame, minsize=650, stretch="always")
        # User manual is created but hidden by default. It can be shown as a right-side pane.
        # self.manual_frame is added to self.main_pane in toggle_manual().

        mid = ttk.LabelFrame(self.work_frame, text="Paste your table here:", padding=8)
        mid.pack(side=tk.TOP, fill=tk.BOTH, expand=True, pady=(0,8))

        # Row 0 holds the hints, row 1 the input box; only the input box stretches.
        mid.columnconfigure(0, weight=1)
        mid.rowconfigure(1, weight=1)

        self.hints_frame = ttk.LabelFrame(mid, text="Hints", padding=8)
        self.hints_frame.grid(row=0, column=0, sticky="ew", pady=(0,8))
        self.input_help = ttk.Label(self.hints_frame, text=INPUT_HELP_TEXT, justify=tk.LEFT, wraplength=760)
        self.input_help.pack(side=tk.TOP, fill=tk.X)

        # Input text box with both scrollbars (wrapping is off, so wide tables scroll).
        self.input_holder = ttk.Frame(mid)
        self.input_holder.grid(row=1, column=0, sticky="nsew")
        self.txt_input = tk.Text(self.input_holder, wrap=tk.NONE, height=18, undo=True)
        self.txt_input.grid(row=0, column=0, sticky="nsew")
        yscroll = ttk.Scrollbar(self.input_holder, orient="vertical", command=self.txt_input.yview)
        yscroll.grid(row=0, column=1, sticky="ns")
        xscroll = ttk.Scrollbar(self.input_holder, orient="horizontal", command=self.txt_input.xview)
        xscroll.grid(row=1, column=0, sticky="ew")
        self.input_holder.rowconfigure(0, weight=1)
        self.input_holder.columnconfigure(0, weight=1)
        self.txt_input.configure(xscrollcommand=xscroll.set, yscrollcommand=yscroll.set)

        bot = ttk.LabelFrame(self.work_frame, text="Results / report preview:", padding=8)
        bot.pack(side=tk.TOP, fill=tk.BOTH, expand=True)
        self.txt_results = tk.Text(bot, wrap=tk.WORD, height=14)
        self.txt_results.pack(side=tk.TOP, fill=tk.BOTH, expand=True)

        # Read-only manual text; its frame is only attached to the pane on demand.
        manual_font = tkfont.Font(family="Segoe UI", size=8)
        self.txt_manual = scrolledtext.ScrolledText(self.manual_frame, wrap=tk.WORD, width=56, font=manual_font)
        self.txt_manual.pack(side=tk.TOP, fill=tk.BOTH, expand=True)
        self.txt_manual.insert("1.0", USER_MANUAL)
        self.txt_manual.configure(state=tk.DISABLED)

        self.status = tk.StringVar(value=f"Ready. Threshold settings file: {CONFIG_PATH}")
        ttk.Label(self, textvariable=self.status, anchor="w", padding=(8,4)).pack(side=tk.BOTTOM, fill=tk.X)

    def toggle_hints(self):
        """Show or hide the hints frame above the input box."""
        if self.hints_visible:
            # grid_remove() hides the frame without destroying it.
            self.hints_frame.grid_remove()
            self.btn_hints.configure(text="Show Hints")
            self.hints_visible = False
            self.status.set("Hints hidden.")
        else:
            self.hints_frame.grid(row=0, column=0, sticky="ew", pady=(0,8))
            self.btn_hints.configure(text="Hide Hints")
            self.hints_visible = True
            self.status.set("Hints shown.")

    def toggle_manual(self):
        """Show or hide the user-manual pane on the right side."""
        if self.manual_visible:
            self.main_pane.forget(self.manual_frame)
            self.btn_manual.configure(text="Show User manual")
            self.manual_visible = False
            self.status.set("User manual hidden.")
        else:
            self.main_pane.add(self.manual_frame, minsize=320, width=520, stretch="never")
            self.btn_manual.configure(text="Hide User manual")
            self.manual_visible = True
            self.status.set("User manual shown on the right side.")

    def open_threshold_settings(self):
        """Open the threshold settings dialog, or raise it if already open."""
        if self.threshold_window is not None and self.threshold_window.winfo_exists():
            self.threshold_window.lift()
            self.threshold_window.focus_force()
            return

        win = tk.Toplevel(self)
        self.threshold_window = win
        win.title("Threshold settings")
        win.transient(self)
        win.resizable(False, False)
        win.columnconfigure(0, weight=1)

        info = ttk.Label(
            win,
            text=(
                "These values control the CLEAN / MINOR PARASITICS / INTRINSIC SIGNAL PRESENT classification.\n"
                "PI values are stored as fractions, not percent: 0.005 = 0.5%, 0.025 = 2.5%.\n"
                f"Settings are saved to: {CONFIG_PATH}"
            ),
            justify=tk.LEFT,
            padding=10,
            wraplength=760,
        )
        info.grid(row=0, column=0, sticky="ew")

        frame = ttk.LabelFrame(win, text="Classification thresholds", padding=10)
        frame.grid(row=1, column=0, sticky="nsew", padx=10, pady=(0,10))

        # One row per threshold: label, entry, help text. `entries` maps key -> StringVar.
        entries = {}
        for row, key in enumerate(DEFAULT_THRESHOLDS):
            label, help_text = THRESHOLD_LABELS[key]
            ttk.Label(frame, text=label).grid(row=row, column=0, sticky="w", padx=(0,10), pady=4)
            var = tk.StringVar(value=f"{THRESHOLDS[key]:.12g}")
            ent = ttk.Entry(frame, textvariable=var, width=16)
            ent.grid(row=row, column=1, sticky="w", pady=4)
            ttk.Label(frame, text=help_text, foreground="#555555", wraplength=460).grid(row=row, column=2, sticky="w", padx=(10,0), pady=4)
            entries[key] = var

        button_row = ttk.Frame(win, padding=(10,0,10,10))
        button_row.grid(row=2, column=0, sticky="ew")
        button_row.columnconfigure(0, weight=1)

        def parse_entries():
            """Return {key: float} from the entry fields; raise ValueError on bad input."""
            values = {}
            for key, var in entries.items():
                # Accept a decimal comma as well as a decimal point.
                raw = var.get().strip().replace(",", ".")
                try:
                    value = float(raw)
                except ValueError:
                    raise ValueError(f"Invalid number for {key}: {var.get()!r}") from None
                if not math.isfinite(value):
                    raise ValueError(f"Invalid non-finite value for {key}.")
                if value < 0:
                    raise ValueError(f"Threshold {key} must not be negative.")
                if "pi" in key and value > 1:
                    raise ValueError(f"PI threshold {key} should be a fraction between 0 and 1, e.g. 0.005 for 0.5%.")
                values[key] = value
            return values

        def rerun_hint():
            """Return the reminder appended to the status when a report is on screen."""
            if self.last_results is not None:
                return "  Re-run Analyze to update the current report."
            return ""

        def apply_and_save(close_after=False):
            """Validate, apply and persist the thresholds; optionally close the dialog."""
            try:
                values = parse_entries()
                apply_thresholds(values)
                save_thresholds(THRESHOLDS)
            except Exception as e:
                # UI boundary: report validation and save problems in a dialog.
                messagebox.showerror("Threshold settings", str(e), parent=win)
                return
            self.status.set(f"Thresholds updated and saved to: {CONFIG_PATH}" + rerun_hint())
            if close_after:
                win.destroy()

        def reset_defaults():
            """Restore the default thresholds in the fields, in memory and on disk."""
            for key, value in DEFAULT_THRESHOLDS.items():
                entries[key].set(f"{value:.12g}")
            apply_thresholds(DEFAULT_THRESHOLDS)
            try:
                save_thresholds(THRESHOLDS)
                self.status.set(f"Thresholds reset to defaults and saved to: {CONFIG_PATH}" + rerun_hint())
            except Exception as e:
                messagebox.showerror("Threshold settings", f"Could not save defaults:\n{e}", parent=win)

        ttk.Button(button_row, text="Apply and save", command=lambda: apply_and_save(False)).grid(row=0, column=0, sticky="e", padx=(0,8))
        ttk.Button(button_row, text="Reset thresholds to default", command=reset_defaults).grid(row=0, column=1, sticky="e", padx=(0,8))
        ttk.Button(button_row, text="Save and close", command=lambda: apply_and_save(True)).grid(row=0, column=2, sticky="e", padx=(0,8))
        ttk.Button(button_row, text="Close", command=win.destroy).grid(row=0, column=3, sticky="e")

        win.protocol("WM_DELETE_WINDOW", win.destroy)
        win.grab_set()
        # Let Tk compute the dialog size before centring it over the main window.
        win.update_idletasks()
        x = self.winfo_rootx() + max(40, (self.winfo_width() - win.winfo_width()) // 2)
        y = self.winfo_rooty() + max(40, (self.winfo_height() - win.winfo_height()) // 2)
        win.geometry(f"+{x}+{y}")

    def on_clear(self):
        """Empty both text areas and forget the last analysis."""
        self.txt_input.delete("1.0", tk.END)
        self.txt_results.delete("1.0", tk.END)
        self.last_mode = None
        self.last_results = None
        self.last_data = None
        self.last_report = None
        self.btn_export.configure(state=tk.DISABLED)
        self.btn_report.configure(state=tk.DISABLED)
        self.status.set("Cleared.")

    def on_analyze(self):
        """Parse the pasted table, run the analysis and show the report.

        Parse and analysis failures are reported in an error dialog and leave
        the previously cached results untouched.
        """
        txt = self.txt_input.get("1.0", tk.END)
        if not txt.strip():
            messagebox.showinfo(APP_TITLE, "Please paste your table first.")
            return
        try:
            rows = parse_text_table(txt)
            mode = detect_mode(rows)
            data = parse_paired_mode(rows) if mode == "paired" else parse_control_mode(rows)
        except Exception as e:
            messagebox.showerror(APP_TITLE, f"Parse error:\n{e}")
            return

        try:
            if data["mode"] == "control":
                results = {}
                for sub in data["subs"]:
                    results[sub["name"]] = fit_one(data["x"], data["b"], sub["values"])
                report = make_control_report(results)
            else:
                results = {}
                for pair in data["pairs"]:
                    results[pair["name"]] = paired_one(data["x"], pair["baseline"], pair["control"], pair["sample"])
                report = make_paired_report(results)
        except Exception as e:
            messagebox.showerror(APP_TITLE, f"Analysis error:\n{e}")
            return

        self.last_mode = data["mode"]
        self.last_results = results
        self.last_data = data
        # Keep the exact report text so that "Save Report" writes what is shown,
        # even if the thresholds are edited before the report is saved.
        self.last_report = report
        self.txt_results.delete("1.0", tk.END)
        self.txt_results.insert(tk.END, report)
        self.btn_export.configure(state=tk.NORMAL)
        self.btn_report.configure(state=tk.NORMAL)
        self.status.set(f"Analysis complete ({self.last_mode} mode).")

    @staticmethod
    def _build_tsv(mode, data, results):
        """Return the TSV export text for one analysis.

        Parameters
        ----------
        mode : str
            ``"control"`` selects the residual layout; any other value
            selects the paired layout.
        data : mapping
            Parsed input; only ``data["x"]`` is read.
        results : mapping
            Per-name results as produced by the analysis step.

        Returns
        -------
        str
            Header line plus one line per x value, each terminated by ``\\n``.
        """
        names = list(results)
        if mode == "control":
            header = ["x"] + [f"residual_{name}" for name in names]
            columns = [results[name]["residual"] for name in names]
        else:
            header = ["x"]
            columns = []
            for name in names:
                header += [f"{name}__d_control", f"{name}__d_sample", f"{name}__delta", f"{name}__smooth_bg", f"{name}__peaks"]
                rr = results[name]
                columns += [rr['control_fit']['residual'], rr['sample_fit']['residual'],
                            rr['delta'], rr['smooth_bg'], rr['peaks']]
        lines = ["\t".join(header)]
        for i, x_value in enumerate(data["x"]):
            # Index each column by row so a too-short column raises instead of
            # silently truncating the export.
            lines.append("\t".join([f"{x_value}"] + [f"{col[i]}" for col in columns]))
        return "\n".join(lines) + "\n"

    def _save_text(self, path, text, what):
        """Write ``text`` to ``path`` as UTF-8.

        Returns True on success. On an OSError, shows an error dialog naming
        ``what`` could not be saved and returns False.
        """
        try:
            with open(path, "w", encoding="utf-8") as f:
                f.write(text)
        except OSError as e:
            messagebox.showerror(APP_TITLE, f"Could not save {what}:\n{e}")
            return False
        return True

    def on_export(self):
        """Ask for a file name and write the last analysis as TSV."""
        if not self.last_results or not self.last_data:
            return
        text = self._build_tsv(self.last_mode, self.last_data, self.last_results)
        initial = "control_residuals.tsv" if self.last_mode == "control" else "paired_analysis.tsv"
        path = filedialog.asksaveasfilename(title="Save TSV", defaultextension=".tsv", initialfile=initial,
                                            filetypes=[("TSV files","*.tsv"),("All files","*.*")])
        if path and self._save_text(path, text, "TSV"):
            self.status.set(f"TSV saved to: {path}")

    def on_report(self):
        """Ask for a file name and save the report shown by the last analysis."""
        if not self.last_results or self.last_report is None:
            return
        initial = "paired_report.txt" if self.last_mode == "paired" else "control_report.txt"
        path = filedialog.asksaveasfilename(title="Save TXT report", defaultextension=".txt", initialfile=initial,
                                            filetypes=[("Text files","*.txt"),("All files","*.*")])
        if path and self._save_text(path, self.last_report, "report"):
            self.status.set(f"Report saved to: {path}")

def main():
    """Create the application window and run the Tk event loop until it exits."""
    App().mainloop()


# Start the GUI only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
