Source code for wetlandmapper.wct

"""
wct.py

# Copyright (c) 2026, Manudeo Singh          #
# Author: Manudeo Singh, March 2026          #
------
Wetland Cover Type (WCT) classification from combined MNDWI, NDVI, and NDTI.

Two classification approaches are provided, both based on:
    Singh, M., Allaka, S., Gupta, P. K., Patel, J. G., & Sinha, R. (2022).
    Deriving wetland-cover types (WCTs) from integration of multispectral indices
    based on Earth observation data.
    Environmental Monitoring and Assessment, 194(12), 878.
    https://doi.org/10.1007/s10661-022-10541-7

Functions
---------
classify_wct_ema(indices, n_parts=4)
    Original EMA paper method.
    Each index is independently binned into n_parts equal intervals over its
    positive [0, 1] range (default 4, giving quartile step 0.25), producing
    a level 0-4 per index.  Every pixel then carries a three-way combination
    code  w_level × v_level × t_level  (e.g. "w2v3t1") and the WCT class
    is read from a pre-built 5×5×5 lookup table.  No priority ordering:
    each unique combination has exactly one class.

    Critical implication: a pixel with negative MNDWI (w0) but high NDVI
    (v3 or v4) is assigned WCT 4 (Emergent/Floating Veg) because dense
    aquatic vegetation completely suppresses the water signal in MNDWI.

classify_wct(indices, thresholds=None)
    Improved continuous-threshold method.
    Same five WCT classes, but boundaries are free-floating values that can
    be tuned per sensor, season, or region without changing the bin structure.

WCT class codes (shared by both methods)
 1  Open Clear Water         High MNDWI,  Low  NDVI,  Low  NDTI
 2  Turbid / Sediment-laden  High MNDWI,  Low  NDVI,  High NDTI
 3  Submerged Aquatic Veg.   High MNDWI,  Mod. NDVI,  Low  NDTI
 4  Emergent / Floating Veg. Any  MNDWI,  High NDVI,  Low  NDTI  ← includes w0
 5  Moist / Waterlogged Soil Low  MNDWI,  Low  NDVI,  Mod. NDTI
 0  Non-wetland / Dry
"""

from __future__ import annotations

import warnings

import numpy as np
import xarray as xr

try:
    import rioxarray  # noqa: F401

    _HAS_RIO = True
except ImportError:
    _HAS_RIO = False

__all__ = [
    "classify_wct_ema",
    "classify_wct",
    "WCT_CLASSES",
    "WCT_COLORS",
    "WCT_EMA_QUARTILE_BOUNDARIES",
    "build_ema_lookup_table",
]


# ---------------------------------------------------------------------------
# Class metadata
# ---------------------------------------------------------------------------

WCT_CLASSES: dict[int, str] = {
    0: "Non-wetland",
    1: "Open Clear Water",
    2: "Turbid / Sediment-laden Water",
    3: "Submerged Aquatic Vegetation",
    4: "Emergent / Floating Vegetation",
    5: "Moist / Waterlogged Soil",
}

WCT_COLORS: dict[int, str] = {
    0: "#f2f3f4",  # light grey   — Non-wetland
    1: "#1a78c2",  # blue         — Open Clear Water
    2: "#d4a017",  # amber/brown  — Turbid Water
    3: "#27ae60",  # medium green — Submerged Aquatic Veg.
    4: "#145a32",  # dark green   — Emergent / Floating Veg.
    5: "#a04000",  # brown        — Moist Soil
}

WCT_EMA_QUARTILE_BOUNDARIES: dict = {
    "description": "Equal-width bins over positive [0,1] range with n_parts=4",
    "boundaries": [0.0, 0.25, 0.50, 0.75, 1.00],
    "level_names": {
        0: "Negative/Dry (< 0)",
        1: "Low          [0.00, 0.25)",
        2: "Moderate-Low [0.25, 0.50)",
        3: "Moderate-High[0.50, 0.75)",
        4: "High         [0.75, 1.00]",
    },
}


# ---------------------------------------------------------------------------
# Default thresholds — improved (continuous) method
# ---------------------------------------------------------------------------

_DEFAULT_THRESHOLDS: dict[str, float] = {
    "mndwi_water": 0.0,  # MNDWI above which pixel is open-water-dominated
    "mndwi_moist": -0.2,  # MNDWI lower bound for moist-soil zone
    "ndvi_veg_high": 0.2,  # NDVI above which emergent/floating veg. is present
    "ndvi_veg_low": 0.05,  # NDVI lower bound for submerged vegetation signal
    "ndti_turbid": 0.0,  # NDTI above which water is turbid/sediment-laden
}


# ---------------------------------------------------------------------------
# EMA lookup table  (5 × 5 × 5 : mndwi_level × ndvi_level × ndti_level)
# ---------------------------------------------------------------------------



[docs]
def build_ema_lookup_table(n_parts: int = 4) -> np.ndarray:
    """Build the (n_parts+1)³ lookup table mapping index levels → WCT class.

    Every entry ``table[w, v, t]`` is the WCT class code (0–5) for the
    combination of MNDWI level w, NDVI level v, and NDTI level t.

    Level coding (same for all three indices):
        0  — negative value (dry / non-wetland)
        1  — [0,         1/n_parts)        Low
        2  — [1/n_parts, 2/n_parts)        Moderate-Low
        ...
        n  — [(n-1)/n_parts, 1.0]          High   (n = n_parts)

    WCT class rules (applied in this order; later entries overwrite earlier):

    WCT  MNDWI level   NDVI level     NDTI level   Cover type
    ---  -----------   ----------     ----------   ----------
     5   w == 1        v <= 1         t <= 2       Moist / Waterlogged Soil
     3   w >= 2        1 <= v <= 2    t <= 1       Submerged Aquatic Veg.
     2   w >= 2        v <= 1         t >= 2       Turbid / Sediment Water
     4   w >= 0        v >= 3         t <= 1       Emergent / Floating Veg. (*)
     1   w >= 3        v <= 1         t <= 1       Open Clear Water

    (*) WCT 4 matches even w == 0 (negative MNDWI): dense aquatic vegetation
    completely suppresses the MNDWI water signal, so the pixel reads as dry
    on MNDWI but shows high NDVI.  The combination code w0v3t1 (for example)
    is unambiguously aquatic-vegetated, not dry land.

    Parameters
    ----------
    n_parts : int
        Number of equal bins over [0, 1]. Default 4.

    Returns
    -------
    np.ndarray, shape (n_parts+1, n_parts+1, n_parts+1), dtype int8
        Lookup table indexed as [mndwi_level, ndvi_level, ndti_level].
    """
    n = n_parts
    size = n + 1
    table = np.zeros((size, size, size), dtype=np.int8)

    # WCT 5 — Moist / Waterlogged Soil
    # Low positive MNDWI (w==1), low NDVI (v<=1), low-moderate NDTI (t<=2)
    for w in [1]:
        for v in range(0, 2):  # 0, 1
            for t in range(0, 3):  # 0, 1, 2
                table[w, v, t] = 5

    # WCT 3 — Submerged Aquatic Vegetation
    # Moderate-high MNDWI (w>=2), moderate NDVI (v 1–2), clear water (t<=1)
    for w in range(2, n + 1):  # 2, 3, 4
        for v in range(1, 3):  # 1, 2
            for t in range(0, 2):  # 0, 1
                table[w, v, t] = 3

    # WCT 2 — Turbid / Sediment-laden Water
    # Moderate-high MNDWI (w>=2), low NDVI (v<=1), elevated NDTI (t>=2)
    for w in range(2, n + 1):  # 2, 3, 4
        for v in range(0, 2):  # 0, 1
            for t in range(2, n + 1):  # 2, 3, 4
                table[w, v, t] = 2

    # WCT 4 — Emergent / Floating Vegetation
    # ANY MNDWI including w==0 (veg masks water signal), high NDVI (v>=3), clear (t<=1)
    for w in range(0, n + 1):  # 0, 1, 2, 3, 4
        for v in range(3, n + 1):  # 3, 4
            for t in range(0, 2):  # 0, 1
                table[w, v, t] = 4

    # WCT 1 — Open Clear Water  (highest certainty: set last so it always wins
    # over WCT 3/5 at the boundary levels that both rules touch)
    # High MNDWI (w>=3), very low NDVI (v<=1), very low NDTI (t<=1)
    for w in range(3, n + 1):  # 3, 4
        for v in range(0, 2):  # 0, 1
            for t in range(0, 2):  # 0, 1
                table[w, v, t] = 1

    return table



# Cache the default table so it is built only once per process
_EMA_LOOKUP_4: np.ndarray = build_ema_lookup_table(n_parts=4)


# ---------------------------------------------------------------------------
# Original EMA method — combination-code lookup
# ---------------------------------------------------------------------------



[docs]
def classify_wct_ema(
    indices: xr.Dataset,
    n_parts: int = 4,
) -> xr.Dataset:
    """Classify Wetland Cover Types using the original Singh et al. (2022) EMA method.

    Each pixel is independently binned into a level (0–n_parts) for MNDWI,
    NDVI, and NDTI.  The three levels together form a **combination code**:

        w_level  v_level  t_level  →  WCT class
        -------  -------  -------     ---------
           0        4        0    →   4 (Emergent veg — water signal masked)
           3        0        0    →   1 (Open clear water)
           2        0        3    →   2 (Turbid water)
           2        2        0    →   3 (Submerged aquatic veg.)
           1        0        1    →   5 (Moist soil)

    The mapping is a pre-built lookup table (see :func:`build_ema_lookup_table`).
    There is **no priority ordering** — each combination has exactly one class.

    A vegetation-masked pixel (negative MNDWI but high NDVI) is classified as
    WCT 4 (Emergent / Floating Vegetation) even though its MNDWI alone would
    suggest dry land, because the dense canopy fully attenuates the water signal.

    Parameters
    ----------
    indices : xr.Dataset
        Dataset with variables ``"MNDWI"``, ``"NDVI"``, ``"NDTI"``.
        Typically produced by :func:`wetlandmapper.compute_indices`.
    n_parts : int
        Number of equal bins over the positive [0, 1] range.  Default 4
        (step 0.25; levels 1–4 at boundaries 0.25 / 0.50 / 0.75 / 1.00).

    Returns
    -------
    xr.Dataset
        Dataset with two variables:

        ``"wetland_cover_type"``
            Integer raster of WCT class codes (dtype int8).
            CRS preserved from input if rioxarray is available.
        ``"combination_code"``
            Integer raster encoding the three index levels that produced
            each WCT class. Hundreds digit = MNDWI level, tens = NDVI,
            units = NDTI (e.g. 401 → MNDWI High / NDVI Negative / NDTI
            Low → WCT 1). Useful for inspecting which index fractions
            drove the classification at each pixel.

    Notes
    -----
    The combination code is distinct from the class: two pixels with the
    same WCT class may have different combination codes, revealing how
    far each index was from the bin boundaries.

    Examples
    --------
    >>> from wetlandmapper import compute_indices, classify_wct_ema
    >>> indices = compute_indices(ds, green_band="B3", red_band="B4",
    ...                           nir_band="B5",  swir_band="B6")
    >>> wct = classify_wct_ema(indices)
    >>> wct.attrs['classification_method']
    'EMA-combination-lookup'

    References
    ----------
    Singh et al. (2022). Environmental Monitoring and Assessment, 194(12), 878.
    https://doi.org/10.1007/s10661-022-10541-7
    """
    _validate_input(indices)
    if not isinstance(n_parts, int) or n_parts < 2:
        raise ValueError(f"n_parts must be an integer >= 2, got {n_parts!r}")

    mndwi = indices["MNDWI"]
    ndvi = indices["NDVI"]
    ndti = indices["NDTI"]

    step = 1.0 / n_parts  # 0.25 for n_parts=4

    def _discretize(da: xr.DataArray) -> np.ndarray:
        """Return numpy int8 array of levels 0..n_parts for each pixel."""
        vals = da.values.astype(float)
        lvl = np.zeros(vals.shape, dtype=np.int8)  # 0 = negative
        for k in range(1, n_parts + 1):
            lo = (k - 1) * step
            hi = k * step if k < n_parts else 1.0 + 1e-9
            lvl = np.where((vals >= lo) & (vals < hi), np.int8(k), lvl)
        return lvl

    ml = _discretize(mndwi)  # shape (ny, nx)
    vl = _discretize(ndvi)
    tl = _discretize(ndti)

    # Build (or retrieve cached) lookup table
    if n_parts == 4:
        table = _EMA_LOOKUP_4
    else:
        table = build_ema_lookup_table(n_parts=n_parts)

    # Vectorised lookup: table[ml[i,j], vl[i,j], tl[i,j]] for every pixel
    wct_vals = table[ml, vl, tl]  # numpy fancy indexing, shape (ny, nx)

    # Combination code: encodes the three index levels at every pixel.
    # Digits: hundreds = MNDWI level, tens = NDVI level, units = NDTI level.
    # E.g. 401 → MNDWI High(4), NDVI Negative(0), NDTI Low(1) → WCT 1.
    combo_vals = (
        ml.astype(np.int16) * 100 + vl.astype(np.int16) * 10 + tl.astype(np.int16)
    )

    # Wrap both arrays as DataArrays preserving spatial coordinates
    wct = xr.DataArray(wct_vals, dims=mndwi.dims, coords=mndwi.coords)
    combo = xr.DataArray(combo_vals, dims=mndwi.dims, coords=mndwi.coords)

    combo.name = "combination_code"
    combo.attrs.update(
        long_name="EMA Index-Level Combination Code",
        encoding=(
            "hundreds digit = MNDWI level (0-n_parts), "
            "tens digit = NDVI level, units digit = NDTI level. "
            f"Level 0 = negative; levels 1-{n_parts} = equal bins "
            f"over [0, 1] with step {step:.2f}. "
            "Example: 401 = MNDWI High / NDVI Negative / NDTI Low"
            " = WCT 1 (Open Clear Water)."
        ),
        n_parts=n_parts,
        step=step,
    )

    wct = _finalise(
        wct,
        indices,
        method="EMA-combination-lookup",
        extra_attrs={
            "n_parts": n_parts,
            "step": step,
            "boundaries": [k * step for k in range(n_parts + 1)],
        },
    )

    return xr.Dataset(
        {"wetland_cover_type": wct, "combination_code": combo},
        attrs={
            "title": ("Wetland Cover Type - EMA combination-lookup classification"),
            "references": (
                "Singh et al. (2022). Environmental Monitoring and "
                "Assessment, 194(12), 878. "
                "https://doi.org/10.1007/s10661-022-10541-7"
            ),
        },
    )



# ---------------------------------------------------------------------------
# Improved continuous-threshold method
# ---------------------------------------------------------------------------



[docs]
def classify_wct(
    indices: xr.Dataset,
    thresholds: dict[str, float] | None = None,
) -> xr.DataArray:
    """Classify Wetland Cover Types using continuous, adjustable spectral thresholds.

    An improvement of :func:`classify_wct_ema`: boundaries are free-floating
    values (not locked to multiples of 1/n_parts), enabling sub-quartile
    calibration for different sensors, seasons, or geographic regions.
    The five WCT classes and their spectral logic are identical to the EMA method;
    only the boundary representation differs.

    WCT 4 (Emergent/Floating Veg.) is assigned whenever NDVI exceeds
    ``ndvi_veg_high`` regardless of the MNDWI sign, matching the EMA method's
    behaviour for vegetation-masked pixels.

    Parameters
    ----------
    indices : xr.Dataset
        Dataset with variables ``"MNDWI"``, ``"NDVI"``, ``"NDTI"``.
    thresholds : dict, optional
        Override any subset of the defaults (from Singh et al. 2022):

        ``"mndwi_water"``   (0.0)   MNDWI above which pixel is open water
        ``"mndwi_moist"``   (-0.2)  MNDWI lower bound for moist-soil zone
        ``"ndvi_veg_high"`` (0.2)   NDVI above which emergent veg. present
        ``"ndvi_veg_low"``  (0.05)  NDVI lower bound for submerged veg.
        ``"ndti_turbid"``   (0.0)   NDTI above which water is turbid

    Returns
    -------
    xr.DataArray
        Integer raster of WCT codes (dtype int8). Name: ``"wetland_cover_type"``.

    References
    ----------
    Singh et al. (2022). Environmental Monitoring and Assessment, 194(12), 878.
    """
    _validate_input(indices)

    thr = {**_DEFAULT_THRESHOLDS}
    if thresholds:
        unknown = set(thresholds) - set(_DEFAULT_THRESHOLDS)
        if unknown:
            warnings.warn(
                f"Unknown threshold key(s): {unknown}. "
                f"Valid keys: {set(_DEFAULT_THRESHOLDS)}",
                stacklevel=2,
            )
        thr.update(thresholds)

    mndwi = indices["MNDWI"]
    ndvi = indices["NDVI"]
    ndti = indices["NDTI"]

    is_water = mndwi > thr["mndwi_water"]
    is_moist = (mndwi > thr["mndwi_moist"]) & ~is_water
    is_turbid = ndti > thr["ndti_turbid"]
    has_high_veg = ndvi > thr["ndvi_veg_high"]
    has_low_veg = ndvi > thr["ndvi_veg_low"]

    wct = xr.zeros_like(mndwi, dtype=np.int8)

    # Assign in priority order — WCT 1 (most diagnostic) last so it wins
    wct = xr.where(is_moist & ~has_low_veg, np.int8(5), wct)  # Moist soil
    wct = xr.where(  # Submerged veg
        is_water & has_low_veg & ~has_high_veg,
        np.int8(3),
        wct,
    )
    wct = xr.where(  # Turbid water
        is_water & is_turbid & ~has_low_veg,
        np.int8(2),
        wct,
    )
    # WCT 4: any MNDWI (including negative) when NDVI is high — mirrors EMA
    wct = xr.where(has_high_veg & ~is_turbid, np.int8(4), wct)  # Emergent veg
    wct = xr.where(  # Clear water
        is_water & ~is_turbid & ~has_low_veg,
        np.int8(1),
        wct,
    )

    return _finalise(
        wct, indices, method="threshold", extra_attrs={"thresholds": str(thr)}
    )



# ---------------------------------------------------------------------------
# Shared private helpers
# ---------------------------------------------------------------------------


def _validate_input(indices: xr.Dataset) -> None:
    required = {"MNDWI", "NDVI", "NDTI"}
    missing = required - set(indices.data_vars)
    if missing:
        raise KeyError(
            f"Missing required variables: {missing}. "
            f"Present: {set(indices.data_vars)}. "
            "Use wetlandmapper.compute_indices() to generate all three."
        )


def _finalise(
    wct: xr.DataArray,
    indices: xr.Dataset,
    method: str,
    extra_attrs: dict,
) -> xr.DataArray:
    if _HAS_RIO:
        try:
            crs = indices["MNDWI"].rio.crs
            if crs is not None:
                wct = wct.rio.write_crs(crs)
        except Exception as e:
            warnings.warn(f"Could not write CRS to WCT output: {e}", stacklevel=3)

    wct.name = "wetland_cover_type"
    wct.attrs.update(
        long_name="Wetland Cover Type",
        classification_method=method,
        class_codes=str(WCT_CLASSES),
        references=(
            "Singh et al. (2022). Environmental Monitoring and Assessment, "
            "194(12), 878. https://doi.org/10.1007/s10661-022-10541-7"
        ),
        **extra_attrs,
    )
    return wct