#!/usr/bin/env python3
"""
Reproducible simulations for "Swarm Economies" (blog.pdj.dev).

Generates two figures (themed SVG, transparent background for the dark site):
  1. swarm-efficiency.svg  - realized welfare / optimum vs contention n/m,
                             market clearing vs naive greedy, with the exact
                             optimum computed by a 0/1-knapsack dynamic program.
  2. swarm-poa.svg         - empirical price of anarchy: welfare ratio of a
                             selfish (bid-shading) equilibrium to the optimum
                             vs a shading parameter, against a constant PoA bound.

Run:  python3 simulations.py
Deterministic (seeded). Requires numpy, matplotlib. No network, no data files.
"""

from __future__ import annotations
import pathlib
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

SEED = 7
RNG = np.random.default_rng(SEED)
OUT = pathlib.Path(__file__).resolve().parents[2] / "assets" / "figures"
OUT.mkdir(parents=True, exist_ok=True)

# ---- dark-site theme -------------------------------------------------------
ACCENT, C2, C3, C4 = "#4da3ff", "#ff7ab2", "#82d9ff", "#d2a8ff"
TEXT, MUTED, GRID = "#c9c9cf", "#8a8a90", "#2c2c2e"
plt.rcParams.update({
    "figure.facecolor": "none", "axes.facecolor": "none", "savefig.facecolor": "none",
    "font.family": "DejaVu Sans", "font.size": 11,
    "text.color": TEXT, "axes.labelcolor": TEXT,
    "xtick.color": MUTED, "ytick.color": MUTED,
    "axes.edgecolor": GRID, "grid.color": GRID,
    "axes.spines.top": False, "axes.spines.right": False,
    "legend.frameon": False, "figure.dpi": 110,
})

def style(ax):
    ax.grid(True, alpha=0.5, linewidth=0.7)
    ax.tick_params(length=3)
    for s in ("left", "bottom"):
        ax.spines[s].set_linewidth(0.8)

def save(fig, name):
    fig.tight_layout()
    fig.savefig(OUT / name, format="svg", transparent=True, bbox_inches="tight")
    plt.close(fig)
    print(f"wrote {OUT / name}")

# ---------------------------------------------------------------------------
# Allocation model.
#
# m identical units of a single divisible-in-units scarce resource and n agents.
# Agent i demands q_i units (drawn small) and has a private per-unit value w_i.
# Its valuation for receiving its full demand is v_i = w_i * q_i (unit-demand-ish,
# all-or-nothing); a partial grant g <= q_i is worth w_i * g (linear in units).
# Social welfare of an allocation x (units granted per agent) is sum_i w_i * x_i.
# Supply constraint: sum_i x_i <= m.  This is a small bounded-knapsack / fractional
# assignment whose continuous optimum is greedy-by-value-density but whose
# all-or-nothing optimum is genuinely combinatorial (knapsack), so the exact
# OPT below is computed by a 0/1-knapsack dynamic program over which agents
# are fully served (verified against exhaustive enumeration on small instances).
# ---------------------------------------------------------------------------

def draw_instance(n, m, rng):
    """Per-unit values w_i ~ U(0,1); integer demands q_i in {1,2,3}."""
    w = rng.random(n)
    q = rng.integers(1, 4, size=n)
    return w, q, m

def welfare_market(w, q, m, bids=None):
    """Greedy clearing by value density, all-or-nothing, with a hard supply cap.

    Mirrors the essay's `clear`: sort live bids by value-per-unit, grant in order
    while capacity remains, skip a bid that does not fit. `bids` is the reported
    per-unit value used for ordering (defaults to truthful w); realized welfare is
    always scored with true w.
    """
    order_key = w if bids is None else bids
    order = np.argsort(-order_key)        # descending value density
    remaining, served = m, np.zeros(len(w), dtype=bool)
    for i in order:
        if q[i] <= remaining:
            served[i] = True
            remaining -= q[i]
    return float((w[served] * q[served]).sum()), served

def welfare_naive(w, q, m):
    """Naive heuristic: serve agents in arrival order (no price signal) until full."""
    remaining, served = m, np.zeros(len(w), dtype=bool)
    for i in range(len(w)):               # arrival order = index order
        if q[i] <= remaining:
            served[i] = True
            remaining -= q[i]
    return float((w[served] * q[served]).sum())

def welfare_opt(w, q, m):
    """Exact welfare optimum: each agent is served fully or not at all (an
    all-or-nothing 0/1 knapsack with item weight q_i, value w_i*q_i, capacity m).
    Solved exactly by dynamic programming over integer capacity in O(n*m).
    """
    val = w * q
    cap = int(m)
    dp = np.zeros(cap + 1, dtype=float)    # dp[c] = best welfare using capacity c
    for i in range(len(w)):
        wt = int(q[i])
        if wt > cap:
            continue
        # 0/1 knapsack: iterate capacity downward so each agent is used at most once
        dp[wt:] = np.maximum(dp[wt:], dp[:cap + 1 - wt] + val[i])
    return float(dp[cap])

# ---------------------------------------------------------------------------
# Figure 1: efficiency ratio vs contention n/m. Market vs naive vs exact OPT.
# ---------------------------------------------------------------------------
def fig_efficiency():
    m = 6                                  # fixed supply; exact OPT (knapsack DP) tractable
    ns = np.arange(2, 17)                  # contention n/m from ~0.3 to ~2.7
    trials = 4000
    mkt_mean, mkt_lo, mkt_hi = [], [], []
    naive_mean, naive_lo, naive_hi = [], [], []
    for n in ns:
        mkt_r, naive_r = [], []
        for _ in range(trials):
            w, q, cap = draw_instance(n, m, RNG)
            opt = welfare_opt(w, q, cap)
            if opt <= 0:
                continue
            wm, _ = welfare_market(w, q, cap)
            wn = welfare_naive(w, q, cap)
            mkt_r.append(wm / opt)
            naive_r.append(wn / opt)
        mkt_r, naive_r = np.array(mkt_r), np.array(naive_r)
        mkt_mean.append(mkt_r.mean())
        mkt_lo.append(np.percentile(mkt_r, 10)); mkt_hi.append(np.percentile(mkt_r, 90))
        naive_mean.append(naive_r.mean())
        naive_lo.append(np.percentile(naive_r, 10)); naive_hi.append(np.percentile(naive_r, 90))
    x = ns / m
    mkt_mean = np.array(mkt_mean); naive_mean = np.array(naive_mean)

    fig, ax = plt.subplots(figsize=(7.0, 3.9))
    ax.axhline(1.0, color=MUTED, lw=0.8, ls=":", alpha=0.8)
    ax.fill_between(x, mkt_lo, mkt_hi, color=ACCENT, alpha=0.14, linewidth=0)
    ax.plot(x, mkt_mean, color=ACCENT, lw=1.8, marker="o", ms=4, label="market clearing")
    ax.fill_between(x, naive_lo, naive_hi, color=C2, alpha=0.12, linewidth=0)
    ax.plot(x, naive_mean, color=C2, lw=1.8, marker="s", ms=4, label="naive greedy (arrival order)")
    ax.set_xlabel("contention,  n / m")
    ax.set_ylabel("efficiency  (realized welfare / optimum)")
    ax.set_ylim(0.5, 1.03)
    ax.set_xlim(x.min(), x.max())
    ax.legend(loc="best")
    style(ax)
    save(fig, "swarm-efficiency.svg")

# ---------------------------------------------------------------------------
# Figure 2: empirical price of anarchy. Selfish bidders shade their reported
# per-unit value by an idiosyncratic factor, reporting w_i * (1 - phi * u_i)
# with u_i ~ U(0,1). At phi = 0 reporting is truthful and the clearing order
# is the true value-density order; as phi grows, heterogeneous shading scrambles
# that order, so the market clears on a degraded ranking. The market clears on
# reports but welfare is scored on true values. We plot welfare(selfish) / OPT
# vs the shading parameter, against the constant smoothness PoA bound e/(e-1).
# ---------------------------------------------------------------------------
def fig_poa():
    m, n = 8, 14                           # moderate contention
    phis = np.linspace(0.0, 0.95, 14)      # 0 = truthful; larger = more shading
    trials = 6000
    bound = np.e / (np.e - 1)              # ~1.582  => welfare ratio >= ~0.632
    ratio_floor = 1.0 / bound
    mean, lo, hi = [], [], []
    for phi in phis:
        rs = []
        for _ in range(trials):
            w, q, cap = draw_instance(n, m, RNG)
            opt = welfare_opt(w, q, cap)
            if opt <= 0:
                continue
            # idiosyncratic bid shading: each agent underbids by up to phi fraction
            reports = w * (1.0 - phi * RNG.random(n))   # shaded per-unit reports
            wsel, _ = welfare_market(w, q, cap, bids=reports)
            rs.append(wsel / opt)
        rs = np.array(rs)
        mean.append(rs.mean())
        lo.append(np.percentile(rs, 10)); hi.append(np.percentile(rs, 90))
    mean = np.array(mean)

    fig, ax = plt.subplots(figsize=(7.0, 3.9))
    ax.fill_between(phis, lo, hi, color=C4, alpha=0.14, linewidth=0)
    ax.plot(phis, mean, color=C4, lw=1.8, marker="o", ms=4, label="selfish equilibrium welfare / OPT")
    ax.axhline(ratio_floor, color=C3, lw=1.0, ls="--",
               label=r"smoothness floor  $1/(e/(e-1)) \approx 0.63$")
    ax.set_xlabel("strategic shading parameter,  $\\varphi$")
    ax.set_ylabel("welfare ratio  (selfish / optimum)")
    ax.set_ylim(0.5, 1.02)
    ax.set_xlim(phis.min(), phis.max())
    ax.legend(loc="best")
    style(ax)
    save(fig, "swarm-poa.svg")

if __name__ == "__main__":
    fig_efficiency()
    fig_poa()
    print("done.")