Building a Fractional Pseudorandom Generator¶

Paper: Chattopadhyay, Hatami, Lovett, Tal. Pseudorandom Generators from the Second Fourier Level and Applications to AC0 with Parity Gates. ITCS 2019.


This notebook walks through the construction of a fractional pseudorandom generator from the paper. The construction has several moving parts, so it helps to see each one happen concretely. At the end, we use the construction as an excuse to poke at Conjecture 3 on small instances.

What the paper does¶

A PRG for a function class $F$ takes a short random seed ($s$ bits) and stretches it into $n \gg s$ pseudorandom values. The test: for every $f \in F$, the average of $f$ on PRG outputs must be close to its average on truly random inputs -- $|\mathbb{E}[f(\text{PRG})] - \mathbb{E}[f(\text{random})]| \leq \varepsilon$. The paper builds this in two stages:

  1. A fractional PRG -- samples in $[-1,1]^n$ that fool $f$'s multilinear extension, using only $\ell \ll n$ random values.
  2. A polarizing random walk (Theorem 7, from [CHHL'18]) converts the fractional output to Boolean $\{-1,+1\}^n$.

We implement stage 1. The paper's target application is AC0[$\oplus$] circuits, but the construction works for any restriction-closed class with bounded $L_{1,2}$. We test it on degree-$d$ polynomials over $\mathbb{F}_2$, which is the class the paper focuses on.

Roadmap¶

  1. Background: function classes, Fourier tails, the $L_{1,2}$ condition
  2. Test functions: random degree-2 $\mathbb{F}_2$ polynomials
  3. Theorem 9: small-variance Gaussians fool bounded-tail functions
  4. Dimension reduction via balanced codes
  5. The complete fractional PRG
  6. Poking at Conjecture 3
In [ ]:
!pip install --upgrade "boofun[performance]" -q

import numpy as np
import matplotlib.pyplot as plt
import boofun as bf
from boofun.analysis.fourier import fourier_level_lp_norm, fourier_tail_profile
from boofun.analysis.gaussian import multilinear_extension
from boofun.analysis.gf2 import gf2_degree
from scipy.stats import norm
from itertools import combinations

np.random.seed(42)
print(f"boofun version: {bf.__version__}")

# Check acceleration backends
from boofun.core.numba_optimizations import is_numba_available

print(f"Numba available: {is_numba_available()}")

1. Background¶

Function classes¶

The paper works with a class $F$ of Boolean functions that is closed under restrictions: if $f \in F$ and you fix some variables to constants, the restricted function is still in $F$. This property is needed because the PRG construction progressively restricts variables (via the polarizing walk in Theorem 7).

The class we use: degree-$d$ polynomials over $\mathbb{F}_2$, denoted $\text{Poly}_{n,d}$. A member is $f(x) = (-1)^{p(x)}$ where $p : \mathbb{F}_2^n \to \mathbb{F}_2$ is a polynomial of degree $\leq d$. For example, with $d = 2$ on 4 variables:

$$p(x) = x_0 x_1 \oplus x_2 x_3 \oplus x_0 \qquad (\text{degree 2})$$

Fixing $x_0 = 1$ gives $p(x) = x_1 \oplus x_2 x_3 \oplus 1$, still degree $\leq 2$. The class is restriction-closed.
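To see restriction-closure concretely, here is a minimal sketch (plain Python, independent of the notebook's library) that restricts an $\mathbb{F}_2$ polynomial represented as a set of monomials and checks that the degree does not grow:

```python
# Minimal sketch: an F2 polynomial as a set of monomials, each monomial a
# frozenset of variable indices. Restricting a variable either drops
# monomials (x_i = 0) or shrinks them (x_i = 1); the symmetric difference
# handles cancellation mod 2 when two monomials collide.
def restrict(monomials, var, value):
    out = set()
    for m in monomials:
        if var in m:
            if value == 1:
                out ^= {frozenset(m - {var})}
            # value == 0: the monomial vanishes
        else:
            out ^= {frozenset(m)}
    return out


def degree(monomials):
    return max((len(m) for m in monomials), default=0)


# p(x) = x0 x1 XOR x2 x3 XOR x0, the example above
p = {frozenset({0, 1}), frozenset({2, 3}), frozenset({0})}
q = restrict(p, 0, 1)  # fix x0 = 1
print(sorted(map(sorted, q)))  # [[], [1], [2, 3]], i.e. 1 XOR x1 XOR x2 x3
print(degree(q) <= degree(p))  # True
```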

Fourier tails¶

Every $f: \{-1,+1\}^n \to \{-1,+1\}$ has a unique multilinear expansion $\tilde{f}(x) = \sum_{S \subseteq [n]} \hat{f}(S) \prod_{i \in S} x_i$. On Boolean inputs this equals $f$ exactly; on real-valued inputs $x \in [-1,1]^n$ it interpolates smoothly. The key property: $\tilde{f}(\mathbf{0}) = \hat{f}(\emptyset) = \mathbb{E}[f]$, because every monomial $\prod_{i \in S} x_i$ vanishes at the origin except the empty product. This is why the fractional PRG targets the origin -- samples near $\mathbf{0}$ give $\tilde{f}$ values close to $\mathbb{E}[f]$.

The level-$k$ Fourier tail sums the absolute values of coefficients at degree $k$:

$$L_{1,k}(f) = \sum_{|S|=k} |\hat{f}(S)|$$

The paper's main result (Theorem 2/6): if $F$ is restriction-closed and $L_{1,2}(F) \leq t$, then an explicit PRG exists with seed length $O((t/\varepsilon)^{2+o(1)} \cdot \text{polylog}(n))$. Only the second level matters.
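These quantities can be computed by brute force for a tiny function, with no library at all. A minimal sketch for $f(x) = (-1)^{x_0 x_1 \oplus x_2}$, a degree-2 polynomial on 3 variables:

```python
from itertools import combinations, product

n = 3


def f(bits):
    # f(x) = (-1)^(x0*x1 XOR x2)
    return (-1) ** ((bits[0] & bits[1]) ^ bits[2])


def fhat(S):
    # f_hat(S) = E_x[f(x) * chi_S(x)], with chi_S(x) = prod_{i in S} (-1)^{x_i},
    # averaged over uniform x in {0,1}^n.
    total = 0
    for bits in product((0, 1), repeat=n):
        chi = (-1) ** sum(bits[i] for i in S)
        total += f(bits) * chi
    return total / 2**n


for k in range(n + 1):
    L1k = sum(abs(fhat(S)) for S in combinations(range(n), k))
    print(f"L_(1,{k}) = {L1k}")
# L_(1,0) = 0.0, L_(1,1) = 0.5, L_(1,2) = 1.0, L_(1,3) = 0.5
```

The four nonzero coefficients are $\hat{f}(\{2\}) = \hat{f}(\{0,2\}) = \hat{f}(\{1,2\}) = 1/2$ and $\hat{f}(\{0,1,2\}) = -1/2$; their squares sum to 1, consistent with Parseval.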

What is $t$ for our class?¶

For $\text{Poly}_{n,d}$, it is known from [CHHL'18] that $L_{1,2}(\text{Poly}_{n,d}) \leq 4 \cdot 2^{6d}$ (exponential in $d$, but independent of $n$). Conjecture 3 says the truth is $O(d^2)$. If true, combined with Theorem 2, this would give PRGs for AC0[$\oplus$] -- a well-known open problem.

In the paper, $t$ is an analytical worst-case bound over the entire class. Here, we compute $L_{1,2}$ exactly for specific instances. The PRG we build would fool any function with $L_{1,2} \leq t$.

2. Test Functions: Random Degree-2 $\mathbb{F}_2$ Polynomials¶

We generate random quadratic polynomials over $\mathbb{F}_2$ on $n = 16$ variables. Each polynomial is a random subset of the $\binom{16}{2} = 120$ quadratic monomials plus the 16 linear monomials. The truth table has $2^{16} = 65{,}536$ entries -- small enough for exact Fourier analysis.

In [ ]:
n = 16
d = 2


def random_f2_poly(n, d, rng):
    """Generate a random degree-d polynomial over F2 on n variables."""
    monomials = []
    for deg in range(1, d + 1):
        for combo in combinations(range(n), deg):
            if rng.random() < 0.5:
                monomials.append(set(combo))
    if not monomials:
        monomials = [{0, 1}]  # avoid the constant function
    return bf.f2_polynomial(n, monomials)


rng = np.random.default_rng(seed=42)
test_fns = {f"quad_{i}": random_f2_poly(n, d, rng) for i in range(5)}

# Compute Fourier tails using fourier_tail_profile (computes all L_{1,k} at once).
# This function was built for the CHLT'19 PRG framework -- see its docstring.
print(f"Random degree-{d} F2 polynomials on n = {n} variables")
print(f"Known bound (CHHL'18): L_{{1,2}}(Poly_{{n,{d}}}) <= 4 * 2^(6*{d}) = {4 * 2**(6*d)}")
print(f"Conjecture 3:          L_{{1,2}} = O(d^2) = O({d**2})")
print()

display_levels = 5
header = f"{'Function':<12}{'GF2 deg':<10}" + "".join(
    f"{'L_{1,' + str(k) + '}':<12}" for k in range(display_levels)
)
print(header)
print("-" * 82)

l12_values = {}
for name, func in test_fns.items():
    gf2_d = gf2_degree(func)
    profile = fourier_tail_profile(func, p=1)  # {k: L_{1,k}} for all k
    row = f"{name:<12}{gf2_d:<10}"
    for k in range(display_levels):
        row += f"{profile.get(k, 0.0):<12.4f}"
    l12_values[name] = profile.get(2, 0.0)
    print(row)

t = max(l12_values.values())
print(f"\nEmpirical t = max L_{{1,2}} across our instances: {t:.4f}")
print(f"Known bound for d={d}: {4 * 2**(6*d)}  (exponential, from [CHHL'18])")
print(f"Conjectured bound for d={d}: O({d**2})  (Conjecture 3)")
In [ ]:
# --- Sanity checks using the library's multilinear_extension ---
# multilinear_extension(f) returns a callable p: R^n -> R such that
# p(x) = sum_S f_hat(S) * prod_{i in S} x_i.
# Key property: p(0) = f_hat(empty) = E[f].

print("Sanity check: f_tilde(0) = f_hat(empty) = E[f]")
for name, func in test_fns.items():
    mle = multilinear_extension(func)
    f_hat_0 = func.fourier()[0]
    f_tilde_0 = mle(np.zeros(n))
    assert abs(f_tilde_0 - f_hat_0) < 1e-12, f"{name}: mismatch!"
    print(f"  {name}: f_hat(empty) = {f_hat_0:+.6f},  f_tilde(0) = {f_tilde_0:+.6f}  OK")

# Parseval: sum f_hat(S)^2 = 1 for +/-1 functions.
for name, func in test_fns.items():
    parseval = np.sum(func.fourier() ** 2)
    assert abs(parseval - 1.0) < 1e-10, f"{name}: Parseval violated! sum = {parseval}"
print(f"\nParseval check passed for all {len(test_fns)} functions.")
In [ ]:
# The library's multilinear_extension evaluates one point at a time.
# For the PRG verification below (thousands of samples at n=16), we need
# a vectorized batch evaluator. This uses the "monomial-building trick":
# incrementally build all 2^n subset products in O(2^n) per point,
# then dot with Fourier coefficients.


def eval_multilinear_batch(fourier_coeffs, n_vars, X, batch_size=100):
    """Evaluate multilinear extension at rows of X (vectorized)."""
    m = X.shape[0]
    size = len(fourier_coeffs)
    results = np.zeros(m)
    for start in range(0, m, batch_size):
        end = min(start + batch_size, m)
        batch = X[start:end]
        b = batch.shape[0]
        vals = np.zeros((b, size))
        vals[:, 0] = 1.0  # empty product
        for i in range(n_vars):
            step = 1 << i
            vals[:, step : 2 * step] = vals[:, :step] * batch[:, i : i + 1]
        results[start:end] = vals @ fourier_coeffs
    return results


# Cross-check: batch evaluator agrees with the library's multilinear_extension.
func0 = list(test_fns.values())[0]
mle0 = multilinear_extension(func0)
test_point = np.random.randn(n) * 0.1  # small random point in R^n
lib_val = mle0(test_point)
batch_val = eval_multilinear_batch(func0.fourier(), n, test_point.reshape(1, -1))[0]
assert abs(lib_val - batch_val) < 1e-10, f"Mismatch: lib={lib_val}, batch={batch_val}"
print(f"Cross-check: library MLE vs batch evaluator at a random point")
print(f"  Library:  {lib_val:+.10f}")
print(f"  Batch:    {batch_val:+.10f}")
print(f"  Match: OK")

3. Theorem 9: Gaussians Fool Bounded-Tail Functions¶

The construction relies on this result (Raz-Tal, restated as Theorem 9 / Theorem 21 in the paper):

Theorem 9. Let $Z \in \mathbb{R}^n$ be a zero-mean multivariate Gaussian with:

  • $\text{Var}[Z_i] \leq \sigma^2$ for all $i$
  • $|\text{Cov}[Z_i, Z_j]| \leq \delta$ for all $i \neq j$

If $L_{1,2}(F) \leq t$, then for any $f \in F$: $$|\mathbb{E}[\tilde{f}(\text{trnc}(Z))] - \tilde{f}(\mathbf{0})| \leq 4\delta \cdot t + 4n \cdot e^{-1/(8\sigma^2)}$$

where $\text{trnc}$ clips each coordinate to $[-1, 1]$, and $\tilde{f}(\mathbf{0}) = \hat{f}(\emptyset) = \mathbb{E}[f]$.

Why only level 2? Expanding $\mathbb{E}[\tilde{f}(Z)]$ by Fourier level: the level-0 term gives $\tilde{f}(\mathbf{0})$ (the target), level-1 terms vanish (zero mean), and each level-2 term contributes $\hat{f}(\{i,j\}) \cdot \mathbb{E}[Z_i Z_j]$, which is at most $\delta \cdot L_{1,2}(f)$ in total absolute value. Higher levels are negligible when $\sigma$ is small.
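A two-variable toy (plain NumPy, not the paper's construction, and skipping truncation since the samples are tiny) makes the level-2 term visible. For $\tilde{f}(x) = x_1 x_2$, a single level-2 coefficient equal to 1, the deviation from $\tilde{f}(\mathbf{0}) = 0$ is exactly the covariance:

```python
import numpy as np

rng = np.random.default_rng(0)
sigma2, delta = 0.1, 0.05  # per-coordinate variance and covariance
cov = np.array([[sigma2, delta], [delta, sigma2]])
Z = rng.multivariate_normal([0.0, 0.0], cov, size=500_000)

# Level-1 terms vanish: both coordinates have zero mean.
print(Z[:, 0].mean(), Z[:, 1].mean())  # both ~ 0

# Level-2 term: E[Z1 * Z2] = Cov(Z1, Z2) = delta.
print((Z[:, 0] * Z[:, 1]).mean())      # ~ 0.05 = delta
```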

The construction sets $\sigma^2 = p = 1/(8\ln(n/\delta))$, which makes the second term equal to $4\delta$ (since $e^{-\ln(n/\delta)} = \delta/n$). The full bound becomes $4\delta(t + 1)$. Setting $\delta = \varepsilon/(4(t+1))$ gives error $\leq \varepsilon$.

Why fractional values, not Boolean? The Fourier expansion gives exact control over how $\tilde{f}$ responds to small perturbations near zero: level-1 terms vanish by symmetry, level-2 terms are bounded by $\delta \cdot L_{1,2}$, higher levels are negligible. This clean analysis is only possible because Gaussians are continuous. Producing Boolean outputs directly would bypass the multilinear extension and lose this structure. The polarizing walk (Stage 2, not implemented here) handles the Boolean rounding separately.

Note on scale: at $n = 16$ with small $t$, the constant factors in the bound matter. The bound is designed for large $n$ and moderate $t$. We verify the construction works empirically.

As a baseline, we first try $n$ independent Gaussians (trivially $\delta = 0$, but no randomness savings).

In [ ]:
# Parameters from the bound: 4*delta*(t+1) <= eps
# => delta = eps / (4*(t+1))
eps = 0.3
delta = eps / (4 * (t + 1))
p_var = 1.0 / (8 * np.log(n / delta))  # = 1/(8 ln(n/delta))
sigma = np.sqrt(p_var)

# Full Theorem 21 bound: 4*delta*t + 4*n*exp(-1/(8*sigma^2))
# With sigma^2 = 1/(8 ln(n/delta)), the exp term equals 4*delta.
trunc_term = 4 * n * np.exp(-1 / (8 * p_var))
full_bound = 4 * delta * t + trunc_term

print("Theorem 9 parameters:")
print(f"  n = {n},  t = {t:.4f},  eps = {eps}")
print(f"  delta = eps/(4(t+1)) = {delta:.6f}")
print(f"  p = 1/(8 ln(n/delta)) = {p_var:.6f},  sigma = {sigma:.6f}")
print(f"  Bound term 1: 4*delta*t = {4*delta*t:.4f}")
print(f"  Bound term 2: 4n*exp(-1/(8p)) = {trunc_term:.4f}  (= 4*delta by construction)")
print(f"  Full Theorem 21 bound: {full_bound:.4f}")
print()

# --- Naive baseline: n independent Gaussians ---
# Note: the library provides multilinear_extension_gaussian_expectation(f) in
# analysis/invariance.py for the standard Gaussian case (sigma=1). Here we need
# N(0, p) with a specific small p, so we sample directly.
n_samples = 2000
naive_results = {}
print(f"Naive baseline ({n_samples} samples, {n} independent Gaussians each):")
for name, func in test_fns.items():
    fourier = func.fourier()
    E_f = fourier[0]
    Z = np.random.randn(n_samples, n) * sigma
    X = np.clip(Z, -1, 1)
    vals = eval_multilinear_batch(fourier, n, X)
    err = abs(np.mean(vals) - E_f)
    naive_results[name] = {"E_f": E_f, "mean": np.mean(vals), "error": err}
    print(f"  {name}: E[f] = {E_f:+.4f}, mean = {np.mean(vals):+.4f}, |error| = {err:.6f}")

print(f"\nWith independent coordinates, delta = 0; each clipped coordinate is symmetric with")
print(f"mean zero, so every nonempty monomial averages to zero and the fooling error is 0 in theory.")
print(f"The small empirical errors above are finite-sample noise.")

4. Dimension Reduction via Balanced Codes¶

Instead of $n$ independent Gaussians, the paper generates $n$ correlated values from only $\ell$ independent Gaussians.

Construction (Paper, Section 2, Step I):

  1. Choose $n$ distinct codewords $c_1, \ldots, c_n \in \{0,1\}^\ell$ from a $\delta$-balanced code
  2. Build $A \in \mathbb{R}^{n \times \ell}$: $\; A_{i,j} = \sqrt{p/\ell} \cdot (-1)^{c_{i,j}}$
  3. Sample $Y \sim N(0, I_\ell)$ -- only $\ell$ random values
  4. Set $Z = AY$

Each row of $A$ has squared norm $p$, so $\text{Var}[Z_i] = p$ exactly. The covariance between $Z_i$ and $Z_j$ is their rows' dot product. Since each entry is $\pm\sqrt{p/\ell}$, the dot product is $+(p/\ell)$ where the codewords agree and $-(p/\ell)$ where they differ. If $d_H$ positions differ:

$$\text{Cov}(Z_i, Z_j) = \frac{p}{\ell}\big((\ell - d_H) - d_H\big) = p \cdot \left(1 - \frac{2\, d_H(c_i, c_j)}{\ell}\right)$$

When $d_H = \ell/2$ (disagree on exactly half): $\text{Cov} = 0$. When $d_H = 0$ (identical codewords): $\text{Cov} = p$ (full correlation). A good code keeps all pairwise distances close to $\ell/2$.
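The covariance identity above is easy to check numerically with a hand-picked pair of codewords (standalone sketch; values are illustrative):

```python
import numpy as np

p, ell = 0.04, 10
c_i = np.array([0, 1, 0, 0, 1, 1, 0, 1, 0, 0])
c_j = np.array([1, 1, 0, 1, 1, 0, 0, 1, 1, 0])

# Rows of A for these two codewords: A_{i,j} = sqrt(p/ell) * (-1)^{c_{i,j}}
a_i = np.sqrt(p / ell) * (-1.0) ** c_i
a_j = np.sqrt(p / ell) * (-1.0) ** c_j

d_H = int(np.sum(c_i != c_j))  # Hamming distance between the codewords
cov = a_i @ a_j                # Cov(Z_i, Z_j) for Z = AY, Y ~ N(0, I_ell)
formula = p * (1 - 2 * d_H / ell)

print(d_H, cov, formula)  # 4, then two values equal to 0.008 up to rounding
```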

Why not zero covariance? If $\ell \geq n$, you could use orthogonal rows and get $\text{Cov} = 0$ exactly. But that uses $\ell = n$ random values -- no savings. The point is $\ell \ll n$: you can't fit $n$ orthogonal vectors in $\ell < n$ dimensions, so some covariance is unavoidable. The code minimizes it.

The required code length is $\ell = O(\log n / \delta^{2+o(1)})$ using Ta-Shma's explicit construction. Smaller $\delta$ requires longer codewords. At $n = 16$ the theoretical $\ell$ exceeds $n$, so there's no dimension saving at this scale. We use $\ell = 10$ to show the mechanics.

In [ ]:
# Dimension reduction: n -> ell
ell_theory = int(np.ceil((max(t, 1) / eps) ** 2 * np.log2(n) * 10))
ell = 10  # 16 -> 10

print(f"Dimension reduction: {n} -> {ell}")
print(f"  Theoretical ell for delta-balanced code ~ {ell_theory}")
print(f"  At this scale ell > n; savings appear for large n.")
print()

# --- Balanced code ---
# The paper uses explicit small-biased spaces (Ta-Shma 2017).
# Here: random codewords, ensuring all are distinct.
# Find a seed that gives all-unique codewords with good separation
for _seed in range(100):
    code_rng = np.random.default_rng(seed=_seed)
    codewords = code_rng.integers(0, 2, size=(n, ell))
    if len(set(map(tuple, codewords))) == n:
        _dists = [np.sum(codewords[i] != codewords[j]) for i, j in combinations(range(n), 2)]
        if min(_dists) >= 2:
            break
assert len(set(map(tuple, codewords))) == n, "Duplicate codewords!"

# Report code quality
hamming_dists = []
for i, j in combinations(range(n), 2):
    hamming_dists.append(np.sum(codewords[i] != codewords[j]))
print(f"Code quality ({n} codewords, length {ell}):")
print(f"  Pairwise Hamming distances: min={min(hamming_dists)}, max={max(hamming_dists)}, mean={np.mean(hamming_dists):.1f}")
print(f"  Ideal: all distances = {ell//2} (half the length)")
print()

# Matrix A: A_{i,j} = sqrt(p / ell) * (-1)^{c_{i,j}}
scale = np.sqrt(p_var / ell)
A = scale * (1 - 2 * codewords.astype(float))

# Covariance structure
cov_Z = A @ A.T
diag = np.diag(cov_Z)
off_diag = cov_Z[np.triu_indices(n, k=1)]
effective_delta = np.max(np.abs(off_diag))

print(f"Covariance structure of Z = AY:")
print(f"  Var[Z_i] = {diag[0]:.6f}  (= p, by construction)")
print(f"  max |Cov[Z_i, Z_j]| = {effective_delta:.6f}  (effective delta)")
print(f"  mean |Cov[Z_i, Z_j]| = {np.mean(np.abs(off_diag)):.6f}")
# Full bound using this code's effective delta: 4*delta_eff*t + 4*n*exp(-1/(8p)).
# Note the truncation term depends only on p (set from the design delta above),
# not on the code's effective delta.
eff_trunc = 4 * n * np.exp(-1 / (8 * p_var))
eff_bound = 4 * effective_delta * t + eff_trunc
print(f"\nFooling bound with this code: 4*delta_eff*t + trunc = {4*effective_delta*t:.4f} + {eff_trunc:.4f} = {eff_bound:.4f}")

fig, axes = plt.subplots(1, 3, figsize=(16, 4.5))

im0 = axes[0].imshow(A, cmap="RdBu", aspect="auto")
axes[0].set_xlabel(f"Seed dimension ($\\ell$ = {ell})")
axes[0].set_ylabel(f"Output dimension (n = {n})")
axes[0].set_title(f"Matrix $A$ ({n} x {ell})")
plt.colorbar(im0, ax=axes[0])

im1 = axes[1].imshow(cov_Z, cmap="RdBu", vmin=-p_var, vmax=p_var)
axes[1].set_title(f"Covariance $AA^T$ ({n} x {n})")
plt.colorbar(im1, ax=axes[1], label="Cov")

axes[2].hist(off_diag, bins=25, color="steelblue", edgecolor="white", alpha=0.8)
axes[2].axvline(x=0, color="black", linewidth=0.8)
axes[2].axvline(x=effective_delta, color="red", linestyle="--", label=f"max |Cov| = {effective_delta:.4f}")
axes[2].axvline(x=-effective_delta, color="red", linestyle="--")
axes[2].set_xlabel("Cov($Z_i$, $Z_j$)")
axes[2].set_ylabel("Count")
axes[2].set_title("Off-diagonal covariances")
axes[2].legend(fontsize=8)

plt.tight_layout()
plt.show()

5. The Complete Fractional PRG¶

The construction so far uses $\ell$ continuous Gaussians, but a continuous Gaussian is a real number with infinitely many digits -- it can't come from a finite seed. Discretization (Paper, Lemma 10-11) fixes this: each Gaussian is approximated by one of $2^k$ discrete values, selected by $k$ truly random bits. With $k = 8$, each Gaussian is one of 256 precomputed quantiles of $N(0,1)$. The total seed is $\ell \cdot k$ bits -- finite and explicit.

The full pipeline:

$$s \text{ bits} \;\xrightarrow{\text{quantize}}\; Y \in \mathbb{R}^\ell \;\xrightarrow{\;Z = AY\;}\; Z \in \mathbb{R}^n \;\xrightarrow{\;\text{trnc}\;}\; X \in [-1,1]^n$$

| Stage | Input | Output |
| --- | --- | --- |
| Quantize | $s = \ell \cdot k$ random bits | $Y \in \mathbb{R}^\ell$ (approx. Gaussians) |
| Dimension expansion | $Y$ | $Z = AY \in \mathbb{R}^n$ (correlated, small covariance) |
| Truncation | $Z$ | $X = \text{trnc}(Z) \in [-1,1]^n$ (fractional PRG output) |
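Before trusting a $k$-bit quantized Gaussian, it is worth checking that its first two moments are close to those of $N(0,1)$. A standalone sanity check of the midpoint-quantile levels used below (the variance falls slightly short of 1 because the midpoint rule clips the tails):

```python
import numpy as np
from scipy.stats import norm

k = 8
num_levels = 2**k
# Midpoint quantiles of N(0,1): one level per equal-probability cell.
levels = norm.ppf((np.arange(num_levels) + 0.5) / num_levels)

mean = levels.mean()        # 0 by symmetry (up to float error)
var = (levels**2).mean()    # slightly below 1: tails are clipped

print(f"mean = {mean:.2e}, variance = {var:.6f}")
```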

We check: for each test function, $|\mathbb{E}[\tilde{f}(X)] - \mathbb{E}[f]|$ should be bounded by the Theorem 21 bound.

In [ ]:
# --- Discretization ---
bits_per_coord = 8
num_levels = 2**bits_per_coord
total_seed_bits = ell * bits_per_coord

# Quantized Gaussian: 256-level approximation of N(0,1)
quantized_gaussian = norm.ppf((np.arange(num_levels) + 0.5) / num_levels)

print(f"Discretization: {bits_per_coord} bits/coord, {num_levels} levels")
print(f"Total seed: {ell} x {bits_per_coord} = {total_seed_bits} bits")
print()

# === Complete Fractional PRG ===
n_trials = 2000
theoretical_bound = 4 * effective_delta * t + eff_trunc

print(f"Verification ({n_trials} trials)")
print(f"  Seed: {total_seed_bits} bits -> {ell} Gaussians -> {n} fractional values")
print(f"  Theoretical bound (Thm 21): {theoretical_bound:.4f}")
print()

prg_results = {}
for name, func in test_fns.items():
    fourier = func.fourier()
    E_f = fourier[0]

    seeds = np.random.randint(0, num_levels, size=(n_trials, ell))
    Y_batch = quantized_gaussian[seeds]
    Z_batch = Y_batch @ A.T
    X_batch = np.clip(Z_batch, -1, 1)

    vals = eval_multilinear_batch(fourier, n, X_batch)
    prg_mean = np.mean(vals)
    prg_err = abs(prg_mean - E_f)

    prg_results[name] = {"E_f": E_f, "mean": prg_mean, "error": prg_err}

    print(f"  {name}:")
    print(f"    E[f] = {E_f:+.6f}")
    print(f"    PRG mean  = {prg_mean:+.6f}  (error = {prg_err:.6f})")
    print(f"    Naive mean = {naive_results[name]['mean']:+.6f}  (error = {naive_results[name]['error']:.6f})")
    print()

# --- Comparison plot ---
names = list(test_fns.keys())
x_pos = np.arange(len(names))
width = 0.25

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

naive_errs = [naive_results[nm]["error"] for nm in names]
prg_errs = [prg_results[nm]["error"] for nm in names]

axes[0].bar(x_pos - width / 2, naive_errs, width, label=f"Naive ({n} independent vals)", color="steelblue")
axes[0].bar(x_pos + width / 2, prg_errs, width, label=f"PRG ({ell} correlated vals)", color="orange")
axes[0].axhline(
    y=theoretical_bound,
    color="red",
    linestyle="--",
    label=f"$4\\delta_{{eff}} t + \\mathrm{{trunc}}$ = {theoretical_bound:.4f}",
)
axes[0].set_xticks(x_pos)
axes[0].set_xticklabels(names, fontsize=8, rotation=15)
axes[0].set_ylabel("|mean - E[f]|")
axes[0].set_title("Fooling Error")
axes[0].legend(fontsize=7)
axes[0].grid(True, alpha=0.3, axis="y")

# Seed length scaling (qualitative -- the formula hides constants and o(1) terms)
ns = [16, 64, 256, 1024, 4096, 16384, 65536]
ell_values = [int(np.ceil((max(t, 1) / eps) ** 2 * np.log2(nn) * 5)) for nn in ns]
axes[1].plot(ns, ns, "k--", label="n (naive)", linewidth=2)
axes[1].plot(ns, ell_values, "o-", color="orange", label=r"$\ell \sim \mathrm{polylog}(n)$ (PRG)", linewidth=2)
axes[1].set_xlabel("n (number of variables)")
axes[1].set_ylabel("Random values needed")
axes[1].set_title(r"Seed dimension $\ell$ vs $n$ (qualitative)")
axes[1].set_xscale("log")
axes[1].set_yscale("log")
axes[1].annotate(
    "PRG wins\nfor large n",
    xy=(40000, 200),
    fontsize=9,
    color="gray",
    ha="center",
)
axes[1].legend(fontsize=8)
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

6. Poking at Conjecture 3¶

Conjecture 3 from the paper:

$L_{1,2}(\text{Poly}_{n,d}) = O(d^2)$. That is, for any degree-$d$ polynomial $p : \mathbb{F}_2^n \to \mathbb{F}_2$ and $f(x) = (-1)^{p(x)}$: $$\sum_{i<j} |\hat{f}(\{i,j\})| = O(d^2)$$

This is unproven. The known bound is $4 \cdot 2^{6d}$ (exponential, from [CHHL'18]). If the conjecture holds, it gives PRGs for AC0[$\oplus$] via the construction above.

We compute $L_{1,2}$ for many random instances and see what the numbers look like. Two experiments:

  1. $L_{1,2}$ vs $d$ at fixed $n = 12$: does $L_{1,2}$ grow with $d$?
  2. $L_{1,2}$ vs $n$ at fixed $d = 2$: is $L_{1,2}$ independent of $n$?

The conjecture says: polynomial growth in $d$, no dependence on $n$.

These are small instances, so take the results as observations, not evidence.

In [ ]:
# === Experiment 1: L_{1,2} vs degree d (fixed n = 12) ===
# Higher degrees have many monomials (C(n,d) grows fast), so we keep n moderate.
n_conj = 12
degrees = [1, 2, 3, 4, 5]
n_per_degree = 120
conj_rng = np.random.default_rng(seed=123)

results_by_degree = {dv: [] for dv in degrees}

for dv in degrees:
    for _ in range(n_per_degree):
        monomials = []
        for deg in range(1, dv + 1):
            for combo in combinations(range(n_conj), deg):
                if conj_rng.random() < 0.3:
                    monomials.append(set(combo))
        if not monomials:
            monomials = [{0}]
        f = bf.f2_polynomial(n_conj, monomials)
        results_by_degree[dv].append(fourier_level_lp_norm(f, 2))

print(f"Experiment 1: L_{{1,2}} vs degree d  (n = {n_conj}, {n_per_degree} samples/degree)")
print()
print(f"{'d':<6}{'mean L_{1,2}':<16}{'max L_{1,2}':<16}{'known bound':<16}{'d^2':<8}")
print("-" * 62)
for dv in degrees:
    vals = results_by_degree[dv]
    print(f"{dv:<6}{np.mean(vals):<16.4f}{np.max(vals):<16.4f}{4 * 2**(6*dv):<16}{dv**2:<8}")
In [ ]:
# === Experiment 2: L_{1,2} vs n (fixed d = 2) ===
# This is the key test: the conjecture says L_{1,2} depends only on d, not n.
# C(n,2) grows quadratically, but L_{1,2} should stay flat.
d_fixed = 2
n_values = [4, 6, 8, 10, 12, 14, 16, 18]
n_per_n = 120
n_rng = np.random.default_rng(seed=456)

results_by_n = {nv: [] for nv in n_values}

for nv in n_values:
    for _ in range(n_per_n):
        monomials = []
        for deg in range(1, d_fixed + 1):
            for combo in combinations(range(nv), deg):
                if n_rng.random() < 0.3:
                    monomials.append(set(combo))
        if not monomials:
            monomials = [{0, 1}]
        f = bf.f2_polynomial(nv, monomials)
        results_by_n[nv].append(fourier_level_lp_norm(f, 2))

print(f"Experiment 2: L_{{1,2}} vs n  (d = {d_fixed}, {n_per_n} samples/n)")
print()
print(f"{'n':<8}{'C(n,2)':<10}{'mean L_{1,2}':<16}{'max L_{1,2}':<16}")
print("-" * 50)
for nv in n_values:
    vals = results_by_n[nv]
    print(f"{nv:<8}{nv*(nv-1)//2:<10}{np.mean(vals):<16.4f}{np.max(vals):<16.4f}")

print(f"\nC(n,2) grows quadratically with n, but L_{{1,2}} stays bounded (and actually decreases).")
print(f"The decrease is expected for *random* polynomials: as n grows, more monomials")
print(f"cause the function to concentrate, shrinking individual Fourier coefficients.")
print(f"The conjecture is about the *worst case* over all degree-d polynomials, not random ones.")
print(f"Finding adversarial polynomials with large L_{{1,2}} at large n would be more informative.")
In [ ]:
fig, axes = plt.subplots(1, 3, figsize=(17, 5))

# Left: box plot of L_{1,2} by degree
box_data = [results_by_degree[dv] for dv in degrees]
bp = axes[0].boxplot(box_data, labels=[str(dv) for dv in degrees], patch_artist=True)
for patch in bp["boxes"]:
    patch.set_facecolor("steelblue")
    patch.set_alpha(0.6)
axes[0].set_xlabel("GF(2) degree $d$")
axes[0].set_ylabel("$L_{1,2}(f)$")
axes[0].set_title(f"$L_{{1,2}}$ vs degree (n = {n_conj})")
axes[0].grid(True, alpha=0.3, axis="y")

# Center: max L_{1,2} vs d, compared to d^2
max_vals = [np.max(results_by_degree[dv]) for dv in degrees]
mean_vals = [np.mean(results_by_degree[dv]) for dv in degrees]
axes[1].plot(degrees, max_vals, "s-", color="steelblue", label="max $L_{1,2}$ (empirical)", linewidth=2)
axes[1].plot(degrees, mean_vals, "o-", color="orange", label="mean $L_{1,2}$ (empirical)", linewidth=2)
axes[1].plot(degrees, [dv**2 for dv in degrees], "k--", label="$d^2$ (Conjecture 3)", linewidth=1.5)
axes[1].set_xlabel("GF(2) degree $d$")
axes[1].set_ylabel("$L_{1,2}$")
axes[1].set_title("Empirical $L_{1,2}$ vs $d^2$")
axes[1].legend(fontsize=8)
axes[1].grid(True, alpha=0.3)

# Right: L_{1,2} vs n for fixed d=2
max_by_n = [np.max(results_by_n[nv]) for nv in n_values]
mean_by_n = [np.mean(results_by_n[nv]) for nv in n_values]
axes[2].plot(n_values, max_by_n, "s-", color="steelblue", label="max $L_{1,2}$", linewidth=2)
axes[2].plot(n_values, mean_by_n, "o-", color="orange", label="mean $L_{1,2}$", linewidth=2)
axes[2].plot(n_values, [nv * (nv - 1) / 200 for nv in n_values], "k:", alpha=0.4, label="$\\binom{n}{2}/100$ (for scale)")
axes[2].set_xlabel("$n$ (number of variables)")
axes[2].set_ylabel("$L_{1,2}$")
axes[2].set_title(f"$L_{{1,2}}$ vs $n$ (d = {d_fixed})")
axes[2].legend(fontsize=8)
axes[2].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print(f"The known bound 4 * 2^{{6d}} is not plotted -- it is off the chart.")
print(f"For d=2 alone, the known bound is {4 * 2**12}; empirical max is {np.max(results_by_degree[2]):.4f}.")

Summary¶

We walked through the fractional PRG construction from CHLT'19:

| Step | What | Paper reference |
| --- | --- | --- |
| Fourier tails | Compute $L_{1,2}$ for the test class | Theorem 2 |
| Gaussian fooling | Small-variance, small-covariance Gaussians fool $\tilde{f}$ | Theorem 9 (Appendix A) |
| Balanced code | Matrix $A$ maps $\ell$ Gaussians to $n$ with controlled covariance | Section 2, Step I |
| Discretization | Approximate each Gaussian with $k$ bits | Lemma 10-11 |
| Truncation | Clip to $[-1,1]^n$ | Claim 17 |

The full Boolean PRG adds a polarizing random walk (Theorem 7, from [CHHL'18]) to round $[-1,1]^n$ to $\{-1,+1\}^n$. The walk treats each fractional value as a bias: $X_i = 0.3$ becomes $+1$ with probability $0.65$ and $-1$ with probability $0.35$. It rounds one coordinate at a time, restricting the function at each step. Because the class is closed under restrictions, the restricted function is still in $F$ and the fooling guarantee still holds for the remaining coordinates. The coin flips are themselves generated by the fractional PRG (applied to the restricted functions), avoiding the need for $n$ additional random bits.
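The single-step rounding rule is easy to sanity-check in isolation. This is a sketch of only the bias property, not the full walk: rounding $X_i$ to $+1$ with probability $(1+X_i)/2$ preserves the mean of each coordinate.

```python
import numpy as np

rng = np.random.default_rng(1)
x = np.array([0.3, -0.8, 0.0, 0.55])  # a fractional PRG output (illustrative values)
m = 200_000

# Round each coordinate to +1 w.p. (1 + x_i)/2, else -1.
# Then E[round(x)_i] = (1 + x_i)/2 - (1 - x_i)/2 = x_i.
u = rng.random((m, len(x)))
rounded = np.where(u < (1 + x) / 2, 1.0, -1.0)

print(rounded.mean(axis=0))  # each entry ~ the corresponding x_i
```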

Parameter chain¶

Every parameter is determined by the ones above it:

$$\varepsilon \;\xrightarrow{\text{fooling error}}\; \delta = \frac{\varepsilon}{4(t+1)} \;\xrightarrow{\text{variance}}\; p = \frac{1}{8\ln(n/\delta)} \;\xrightarrow{\text{code length}}\; \ell = O\!\left(\frac{\log n}{\delta^2}\right) \;\xrightarrow{\text{discretize}}\; s = \ell \cdot O(\log(\ell n/\varepsilon))$$

The only free choices are $\varepsilon$ (accuracy) and the function class (which gives $n$ and $t$).
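The chain can be walked through numerically. The $O(\cdot)$ steps need constants, and the ones below are placeholders chosen only to show the direction of each dependency; the first two steps follow the notebook's own formulas.

```python
import numpy as np

eps, t, n = 0.3, 2.0, 2**20  # accuracy, L_{1,2} bound, input length (illustrative)

delta = eps / (4 * (t + 1))                     # fooling error -> covariance bound
p = 1 / (8 * np.log(n / delta))                 # -> per-coordinate variance
ell = int(np.ceil(np.log(n) / delta**2))        # -> code length (placeholder constant)
s = ell * int(np.ceil(np.log2(ell * n / eps)))  # -> seed bits (placeholder constant)

print(f"delta = {delta:.4f}, p = {p:.5f}, ell = {ell}, s = {s}")
```

At this (large) $n$, $\ell \ll n$, which is where the dimension saving finally appears.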

At $n = 16$, the construction uses more random bits than it saves. The seed length only beats $n$ for large $n$ with moderate $t$. What matters at this scale is seeing each piece of the construction work.

On Conjecture 3: the empirical $L_{1,2}$ values for random degree-$d$ polynomials were well within $O(d^2)$ and actually decreased with $n$. This is expected for random polynomials (concentration of measure), so it doesn't test the conjecture's worst case. The gap between empirical values and the known bound of $4 \cdot 2^{6d}$ is large. Finding adversarial polynomials that maximize $L_{1,2}$ would be more informative than random sampling -- but even then, we'd only be testing specific instances, not the class.

Reference¶

Chattopadhyay, Hatami, Lovett, Tal. Pseudorandom Generators from the Second Fourier Level and Applications to AC0 with Parity Gates. ITCS 2019.