GPU-Accelerated Boolean Function Analysis

This notebook demonstrates GPU acceleration for Boolean function analysis using CuPy. Run it on Google Colab with a GPU runtime to see the speed-ups.

Runtime → Change runtime type → T4 GPU

We benchmark:

  1. Walsh-Hadamard Transform (WHT)
  2. Influence computation
  3. Noise stability
  4. Spectral weight by degree

and compare GPU (CuPy) vs CPU (NumPy) across increasing $n$.

In [ ]:
# Setup — install boofun and CuPy
!pip install --upgrade boofun -q
!pip install cupy-cuda12x -q 2>/dev/null || echo "CuPy install skipped (no CUDA)"

import time
import numpy as np
import matplotlib.pyplot as plt

import boofun as bf
from boofun.core.gpu import (
    is_gpu_available,
    is_gpu_enabled,
    enable_gpu,
    get_gpu_info,
    gpu_walsh_hadamard,
    gpu_influences,
    gpu_noise_stability,
    gpu_spectral_weight_by_degree,
    GPUBooleanFunctionOps,
)
from boofun.core.optimizations import fast_walsh_hadamard

print(f"boofun version: {bf.__version__}")
print(f"GPU available:  {is_gpu_available()}")
print(f"GPU enabled:    {is_gpu_enabled()}")
if is_gpu_available():
    info = get_gpu_info()
    for d in info.get('devices', []):
        print(f"  Device: {d['name']} ({d['memory_gb']:.1f} GB)")

1. Benchmarking the Walsh-Hadamard Transform

The WHT is the core computation: it converts a truth table in $\pm 1$ form to Fourier coefficients in $O(n \cdot 2^n)$ time. For large $n$, this is where GPU parallelism helps most.
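For reference, the butterfly recursion behind the $O(n \cdot 2^n)$ bound can be sketched in a few lines of plain NumPy. This is a simplified, unnormalised version for illustration, not boofun's `fast_walsh_hadamard` implementation:

```python
import numpy as np

def wht_reference(v):
    """Unnormalised Walsh-Hadamard transform via the butterfly recursion.

    For a vector of length 2^n, performs n passes of pairwise
    (a + b, a - b) combines, i.e. O(n * 2^n) work total.
    """
    v = np.asarray(v, dtype=float).copy()
    h = 1
    while h < len(v):
        for start in range(0, len(v), 2 * h):
            a = v[start:start + h].copy()
            b = v[start + h:start + 2 * h].copy()
            v[start:start + h] = a + b       # sum half
            v[start + h:start + 2 * h] = a - b  # difference half
        h *= 2
    return v
```

On the $\pm 1$ truth table of 2-variable parity, `[1, -1, -1, 1]`, all mass lands on the top coefficient, as expected for XOR.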

In [ ]:
def benchmark_wht(n_values, n_trials=3):
    """Benchmark WHT on CPU vs GPU for various n."""
    cpu_times = []
    gpu_times = []

    for n in n_values:
        size = 2 ** n
        # Random ±1 function
        values = np.random.choice([-1.0, 1.0], size=size)

        # CPU benchmark
        t_cpu = []
        for _ in range(n_trials):
            v = values.copy()
            start = time.perf_counter()
            fast_walsh_hadamard(v)
            t_cpu.append(time.perf_counter() - start)
        cpu_times.append(np.median(t_cpu))

        # GPU benchmark (skipped if no GPU is available)
        if is_gpu_available():
            enable_gpu(True)
            t_gpu = []
            for _ in range(n_trials):
                start = time.perf_counter()
                gpu_walsh_hadamard(values.copy())
                t_gpu.append(time.perf_counter() - start)
            gpu_times.append(np.median(t_gpu))
        else:
            gpu_times.append(None)

        gpu_str = (f"{gpu_times[-1]*1000:8.2f} ms"
                   if gpu_times[-1] is not None else "     N/A")
        print(f"n={n:2d} | CPU: {cpu_times[-1]*1000:8.2f} ms | GPU: {gpu_str}")

    return cpu_times, gpu_times

n_values = list(range(8, 22))
cpu_times, gpu_times = benchmark_wht(n_values)
In [ ]:
# Plot results
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 5))

cpu_ms = [t * 1000 for t in cpu_times]
ax1.semilogy(n_values, cpu_ms, 'bo-', label='CPU (NumPy)', markersize=6)
if any(g is not None for g in gpu_times):
    gpu_ms = [t * 1000 if t is not None else np.nan for t in gpu_times]
    ax1.semilogy(n_values, gpu_ms, 'rs-', label='GPU (CuPy)', markersize=6)
ax1.set_xlabel('n (variables)')
ax1.set_ylabel('Time (ms, log scale)')
ax1.set_title('Walsh-Hadamard Transform: CPU vs GPU')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Speedup plot
if any(g is not None for g in gpu_times):
    speedups = [c / g if g else 0 for c, g in zip(cpu_times, gpu_times)]
    ax2.bar(n_values, speedups, color='green', alpha=0.7)
    ax2.axhline(y=1.0, color='red', linestyle='--', label='Break-even')
    ax2.set_xlabel('n (variables)')
    ax2.set_ylabel('Speedup (CPU time / GPU time)')
    ax2.set_title('GPU Speedup Factor')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
else:
    ax2.text(0.5, 0.5, 'No GPU available\n(run on Colab with GPU runtime)',
             ha='center', va='center', transform=ax2.transAxes, fontsize=14)

plt.tight_layout()
plt.show()

2. GPU-Accelerated Spectral Analysis

The GPUBooleanFunctionOps class wraps a truth table and provides GPU-accelerated Fourier analysis, influences, noise stability, and spectral weights.

In [ ]:
# Build majority(11) and compare CPU vs GPU analysis
n = 11
f = bf.majority(n)
tt = np.array(f.get_representation('truth_table'), dtype=float)

# CPU path
start = time.perf_counter()
fourier_cpu = f.fourier()
inf_cpu = np.array([f.influence(i) for i in range(n)])
stab_cpu = f.noise_stability(0.5)
t_cpu = time.perf_counter() - start

# GPU path
ops = GPUBooleanFunctionOps(tt)
start = time.perf_counter()
fourier_gpu = ops.fourier()
inf_gpu = ops.influences()
stab_gpu = ops.noise_stability(0.5)
weights_gpu = ops.spectral_weights()
t_gpu = time.perf_counter() - start

print(f"Majority({n}) analysis:")
print(f"  CPU time: {t_cpu*1000:.1f} ms")
print(f"  GPU time: {t_gpu*1000:.1f} ms")
print(f"  Speedup:  {t_cpu/t_gpu:.1f}x" if t_gpu > 0 else "")
print(f"\n  Noise stability (rho=0.5): {stab_gpu:.6f}")
print(f"  Total influence: {ops.total_influence():.4f}")
print(f"  Spectral weights by degree: {[f'{w:.4f}' for w in weights_gpu]}")
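The quantities above follow the standard definitions: $\mathrm{Inf}_i(f) = \sum_{S \ni i} \hat{f}(S)^2$ and $\mathrm{Stab}_\rho(f) = \sum_S \rho^{|S|} \hat{f}(S)^2$. As a sanity check, they can be recovered directly from the normalised Fourier coefficients with subsets indexed by bitmask. This sketch is independent of boofun's API:

```python
import numpy as np

def influences_and_stability(fhat, rho):
    """Influences and noise stability from normalised Fourier coefficients.

    fhat is indexed by subset bitmask: bit i of the index means i is in S.
    """
    N = len(fhat)
    n = int(np.log2(N))
    idx = np.arange(N)
    degrees = np.array([bin(s).count("1") for s in idx])  # |S| per bitmask
    weights = fhat ** 2
    # Stab_rho(f) = sum over S of rho^|S| * fhat(S)^2
    stability = float(np.sum(rho ** degrees * weights))
    # Inf_i(f) = sum of fhat(S)^2 over subsets S containing variable i
    influences = np.array(
        [weights[((idx >> i) & 1) == 1].sum() for i in range(n)]
    )
    return influences, stability
```

For 2-variable parity ($\hat{f}$ concentrated on $S = \{0, 1\}$), both influences are 1 and the stability at $\rho = 0.5$ is $0.5^2 = 0.25$.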

3. Performance Tips Summary

| Technique | When to use | How |
|---|---|---|
| NumPy vectorisation | Always (free speed) | Use array operations, avoid Python loops |
| In-memory truth tables | n ≤ 20 | Default — `bf.create()`, `.fourier()` |
| Numba JIT | Tight inner loops | `pip install boofun[performance]` |
| CuPy GPU | n > 14 for WHT; large batch operations | `pip install cupy-cuda12x` |
| Batch processing | Evaluating many inputs | `boofun.core.batch_processing` |
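The batch-processing idea can be illustrated generically: instead of evaluating a function one input at a time in a Python loop, convert a whole batch of inputs to truth-table indices in one vectorised step. This is a plain NumPy sketch, not the `boofun.core.batch_processing` API:

```python
import numpy as np

def eval_batch(truth_table, inputs):
    """Evaluate a Boolean function on many inputs at once.

    truth_table: array of length 2**n (output per input index).
    inputs: (batch, n) array of 0/1 bits, x_0 as the most significant bit.
    """
    n = inputs.shape[1]
    # Powers of two to map each bit row to its truth-table index
    powers = 1 << np.arange(n - 1, -1, -1)
    indices = inputs @ powers
    return truth_table[indices]
```

For 2-variable AND (truth table `[0, 0, 0, 1]`), a batch of three inputs is evaluated with a single fancy-indexing lookup rather than three separate calls.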

See docs/guides/performance.md for detailed guidance.