"""
Python implementation and Gradio app for "An Optimal Strategy for [Solitaire] Yahtzee" (James Glenn, 2006)
Source: https://gunpowder.cs.loyola.edu/~jglenn/research/optimal_yahtzee.pdf

At each turn, the optimal strategy depends only on:
- The available scoring categories (2**13 possibilities)
- The total score in the upper section, capped at 63 (2**6 possibilities)

For each of the 2**19 possible states, we compute the expected optimal score for the rest of the game.
This implementation does not include Yahtzee bonuses or jokers.

The Gradio app then allows you to input your current scorecard and dice roll to get the optimal action.
"""

import numpy as np
import gradio as gr
from tqdm import tqdm

FOUR_OF_A_KIND_IS_40_POINTS = False  # common variant used in France

# Compute Rk and R5 (unique dice combinations for up to 5 dice / exactly 5 dice)
Rk = np.indices((6,) * 6).reshape(6, -1).T
Rk = Rk[Rk.sum(axis=1) <= 5]  # size 462
R5 = Rk[Rk.sum(axis=1) == 5]  # size 252

# Calculate probabilities (P) of each R5 outcome from each Rk roll using multinomial weights
fact = np.array([1, 1, 2, 6, 24, 120])
Δ = R5[None, :] - Rk[:, None]  # Δ[rk, r5, i]: number of die #i missing to go from Rk[rk] to R5[r5]
P  = np.where((Δ >= 0).all(axis=2), fact[Δ.sum(axis=2)] // np.prod(fact[Δ.clip(min=0)], axis=2), 0)
P = P / P.sum(axis=1, keepdims=True)
M = (P > 0).T  # Mask for valid transitions

# Calculate category scores for all R5 outcomes
R5_scores = np.zeros((len(R5), 13), dtype=int)
R5_scores[:, :6] = R5 * np.arange(1, 7)
upper_score = R5_scores[:, :6].sum(axis=1)
R5_scores[:, 6] = upper_score
m = R5.max(axis=1)
R5_scores[:, 7] = upper_score * (m >= 3)
R5_scores[:, 8] = (40 if FOUR_OF_A_KIND_IS_40_POINTS else upper_score) * (m >= 4)
R5_scores[:, 9] = 25 * np.array([set(r) == {0, 2, 3} for r in R5])
R5_scores[:, 10] = 30 * ((R5[:, 0:4] > 0).all(axis=1)| (R5[:, 1:5] > 0).all(axis=1)| (R5[:, 2:6] > 0).all(axis=1))
R5_scores[:, 11] = 40 * ((R5[:, 0:5] > 0).all(axis=1) | (R5[:, 1:6] > 0).all(axis=1))
R5_scores[:, 12] = 50 * (m == 5)

# Initialize state scores:
# - Bits 0-5: upper section score (capped at 63 for bonus)
# - Bits 6-11: upper categories (Aces, Twos, Threes, Fours, Fives, Sixes)
# - Bits 12-18: lower categories (Chance, Three-of-a-kind, Four-of-a-kind, Full House, Small Straight, Large Straight, Yahtzee)
UPPER_TOTAL_BITS = 2**6 - 1
UPPER_TOTAL_AND_CATEGORIES_BITS = 2**12 - 1
CATEGORIES_BITS = 2**19 - 2**6

states = np.arange(2**19, dtype=int)
scores = np.zeros(2**19, dtype=float)

def get_score(state):
    # Get children states
    available = np.array([1 - (state >> (6 + i)) & 1 for i in range(13)])
    upper_total_states = np.clip((state & UPPER_TOTAL_BITS) + R5_scores * (np.arange(13) < 6), 0, 63)
    categories_states = (state & CATEGORIES_BITS) + 2 ** np.arange(6, 19)
    children = available * (upper_total_states + categories_states)

    # Backpropagate scores from children to root of the widget (as per the paper description)
    s3 = available * (R5_scores + scores[children]) # Roll 3
    s2 = P @ s3.max(axis=1) # Roll 2
    s1 = P @ np.where(M, s2, 0).max(axis=1)  # Roll 1
    s0 = P[0] @ np.where(M, s1, 0).max(axis=1)  # Root state

    return s0, s1, s2, s3

# Identify reachable states (e.g., an upper total of 1 is impossible if only "Twos" was played)
reachable_states = np.zeros(2**12, dtype=bool)
U = np.indices((7,) * 6).reshape(6, -1).T  # 6 means unused category
reachable_states[np.clip((U % 6) @ np.arange(1, 7), 0, 63) + (U < 6) @ (2 ** np.arange(6, 12))] = True

# Compute scores sorted by the number of categories already played
# The score for the 64 states with all categories played is 0 except if the upper total is 63
order = np.argsort([bin(state)[:-6].count("1") for state in states])[::-1]
scores[-1] = 35
for idx in tqdm(order[63:]):
    if reachable_states[states[idx] & UPPER_TOTAL_AND_CATEGORIES_BITS]:
        scores[idx] = get_score(states[idx])[0]
print(f"Optimal score at the beginning of the game: {scores[0]:.2f}")  # 245.87 as per the paper (section 6)

# Gradio app
EMPTY = ""
score_options = {
    "Ones": [EMPTY, 0, 1, 2, 3, 4, 5],
    "Twos": [EMPTY, 0, 2, 4, 6, 8, 10],
    "Threes": [EMPTY, 0, 3, 6, 9, 12, 15],
    "Fours": [EMPTY, 0, 4, 8, 12, 16, 20],
    "Fives": [EMPTY, 0, 5, 10, 15, 20, 25],
    "Sixes": [EMPTY, 0, 6, 12, 18, 24, 30],
    "Chance": [EMPTY] + list(range(0, 31)),
    "Three of a kind": [EMPTY] + list(range(0, 31)),
    "Four of a kind": ([EMPTY, 0, 40] if FOUR_OF_A_KIND_IS_40_POINTS else ([EMPTY] + list(range(0, 31)))),
    "Full House": [EMPTY, 0, 25],
    "Small Straight": [EMPTY, 0, 30],
    "Large Straight": [EMPTY, 0, 40],
    "Yahtzee": [EMPTY, 0, 50],
}

categories = list(score_options.keys())
upper_categories, lower_categories = categories[:6], categories[6:]

def update_components(*inputs):
    # Parse inputs
    scorecard = {c: inputs[i] for i, c in enumerate(categories)}
    roll, dice = inputs[len(categories):]
    dice = np.array([int(d) for d in str(dice) if d in "123456"])

    # Compute totals
    scorecard_values = [0 if v == EMPTY else v for v in scorecard.values()]
    upper, lower = sum(scorecard_values[:6]), sum(scorecard_values[6:])
    bonus = 35 if upper > 62 else 0

    # Compute optimal action
    if (len(dice) == 5) and (EMPTY in scorecard.values()):
        state = np.clip(upper, 0, 63) + sum(2 ** (6 + i) for i, c in enumerate(categories) if scorecard[c] != EMPTY)
        s = get_score(state)[roll]
        r5 = R5_indices[tuple(np.bincount(dice, minlength=7)[1:])]

        if roll == 3:
            idx = np.argmax(s[r5])
            new_score = R5_scores[r5, idx]
            action = f"Play {categories[idx]} ({new_score} pts)\nExpected score: {upper + lower + new_score + s[r5, idx]:.2f}"
        else:
            rk = Rk[np.argmax(np.where(M, s, 0)[r5])]
            keep = sum([[i + 1] * rk[i] for i in range(6)], [])
            action = f"Keep {keep}"
    else:
        action = ""

    return upper, bonus, lower, lower + bonus + upper, action


with gr.Blocks() as app:

    gr.Markdown("""# 🎲 Optimal Yahtzee
This app recommends the optimal strategy for playing solitaire Yahtzee, based on [James Glenn's 2006 research](http://gunpowder.cs.loyola.edu/~jglenn/research/optimal_yahtzee.pdf).
### How to use:
- Enter your current scores in the upper and lower sections.
- Select the roll number (1, 2, or 3) and input your dice values (e.g. 61542).
- The app will display the optimal dice to keep (rolls 1 or 2) or the best scoring category to select (roll 3).
""" + FOUR_OF_A_KIND_IS_40_POINTS * "\n**Note:** In this variant, Four of a Kind is worth 40 points.")

    score_inputs = {}

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Upper Section")
            for c in upper_categories:
                score_inputs[c] = gr.Dropdown(score_options[c], label=c, value=EMPTY)
            upper_score = gr.Number(label="Upper Total", interactive=False)
            bonus_score = gr.Number(label="Bonus (if >62)", interactive=False)

        with gr.Column():
            gr.Markdown("### Lower Section")
            for c in lower_categories:
                score_inputs[c] = gr.Dropdown(score_options[c], label=c, value=EMPTY)
            lower_score = gr.Number(label="Lower Total", interactive=False)
            total_score = gr.Number(label="Total", interactive=False)

        with gr.Column():
            gr.Markdown("### Current Roll")
            roll_input = gr.Dropdown([1, 2, 3], label="Roll number", value=1)
            dice_input = gr.Textbox(label="Dice values", lines=1)
            action_box = gr.Textbox(label="Optimal action", lines=2)

    inputs = list(score_inputs.values()) + [roll_input, dice_input]
    outputs = [upper_score, bonus_score, lower_score, total_score, action_box]

    for inp in inputs:
        inp.change(update_components, inputs=inputs, outputs=outputs)

app.launch()