How well do different adaptive algorithms perform?¶

This script shows how the adaptive algorithm outperforms the passive algorithm.

It does this by counting how many captions could still be considered the funniest. For each caption we have a mean and a standard deviation. If the top-rated caption has mean \(\mu_1\) and std. dev. \(\sigma_1\), we count how many captions \(i\) have \(\mu_i + \sigma_i > \mu_1 - \sigma_1\).

Metrics¶

Number of captions that could be the funniest caption
Rank of caption found to be the funniest at the end

[1]:

import numpy as np

[2]:

import caption_contest_data as ccd

[3]:

# Fetch the contest-559 responses for the "active" and "passive" datasets.
df1 = ccd.responses("559-active")
df2 = ccd.responses("559-passive")
# Distinct captions (target_id values) present in each dataset.
tuple(frame.target_id.nunique() for frame in (df1, df2))

[3]:

(5012, 138)

[4]:

import pandas as pd
# `df` is another name for `df1` (no copy is made), so in-place operations on
# `df` also modify `df1`.
df = df1
# Peek at the first response to see which fields are available.
df.iloc[0]

[4]:

alg_label                                                               LilUCB
network_delay                                                          1.96677
participant_uid              c84f9a4715e586b920096d5d1c0b38_xofXRJCicRqzuVa...
response_time                                                            4.947
target                                        Sort of takes the fun out of it.
target_id                                                                 4254
target_reward                                                                3
timestamp_query_generated                           2017-03-12 04:45:35.389856
label                                                                    funny
contest                                                                    559
filename                                              559-active-responses.csv
Name: 0, dtype: object

[5]:

df.dtypes

[5]:

alg_label                            object
network_delay                       float64
participant_uid                      object
response_time                       float64
target                               object
target_id                             int64
target_reward                         int64
timestamp_query_generated    datetime64[ns]
label                                object
contest                              object
filename                             object
dtype: object

[6]:

df.memory_usage().sum() / 1024**2, "MB"

[6]:

(41.95037841796875, 'MB')

[7]:

df1.timestamp_query_generated.min(), df2.timestamp_query_generated.min()

[7]:

(Timestamp('2017-03-06 15:01:46.733847'),
 Timestamp('2017-03-07 20:07:07.962230'))

[8]:

# Measure elapsed time from the earliest query in either dataset instead of a
# hard-coded timestamp, so the cell stays correct if the data changes.
start = min(
    df1.timestamp_query_generated.min(),
    df2.timestamp_query_generated.min(),
)
time_diff1 = df1.timestamp_query_generated - start
time_diff2 = df2.timestamp_query_generated - start

# Vectorized conversion of the timedeltas to fractional days.
SECONDS_PER_DAY = 60 * 60 * 24
time_d1 = time_diff1.dt.total_seconds() / SECONDS_PER_DAY
time_d2 = time_diff2.dt.total_seconds() / SECONDS_PER_DAY

[9]:

import matplotlib.pyplot as plt

# Overlay the response-time histograms of both datasets on one set of axes.
fig, ax = plt.subplots()
hist_specs = [
    (time_d1, {"bins": 40, "label": "Active"}),
    (time_d2, {"bins": 30, "alpha": 0.8, "label": "Passive"}),
]
for days, hist_kwargs in hist_specs:
    days.plot.hist(ax=ax, **hist_kwargs)
ax.legend(loc="best")
ax.set_xlabel("Days since start")
ax.set_ylabel("Number of responses")

[9]:

Text(0, 0.5, 'Number of responses')

../_images/example-analyses_Adaptive-gains_10_1.png

[10]:

# Order the responses chronologically (in place) so that taking the first N
# rows by position later on means taking the N earliest responses.
df.sort_values(by="timestamp_query_generated", inplace=True)

[11]:

import caption_contest_data._raw as raw

[12]:

# Mean reward of every caption under each algorithm
# (rows: target_id, columns: alg_label).
scores = df.pivot_table(
    index="target_id",
    values="target_reward",
    columns="alg_label",
    aggfunc="mean",  # string alias; passing np.mean is deprecated in recent pandas
)
# For each algorithm: (target_id of its highest-scoring caption, that score).
top_scores = {alg: (scores[alg].idxmax(), scores[alg].max()) for alg in ["KLUCB", "LilUCB"]}
top_scores

[12]:

{'KLUCB': (1559, 1.7674418604651163), 'LilUCB': (387, 1.8341463414634147)}

[13]:

# Count the responses collected by each algorithm.
num_responses = pd.pivot_table(
    df,
    values="target_reward",
    index="alg_label",
    aggfunc=len,
)
num_responses

[13]:

	target_reward
alg_label
KLUCB	250564
LilUCB	249300

[14]:

def get_stats(df, alg_label, num_responses, top_id, top=10):
    """Summarize caption rankings after an algorithm's first ``num_responses`` responses.

    Parameters
    ----------
    df : pandas.DataFrame
        Responses with at least the columns ``alg_label``, ``label``,
        ``target_id`` and ``response_time``. Rows are used in their existing
        order, so sort by time beforehand if "first" should mean "earliest".
    alg_label : str
        Algorithm whose responses are analyzed.
    num_responses : int
        Number of that algorithm's responses to use (taken from the top of
        ``df`` after filtering).
    top_id : hashable
        ``target_id`` of the caption whose rank is tracked.
    top : int, optional
        Size of the leaderboard that ``top_id`` is checked against.

    Returns
    -------
    dict
        ``num_top``: number of captions whose score exceeds the leader's
        ``score - prec``; ``top_in_top``: whether ``top_id`` is among the
        first ``top`` captions by score; ``top_rank``: 1-based rank of
        ``top_id`` by score, or NaN if it has no responses in the subset;
        ``top``, ``num_responses`` and ``alg_label`` echo the inputs.
    """
    responses = df[df.alg_label == alg_label]
    # Positional slice takes exactly `num_responses` rows (a `<=` mask over
    # `np.arange` would take one extra). Clamp at 0 so a negative budget
    # yields no rows instead of slicing from the end.
    shown = responses.iloc[: max(int(num_responses), 0)]

    # Per-caption tally of how often each rating label was given.
    counts = shown.pivot_table(
        columns="label",
        index="target_id",
        values="response_time",
        aggfunc=len,
    )
    # Total ratings per caption; `sum` skips the NaN cells of unused labels.
    count = counts.sum(axis=1)
    # Make sure all three rating columns exist even when a rating never
    # occurs in this subset, then treat "never given" as zero.
    counts = counts.reindex(columns=["unfunny", "somewhat_funny", "funny"]).fillna(0)

    # NOTE(review): `prec` is presumably an uncertainty half-width around
    # `score` -- confirm against `raw.score_and_prec`.
    score, prec = raw.score_and_prec(counts["unfunny"], counts["somewhat_funny"], counts["funny"], count)

    stats = pd.DataFrame({"score": score, "prec": prec})
    stats.sort_values(by="score", ascending=False, inplace=True)

    # 1-based position of the tracked caption in the score ordering; NaN when
    # the caption has not received any response in this subset.
    hits = np.flatnonzero(stats.index == top_id)
    top_rank = int(hits[0]) + 1 if hits.size else np.nan

    top_cap = stats.loc[stats.score.idxmax()]
    # NOTE(review): challengers are compared by their bare score; if the
    # intended criterion is `score + prec > leader score - leader prec`,
    # this needs `score + prec` on the right-hand side -- confirm.
    num_top_captions = (top_cap["score"] - top_cap["prec"] < score).sum()
    return {
        "num_top": num_top_captions,
        # Honor the `top` argument instead of a fixed leaderboard of 10.
        "top_in_top": top_id in stats.index[:top],
        "top": top,
        "num_responses": num_responses,
        "alg_label": alg_label,
        "top_rank": top_rank,
    }

[16]:

# Evaluate both algorithms at 200 evenly spaced response budgets.
data = []
budgets = np.linspace(10e3, 250e3, num=200).astype(int)
# The loop variable is deliberately not called `num_responses`, so the
# per-algorithm response table of that name is not overwritten.
for k, n_responses in enumerate(budgets):
    # Report progress once per budget rather than once per algorithm.
    if k % 20 == 0:
        print(k, n_responses)
    for alg_label in ["KLUCB", "LilUCB"]:
        data.append(get_stats(df, alg_label, n_responses, top_scores[alg_label][0]))

[17]:

show = pd.DataFrame(data)

[18]:

import matplotlib.pyplot as plt

# One line per algorithm: rank of the tracked caption versus responses used.
fig, ax = plt.subplots()
for alg, rows in show.groupby("alg_label", sort=False):
    rows.plot(x="num_responses", y="top_rank", ax=ax, label=alg)
ax.set_xlabel("Number of responses")
ax.set_ylabel("Rank of top caption")
ax.set_yscale("log")

../_images/example-analyses_Adaptive-gains_18_0.png

[19]:

# One line per algorithm: value of "num_top" versus responses used.
fig, ax = plt.subplots()
for alg, rows in show.groupby("alg_label", sort=False):
    rows.plot(x="num_responses", y="num_top", ax=ax, label=alg)
ax.set_xlabel("Number of responses")
ax.set_ylabel("Number of top caption")
# ax.set_xscale("log")
ax.set_yscale("log")
ax.grid(alpha=0.4)

../_images/example-analyses_Adaptive-gains_19_0.png

[ ]: