
# coding: utf-8

# In[1]:

import pymc3 as pm
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
import math
import pandas as pd
import numpy as np


# In[2]:

subset = pd.read_csv('toy_events_data.csv')
subset['viewing_seconds'] = subset['p'] * subset['r'] * subset['t']

data = subset.copy()


# In[3]:

sns.pairplot(data[['r', 'n', 't', 'viewing_seconds']])
plt.show()


# In[4]:

data.t * data.p
data.viewing_seconds

a, loc, scale = stats.gamma.fit(data.r, floc=0)
beta = 1 / scale
alpha = a

print('alpha', alpha, 'beta', beta)

sns.distplot(data.r)
sns.distplot(stats.gamma.rvs(a, loc, scale, size=1000000))
plt.show()


# In[5]:

with pm.Model() as model:
#    alpha = pm.HalfNormal('alpha', sd=100)
#    beta = pm.HalfNormal('beta', sd=100)
    r = pm.Gamma('r', alpha=4.447, beta=273.414, testval=0.01, shape=len(data.viewing_seconds.values))
    poisson_rate = pm.Deterministic('poisson_rate', data.t.values * data.p.values * r)    
    viewing_seconds = pm.Poisson(
        'viewing_seconds', 
        mu=poisson_rate[data.viewing_seconds.index], 
        observed=np.asarray(data.viewing_seconds.values, dtype='int64'),
        shape=len(data.viewing_seconds.values)
    )


# In[6]:

with model:
    start = pm.find_MAP()

    # instantiate sampler
    step = pm.Metropolis()
    
    trace = pm.sample(50000, tune=50000, progressbar=True, start=start, step=step, chains=1, njobs=4)


# In[7]:

# r seems to be recovered using Poisson likelihood, but there doesn't seem to be any uncertainty around it
pm.traceplot(trace)
plt.show()


# In[8]:

# I lose uncertainty around the viewing_seconds
ppc = pm.sample_ppc(trace, model=model, samples=500)
sns.distplot(data.viewing_seconds)
sns.distplot(ppc['viewing_seconds'].flatten())
plt.show()


# In[9]:

with pm.Model() as model:
#    alpha = pm.HalfNormal('alpha', sd=100)
#    beta = pm.HalfNormal('beta', sd=100)
    r = pm.Gamma('r', alpha=4.447, beta=273.414, testval=0.01, shape=len(data.n.values))
    poisson_rate = pm.Deterministic('poisson_rate', data.t.values * data.p.values * r)    
    n = pm.Poisson(
        'n', 
        mu=poisson_rate[data.viewing_seconds.index], 
        observed=np.asarray(data.n.values, dtype='int64'),
        shape=len(data.n.values)
    )
    
with model:
    start = pm.find_MAP()

    # instantiate sampler
    step = pm.Metropolis()
    
    trace = pm.sample(50000, tune=50000, progressbar=True, start=start, step=step, chains=1, njobs=4)


# In[10]:

# r has uncertainty around it, but is not proportional to observed proportion of people in attendance.
pm.traceplot(trace)
plt.show()


# In[11]:

# I can get uncertainty around the number of samples but do not know number of seconds viewed now
ppc = pm.sample_ppc(trace, model=model, samples=500)
sns.distplot(data.n)
sns.distplot(ppc['n'].flatten())
plt.show()


# In[ ]:



