In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

I first create a 500x10 data frame (500 rows, 10 columns) where the values are randomly 0 or 1. Then I calculate the cumulative sum for each column and divide it by the corresponding 1-based row number (the number of values seen so far). This gives me the proportion of ones that have occurred so far in each column.

In [2]:
# Simulation parameters, named once so the sizes below cannot drift apart.
SEED = 42        # fixed seed so Restart & Run All reproduces the same draws
N_FLIPS = 500    # number of rows (draws per column)
N_SERIES = 10    # number of columns (independent series)

# Seeded generator: every run of this cell yields the same 0/1 values.
rng = np.random.default_rng(SEED)

# Each cell is 0 or 1 (the upper bound 2 is exclusive).
# NOTE: the name `random` is kept for backward compatibility with the rest of
# the notebook; it would shadow the stdlib `random` module if that were imported.
random = pd.DataFrame(rng.integers(0, 2, size=(N_FLIPS, N_SERIES)))

# 1-based row numbers: the count of draws seen so far at each row.
rows = np.arange(1, N_FLIPS + 1)

# Running proportion of ones per column: cumulative sum divided row-wise
# (axis=0) by the number of draws seen so far.
result = random.cumsum().divide(rows, axis=0)
result.head()
Out[2]:
0 1 2 3 4 5 6 7 8 9
0 0.00 1.000000 0.000000 0.000000 0.000000 1.000000 1.000000 0.000000 0.000000 0.000000
1 0.00 0.500000 0.500000 0.000000 0.000000 1.000000 1.000000 0.000000 0.000000 0.000000
2 0.00 0.666667 0.666667 0.333333 0.333333 0.666667 0.666667 0.333333 0.333333 0.333333
3 0.25 0.500000 0.500000 0.250000 0.250000 0.500000 0.750000 0.500000 0.250000 0.500000
4 0.20 0.600000 0.600000 0.400000 0.200000 0.600000 0.600000 0.400000 0.400000 0.400000

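Next, let's plot the running proportion of ones for each column against the row number (the x-axis is limited to the first 300 rows).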

In [3]:
# Plot the running proportion of ones for every column on one wide axes.
# All drawing goes through the explicit `fig` / `ax` objects rather than the
# implicit pyplot "current axes".
fig, ax = plt.subplots(figsize=(15, 5))

# One line per column of `result`; x is the 1-based row number.
for column in result.columns:
    ax.plot(rows, result[column])

# Only the first 300 rows are shown, even though `rows` extends further.
ax.set_xlim(0, 300)
ax.set_ylim(0, 1)
ax.set_xlabel('Row number')
ax.set_ylabel('Proportion of ones so far')
ax.set_title('Running proportion of ones per column')
ax.grid(True)

# Save before showing so the written file contains the rendered figure.
fig.savefig('head_and_tails.png', bbox_inches='tight', dpi=200)

plt.show()