%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import matplotlib.pyplot as pl
import scipy.stats as st
plt.style.use('ggplot')
import glob
import pandas as pd
# Load every tab-separated flight log in the directory into its own DataFrame.
# The paths are sorted because glob.glob() returns matches in arbitrary order;
# sorting keeps the order of `parsed_data` (and of the figures drawn from it)
# reproducible from run to run.
flight_paths = sorted(
    glob.glob('/home/ben/Downloads/complete_flights_O1-O6/*.txt'))
parsed_data = [pd.read_csv(path, sep='\t') for path in flight_paths]
# One figure per flight: a faint dashed line tracing the path, with the
# individual samples drawn on top as scatter markers.
for flight in parsed_data:
    track_x, track_y = flight.x, flight.y
    plt.plot(track_x, track_y, linestyle='--', alpha=.25)
    plt.scatter(track_x, track_y)
    plt.show()
# Stack all flights row-wise into a single frame and preview it.
df = pd.concat(parsed_data, axis=0)
df.head()
# Joint distribution of x and y over all flights. x/y are passed by keyword
# and the figure size via `height`: current seaborn no longer accepts
# positional x/y vectors or the old `size` keyword for jointplot.
sns.jointplot(x='x', y='y', data=df, height=16, joint_kws={'alpha': .25})
# Overlay every flight on one large figure: dashed path plus sample markers.
# The loop variable is deliberately NOT named `df`: reusing that name would
# rebind `df` to the last single flight, and the code that follows reads
# `df.x` / `df.y` expecting the concatenated data of all flights.
plt.figure(figsize=(16, 16))
for flight in parsed_data:
    plt.plot(flight.x, flight.y, alpha=.25, linestyle='--')
    plt.scatter(flight.x, flight.y, alpha=.75)
plt.show()
# Peak-normalised Gaussian KDE of the (x, y) samples in `df`, drawn as filled
# contours with labelled iso-lines and the raw samples overlaid.
x, y = df.x, df.y
xmin, xmax = x.min(), x.max()
ymin, ymax = y.min(), y.max()

# 100 x 100 evaluation grid spanning the bounding box of the samples.
xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack((xx.ravel(), yy.ravel()))
values = np.vstack((x, y))

# Fit the KDE (scalar bandwidth factor 0.75), evaluate it on the grid and
# rescale so that the highest density equals 1.
kernel = st.gaussian_kde(values, bw_method=.75)
f = kernel(positions).reshape(xx.shape)
f = f / f.max()

fig, ax = pl.subplots(figsize=(16, 16))
ax.set(xlim=(xmin, xmax), ylim=(ymin, ymax))

# Filled density contours, then black iso-lines with inline value labels.
cfset = ax.contourf(xx, yy, f, cmap='Blues')
cset = ax.contour(xx, yy, f, colors='k')
ax.clabel(cset, inline=True, fontsize=10)

# NOTE(review): the axis labels read 'Y1'/'Y0' although the plotted columns
# are `x` and `y` -- confirm these are the intended labels.
ax.set(xlabel='Y1', ylabel='Y0')

# Raw samples, faint, on top of the density.
ax.scatter(x, y, alpha=.1)
pl.show()
# Log-density Gaussian KDE of the (x, y) samples in `df` with a very narrow
# bandwidth factor (0.005), drawn as filled contours only.
x = df.x
y = df.y
xmin, xmax = df.x.min(), df.x.max()
ymin, ymax = df.y.min(), df.y.max()

# Perform the kernel density estimate on a 200 x 200 grid spanning the
# bounding box of the samples.
xx, yy = np.mgrid[xmin:xmax:200j, ymin:ymax:200j]
positions = np.vstack([xx.ravel(), yy.ravel()])
values = np.vstack([x, y])
kernel = st.gaussian_kde(values, .005)
f = np.reshape(kernel(positions).T, xx.shape)
# Plot the log of the density so low-density regions remain distinguishable.
f = np.log(f)

fig = pl.figure(figsize=(16, 16))
ax = fig.gca()
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)

# Filled contour plot of the log-density.
cfset = ax.contourf(xx, yy, f, cmap='Blues')

# This variant draws no contour lines and no raw-sample scatter, so there is
# nothing to label here. The former `ax.clabel(cset, ...)` call was removed:
# with no `ax.contour` in this section, `cset` was a leftover ContourSet from
# an earlier figure (or undefined on a fresh run).

# NOTE(review): the axis labels read 'Y1'/'Y0' although the plotted columns
# are `x` and `y` -- confirm these are the intended labels.
ax.set_xlabel('Y1')
ax.set_ylabel('Y0')
pl.show()
# Log-density Gaussian KDE of the (x, y) samples in `df` (bandwidth factor
# 0.5), drawn as filled contours with labelled iso-lines and the raw samples.
x, y = df.x, df.y
xmin, xmax = x.min(), x.max()
ymin, ymax = y.min(), y.max()

# 100 x 100 evaluation grid spanning the bounding box of the samples.
xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack((xx.ravel(), yy.ravel()))
values = np.vstack((x, y))

# Fit the KDE, evaluate it on the grid and take the log of the density.
kernel = st.gaussian_kde(values, bw_method=.5)
f = np.log(kernel(positions)).reshape(xx.shape)

fig, ax = pl.subplots(figsize=(16, 16))
ax.set(xlim=(xmin, xmax), ylim=(ymin, ymax))

# Filled log-density contours, then black iso-lines with inline labels.
cfset = ax.contourf(xx, yy, f, cmap='Blues')
cset = ax.contour(xx, yy, f, colors='k')
ax.clabel(cset, inline=True, fontsize=10)

# NOTE(review): the axis labels read 'Y1'/'Y0' although the plotted columns
# are `x` and `y` -- confirm these are the intended labels.
ax.set(xlabel='Y1', ylabel='Y0')

# Raw samples, faint, on top of the density.
ax.scatter(x, y, alpha=.1)
pl.show()
# Log-density Gaussian KDE of the (x, y) samples in `df` with a narrow
# bandwidth factor (0.01), drawn as filled contours with labelled iso-lines
# and the raw samples overlaid.
x, y = df.x, df.y
xmin, xmax = x.min(), x.max()
ymin, ymax = y.min(), y.max()

# 100 x 100 evaluation grid spanning the bounding box of the samples.
xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack((xx.ravel(), yy.ravel()))
values = np.vstack((x, y))

# Fit the KDE, evaluate it on the grid and take the log of the density.
kernel = st.gaussian_kde(values, bw_method=.01)
f = np.log(kernel(positions)).reshape(xx.shape)

fig, ax = pl.subplots(figsize=(16, 16))
ax.set(xlim=(xmin, xmax), ylim=(ymin, ymax))

# Filled log-density contours, then black iso-lines with inline labels.
cfset = ax.contourf(xx, yy, f, cmap='Blues')
cset = ax.contour(xx, yy, f, colors='k')
ax.clabel(cset, inline=True, fontsize=10)

# NOTE(review): the axis labels read 'Y1'/'Y0' although the plotted columns
# are `x` and `y` -- confirm these are the intended labels.
ax.set(xlabel='Y1', ylabel='Y0')

# Raw samples, faint, on top of the density.
ax.scatter(x, y, alpha=.1)
pl.show()
# Log-density Gaussian KDE of the (x, y) samples in `df` (bandwidth factor
# 0.01) on a finer grid, drawn as a smooth 255-level filled contour plot with
# the raw samples overlaid very faintly.
x = df.x
y = df.y
xmin, xmax = df.x.min(), df.x.max()
ymin, ymax = df.y.min(), df.y.max()

# Perform the kernel density estimate on a 250 x 250 grid spanning the
# bounding box of the samples.
xx, yy = np.mgrid[xmin:xmax:250j, ymin:ymax:250j]
positions = np.vstack([xx.ravel(), yy.ravel()])
values = np.vstack([x, y])
kernel = st.gaussian_kde(values, .01)
f = np.reshape(kernel(positions).T, xx.shape)
# Plot the log of the density so low-density regions remain distinguishable.
f = np.log(f)

fig = pl.figure(figsize=(16, 16))
ax = fig.gca()
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)

# Filled contour plot; 255 levels gives a near-continuous colour gradient.
cfset = ax.contourf(xx, yy, f, 255, cmap='Blues')

# This variant draws no contour lines, so there is nothing to label. The
# former `ax.clabel(cset, ...)` call was removed: with no `ax.contour` in this
# section, `cset` was a leftover ContourSet belonging to an earlier figure.

# NOTE(review): the axis labels read 'Y1'/'Y0' although the plotted columns
# are `x` and `y` -- confirm these are the intended labels.
ax.set_xlabel('Y1')
ax.set_ylabel('Y0')

# Raw samples, very faint, on top of the density.
ax.scatter(df.x, df.y, alpha=.05)
pl.show()