Stock Data Processing and Visualizations (Multiple Stocks)


import datetime as dt
import numpy as np 
import pandas as pd 
import pandas_datareader.data as web 
import seaborn as sns
import matplotlib.pyplot as plt 
from matplotlib import style
%matplotlib inline

# Apply the 'fivethirtyeight' matplotlib style sheet to the figures drawn below.
plt.style.use('fivethirtyeight')
def get_price(tickers, start, end):
    """Download adjusted closing prices and return them ordered by date.

    Args:
        tickers: Symbol(s) to request; forwarded unchanged to
            ``web.DataReader``.
        start: Start of the requested period, forwarded to ``web.DataReader``.
        end: End of the requested period, forwarded to ``web.DataReader``.

    Returns:
        The ``'Adj Close'`` selection of the ``'yahoo'`` response, sorted by
        its index (presumably one column per ticker when a list is passed --
        confirm against the installed pandas_datareader version).
    """
    adj_close = web.DataReader(tickers, 'yahoo', start, end)['Adj Close']
    return adj_close.sort_index()
# Sample window: 1 Jan 2016 through 31 Dec 2016
start = dt.datetime(year=2016, month=1, day=1)
end = dt.datetime(year=2016, month=12, day=31)

# Stocks held in the portfolio
tickers = [
    'AAPL', 'BRK-B', 'BTI', 'COP', 'GS',
    'LUV', 'NKE', 'PFE', 'TSLA',
]

# Fetch the adjusted close prices and preview the first rows
daily_price = get_price(tickers, start, end)
daily_price.head()
AAPL BRK-B BTI COP GS LUV NKE PFE TSLA
Date
2016-01-04 101.790649 130.750000 50.554924 44.941711 172.800156 41.327766 60.192345 29.891121 223.410004
2016-01-05 99.239845 131.250000 50.662033 45.440212 169.824875 41.820232 61.033783 30.106300 223.429993
2016-01-06 97.297760 131.330002 50.559578 43.474976 165.678986 42.204353 60.162991 29.573030 219.039993
2016-01-07 93.191338 129.479996 49.418652 42.238308 160.586899 41.317917 58.558380 29.376560 215.649994
2016-01-08 93.684120 128.330002 48.743401 41.500145 159.923523 41.574001 57.599533 29.002338 211.000000
# Daily log return of `daily_price`: log(1 + day-over-day percentage change).
# Missing values (e.g. the first row, which has no previous price) become 0.
daily_return = np.log(1 + daily_price.pct_change()).fillna(0)

# Preview the first 5 rows
daily_return.head()
AAPL BRK-B BTI COP GS LUV NKE PFE TSLA
Date
2016-01-04 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
2016-01-05 -0.025379 0.003817 0.002116 0.011031 -0.017368 0.011846 0.013882 0.007173 0.000089
2016-01-06 -0.019764 0.000609 -0.002024 -0.044212 -0.024716 0.009143 -0.014370 -0.017872 -0.019844
2016-01-07 -0.043121 -0.014187 -0.022824 -0.028858 -0.031217 -0.021227 -0.027033 -0.006666 -0.015598
2016-01-08 0.005274 -0.008921 -0.013758 -0.017631 -0.004140 0.006179 -0.016510 -0.012821 -0.021799
# Frequency distribution of the daily returns: one histogram per column,
# all sharing the same x-axis
daily_return.hist(
    bins=40,
    figsize=(12, 8),
    sharex=True,
    facecolor='r',
    alpha=0.75,
)
plt.show()

png

# Pairwise correlation between the columns of `daily_return`
corr = daily_return.corr()

# Annotated heatmap on a 10x10 figure, labelled with the column names on both axes
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(
    corr,
    annot=True,
    linewidths=.5,
    xticklabels=corr.columns,
    yticklabels=corr.columns,
    ax=ax,  # the axes just created above, i.e. the current axes
)
<matplotlib.axes._subplots.AxesSubplot at 0x11f3fda90>

png

# Cumulative growth of 1 unit invested in each stock at the first date.
# `daily_return` holds LOG returns, which compound by summation:
#     P_t / P_0 = exp(sum of log returns up to t)
# Compounding them with (1 + r).cumprod() treats them as simple returns and
# understates growth, most visibly for the more volatile stocks.
# NOTE: re-run this cell; any output saved from the old formula is too low.
cum_daily_return = np.exp(daily_return.cumsum())
cum_daily_return.tail()
AAPL BRK-B BTI COP GS LUV NKE PFE TSLA
Date
2016-12-23 1.100237 1.250541 1.049368 1.025744 1.332540 1.155339 0.832765 1.037307 0.887124
2016-12-27 1.107203 1.247739 1.053289 1.030327 1.335799 1.156255 0.822759 1.038903 0.912497
2016-12-28 1.102471 1.238628 1.041533 1.013619 1.330757 1.148903 0.818416 1.033138 0.913370
2016-12-29 1.102188 1.233322 1.050843 1.009016 1.317028 1.149590 0.819058 1.037599 0.892091
2016-12-30 1.093562 1.232792 1.053645 1.000999 1.324031 1.141316 0.815360 1.037280 0.887968
# Line chart of cumulative returns with a reference line at y = 1 (break-even).
# DataFrame.plot opens its own figure when `figsize` is given, so no separate
# plt.figure() call is made here -- it would only leave an empty figure behind.
ax = cum_daily_return.plot(grid=True, figsize=(14, 10))
ax.axhline(y=1, color="black", lw=1)
ax.set_ylabel("Cumulative Returns")
ax.legend()
plt.show()
<matplotlib.figure.Figure at 0x11f712cc0>

png