Getting Stock Price Data from Yahoo Finance


import datetime as dt
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import pandas_datareader.data as web
# Use matplotlib's 'ggplot' visualization style for the charts in this notebook.
style.use('ggplot')

# Start and end datetime objects: the range of dates we are going to grab
# stock pricing information for (calendar year 2016).
start = dt.datetime(year=2016, month=1, day=1)
end = dt.datetime(year=2016, month=12, day=31)

# Select the ticker (AAPL) and the source of the stock data ("yahoo").
df = web.DataReader(name='AAPL', data_source="yahoo", start=start, end=end)

# Preview the first five rows.
# 'Adj Close' is the closing price adjusted for corporate actions such as
# stock splits (Yahoo also adjusts for dividends), so prices stay comparable
# across those events. For this reason, the adjusted prices are the ones
# you're most likely to be dealing with.
df.head(n=5)
Open High Low Close Adj Close Volume
Date
2016-01-04 102.610001 105.370003 102.000000 105.349998 101.790649 67649400
2016-01-05 105.750000 105.849998 102.410004 102.709999 99.239845 55791000
2016-01-06 100.559998 102.370003 99.870003 100.699997 97.297760 68457400
2016-01-07 98.680000 100.129997 96.430000 96.449997 93.191338 81094400
2016-01-08 98.550003 99.110001 96.760002 96.959999 93.684120 70798000
# Write the price history to 'AAPL.csv' in the current working directory.
df.to_csv(path_or_buf='AAPL.csv')

png

# The data does not have to come from the downloaded DataFrame: it can also
# be loaded from a CSV file. Column 0 becomes the index and is parsed as dates.
df = pd.read_csv(filepath_or_buffer='AAPL.csv', index_col=0, parse_dates=True)

# Draw every column of the DataFrame as a line chart and display it.
df.plot(kind='line')
plt.show()

png

# Chart a single column of the DataFrame ('Adj Close') with grid lines enabled.
df.loc[:, 'Adj Close'].plot(grid=True)
plt.show()

png

# Select several columns at once by passing a list of column labels.
df.loc[:, ['High', 'Low']]
High Low
Date
2016-01-04 105.370003 102.000000
2016-01-05 105.849998 102.410004
2016-01-06 102.370003 99.870003
2016-01-07 100.129997 96.430000
2016-01-08 99.110001 96.760002
2016-01-11 99.059998 97.339996
2016-01-12 100.690002 98.839996
2016-01-13 101.190002 97.300003
2016-01-14 100.480003 95.739998
2016-01-15 97.709999 95.360001
2016-01-19 98.650002 95.500000
2016-01-20 98.190002 93.419998
2016-01-21 97.879997 94.940002
2016-01-22 101.459999 98.370003
2016-01-25 101.529999 99.209999
2016-01-26 100.879997 98.070000
2016-01-27 96.629997 93.339996
2016-01-28 94.519997 92.389999
2016-01-29 97.339996 94.349998
2016-02-01 96.709999 95.400002
2016-02-02 96.040001 94.279999
2016-02-03 96.839996 94.080002
2016-02-04 97.330002 95.190002
2016-02-05 96.919998 93.690002
2016-02-08 95.699997 93.040001
2016-02-09 95.940002 93.930000
2016-02-10 96.349998 94.099998
2016-02-11 94.720001 92.589996
2016-02-12 94.500000 93.010002
2016-02-16 96.849998 94.610001
... ... ...
2016-11-17 110.349998 108.830002
2016-11-18 110.540001 109.660004
2016-11-21 111.989998 110.010002
2016-11-22 112.419998 111.400002
2016-11-23 111.510002 110.330002
2016-11-25 111.870003 110.949997
2016-11-28 112.470001 111.389999
2016-11-29 112.029999 110.070000
2016-11-30 112.199997 110.269997
2016-12-01 110.940002 109.029999
2016-12-02 110.089996 108.849998
2016-12-05 110.029999 108.250000
2016-12-06 110.360001 109.190002
2016-12-07 111.190002 109.160004
2016-12-08 112.430000 110.599998
2016-12-09 114.699997 112.309998
2016-12-12 115.000000 112.489998
2016-12-13 115.919998 113.750000
2016-12-14 116.199997 114.980003
2016-12-15 116.730003 115.230003
2016-12-16 116.500000 115.650002
2016-12-19 117.379997 115.750000
2016-12-20 117.500000 116.680000
2016-12-21 117.400002 116.779999
2016-12-22 116.510002 115.639999
2016-12-23 116.519997 115.589996
2016-12-27 117.800003 116.489998
2016-12-28 118.019997 116.199997
2016-12-29 117.110001 116.400002
2016-12-30 117.199997 115.430000

252 rows × 2 columns

# Keep only the `Adj Close` column (as a one-column DataFrame) in `daily_close`
daily_close = df.loc[:, ['Adj Close']]

# Daily returns: percentage change from the previous row, with the NA values
# (the first row has no previous row to compare against) replaced by 0
daily_pct_change = daily_close.pct_change().fillna(0)

# Show the daily returns
print(daily_pct_change)
            Adj Close
Date                 
2016-01-04   0.000000
2016-01-05  -0.025059
2016-01-06  -0.019570
2016-01-07  -0.042205
2016-01-08   0.005288
2016-01-11   0.016192
2016-01-12   0.014513
2016-01-13  -0.025710
2016-01-14   0.021871
2016-01-15  -0.024015
2016-01-19  -0.004839
2016-01-20   0.001345
2016-01-21  -0.005063
2016-01-22   0.053167
2016-01-25  -0.019523
2016-01-26   0.005531
2016-01-27  -0.065706
2016-01-28   0.007172
2016-01-29   0.034541
2016-02-01  -0.009349
2016-02-02  -0.020222
2016-02-03   0.019793
2016-02-04   0.008035
2016-02-05  -0.026708
2016-02-08   0.010530
2016-02-09  -0.000211
2016-02-10  -0.007580
2016-02-11  -0.006046
2016-02-12   0.003095
2016-02-16   0.028194
...               ...
2016-11-17  -0.000364
2016-11-18   0.001000
2016-11-21   0.015174
2016-11-22   0.000626
2016-11-23  -0.005098
2016-11-25   0.005035
2016-11-28  -0.001968
2016-11-29  -0.000986
2016-11-30  -0.008434
2016-12-01  -0.009320
2016-12-02   0.003745
2016-12-05  -0.007188
2016-12-06   0.007699
2016-12-07   0.009823
2016-12-08   0.009817
2016-12-09   0.016322
2016-12-12  -0.005704
2016-12-13   0.016681
2016-12-14   0.000000
2016-12-15   0.005469
2016-12-16   0.001295
2016-12-19   0.005777
2016-12-20   0.002658
2016-12-21   0.000941
2016-12-22  -0.006578
2016-12-23   0.001978
2016-12-27   0.006351
2016-12-28  -0.004264
2016-12-29  -0.000257
2016-12-30  -0.007796

[252 rows x 1 columns]
# Daily log returns: log(1 + daily percentage change), with NA values
# replaced by 0
daily_log_returns = daily_close.pct_change().add(1).pipe(np.log).fillna(0)

# Show the daily log returns
print(daily_log_returns)
            Adj Close
Date                 
2016-01-04   0.000000
2016-01-05  -0.025379
2016-01-06  -0.019764
2016-01-07  -0.043121
2016-01-08   0.005274
2016-01-11   0.016063
2016-01-12   0.014409
2016-01-13  -0.026046
2016-01-14   0.021635
2016-01-15  -0.024308
2016-01-19  -0.004851
2016-01-20   0.001344
2016-01-21  -0.005075
2016-01-22   0.051802
2016-01-25  -0.019716
2016-01-26   0.005516
2016-01-27  -0.067965
2016-01-28   0.007146
2016-01-29   0.033958
2016-02-01  -0.009393
2016-02-02  -0.020429
2016-02-03   0.019599
2016-02-04   0.008003
2016-02-05  -0.027071
2016-02-08   0.010475
2016-02-09  -0.000211
2016-02-10  -0.007609
2016-02-11  -0.006065
2016-02-12   0.003090
2016-02-16   0.027804
...               ...
2016-11-17  -0.000364
2016-11-18   0.001000
2016-11-21   0.015060
2016-11-22   0.000626
2016-11-23  -0.005111
2016-11-25   0.005022
2016-11-28  -0.001970
2016-11-29  -0.000986
2016-11-30  -0.008469
2016-12-01  -0.009363
2016-12-02   0.003738
2016-12-05  -0.007214
2016-12-06   0.007669
2016-12-07   0.009775
2016-12-08   0.009769
2016-12-09   0.016190
2016-12-12  -0.005721
2016-12-13   0.016544
2016-12-14   0.000000
2016-12-15   0.005454
2016-12-16   0.001294
2016-12-19   0.005761
2016-12-20   0.002654
2016-12-21   0.000940
2016-12-22  -0.006600
2016-12-23   0.001976
2016-12-27   0.006331
2016-12-28  -0.004273
2016-12-29  -0.000257
2016-12-30  -0.007826

[252 rows x 1 columns]
# Histogram of `daily_pct_change`, split into 50 bins
daily_pct_change.hist(bins=50)

# Display the histogram
plt.show()

# Summary statistics (count, mean, std, min, quartiles, max) of the daily returns
return_summary = daily_pct_change.describe()
print(return_summary)

png

        Adj Close
count  252.000000
mean     0.000571
std      0.014702
min     -0.065706
25%     -0.005742
50%      0.000823
75%      0.007724
max      0.064963
# Cumulative daily returns: running product of (1 + daily return)
cum_daily_return = daily_pct_change.add(1).cumprod()

# Show `cum_daily_return`
print(cum_daily_return)
            Adj Close
Date                 
2016-01-04   1.000000
2016-01-05   0.974941
2016-01-06   0.955861
2016-01-07   0.915520
2016-01-08   0.920361
2016-01-11   0.935263
2016-01-12   0.948837
2016-01-13   0.924442
2016-01-14   0.944661
2016-01-15   0.921974
2016-01-19   0.917513
2016-01-20   0.918747
2016-01-21   0.914096
2016-01-22   0.962696
2016-01-25   0.943901
2016-01-26   0.949122
2016-01-27   0.886759
2016-01-28   0.893118
2016-01-29   0.923968
2016-02-01   0.915330
2016-02-02   0.896820
2016-02-03   0.914571
2016-02-04   0.921919
2016-02-05   0.897296
2016-02-08   0.906745
2016-02-09   0.906554
2016-02-10   0.899682
2016-02-11   0.894243
2016-02-12   0.897010
2016-02-16   0.922301
...               ...
2016-11-17   1.066885
2016-11-18   1.067952
2016-11-21   1.084157
2016-11-22   1.084836
2016-11-23   1.079305
2016-11-25   1.084739
2016-11-28   1.082604
2016-11-29   1.081537
2016-11-30   1.072416
2016-12-01   1.062421
2016-12-02   1.066400
2016-12-05   1.058734
2016-12-06   1.066885
2016-12-07   1.077364
2016-12-08   1.087941
2016-12-09   1.105698
2016-12-12   1.099391
2016-12-13   1.117730
2016-12-14   1.117730
2016-12-15   1.123843
2016-12-16   1.125299
2016-12-19   1.131800
2016-12-20   1.134808
2016-12-21   1.135876
2016-12-22   1.128404
2016-12-23   1.130636
2016-12-27   1.137816
2016-12-28   1.132965
2016-12-29   1.132673
2016-12-30   1.123843

[252 rows x 1 columns]
# Chart the cumulative daily returns as a line plot on a 12x8 figure
cum_daily_return.plot(kind='line', figsize=(12, 8))

# Display the chart
plt.show()

png