# Import libraries
import pandas as pd
import numpy as np


# Import datetime
from datetime import datetime


# Build a date
now = datetime.now()
now

datetime.datetime(2020, 11, 25, 13, 23, 22, 106620)


# Report every component of the current datetime, largest unit first
components = (
    ('Year: ', now.year),
    ('Month: ', now.month),
    ('Day: ', now.day),
    ('Hour: ', now.hour),
    ('Minutes: ', now.minute),
    ('Seconds: ', now.second),
    ('Microsecond: ', now.microsecond),
)
for label, value in components:
    print(label, value)

Year:  2020
Month:  11
Day:  25
Hour:  13
Minutes:  23
Seconds:  22
Microsecond:  106620


datetime(year=2020, month=9, day=10)

datetime.datetime(2020, 9, 10, 0, 0)


datetime(year=2020, month=9, day=10, hour=11, minute=30)

datetime.datetime(2020, 9, 10, 11, 30)


# print data
now

datetime.datetime(2020, 11, 25, 13, 23, 22, 106620)


# Apply strftime: render the current datetime as formatted strings.
# NOTE(review): '%F' and '%D' are handed to the platform C library's strftime
# and are presumably not available everywhere (e.g. some Windows builds) --
# confirm before relying on them; '%Y-%m-%d' is the portable spelling of '%F'.

# Year-month-day with zero-padded numbers
print(now.strftime('%Y-%m-%d'))
# Produces the same year-month-day text as the line above in this run
print(now.strftime('%F'))
# month/day/two-digit year
print(now.strftime('%D'))
# Full month name, day of month, four-digit year
print(now.strftime('%B %d, %Y'))
# Day of month, abbreviated month name, four-digit year
print(now.strftime('%d %b, %Y'))
# Full weekday name
print(now.strftime('%A'))

2020-11-25
2020-11-25
11/25/20
November 25, 2020
25 Nov, 2020
Wednesday


# Check type
print(type(now.strftime('%Y-%m-%d')))

<class 'str'>


xmas_day = '2020-12-25'
datetime.strptime(xmas_day, '%Y-%m-%d')

datetime.datetime(2020, 12, 25, 0, 0)


random_day = '20200422T203448'
datetime.strptime(random_day, '%Y%m%dT%H%M%S')

datetime.datetime(2020, 4, 22, 20, 34, 48)


# Import parse
from dateutil.parser import parse


# Parse various strings
xmas_day = '2020-12-25'
ind_day = '4th of July, 2015'
random_day = 'Nov 05, 2020 10:45 PM'
random_day2 = '20200422T203448'

print(parse(xmas_day))
print(parse(ind_day))
print(parse(random_day))
print(parse(random_day2))

2020-12-25 00:00:00
2015-07-04 00:00:00
2020-11-05 22:45:00
2020-04-22 20:34:48


# Check type
print(type(parse(xmas_day)))

<class 'datetime.datetime'>


# Create timestamp object
ind_day = pd.to_datetime('4th of July, 2020')
ind_day

Timestamp('2020-07-04 00:00:00')


# Get full name of Month using strftime
ind_day.strftime('%B')

'July'


dates = pd.to_datetime([datetime(2020, 12, 25), '4th of July, 2020',
                       '2018-Oct-6', '07-07-2017', '20200508', '20200422T203448'])
dates

DatetimeIndex(['2020-12-25 00:00:00', '2020-07-04 00:00:00',
               '2018-10-06 00:00:00', '2017-07-07 00:00:00',
               '2020-05-08 00:00:00', '2020-04-22 20:34:48'],
              dtype='datetime64[ns]', freq=None)


# Create Timestamp from an integer (interpreted as nanoseconds since the epoch by default)
pd.to_datetime(1349720105)

Timestamp('1970-01-01 00:00:01.349720105')


# Create DatetimeIndex with seconds
pd.to_datetime([1349720105, 1349806505, 1349892905,
               1349979305, 1350065705], unit='s')

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)


# Create DatetimeIndex with milliseconds
pd.to_datetime([1349720105, 1349806505, 1349892905,
               1349979305, 1350065705], unit='ms')

DatetimeIndex(['1970-01-16 14:55:20.105000', '1970-01-16 14:56:46.505000',
               '1970-01-16 14:58:12.905000', '1970-01-16 14:59:39.305000',
               '1970-01-16 15:01:05.705000'],
              dtype='datetime64[ns]', freq=None)


pd.to_datetime([1349720105, 1349806505, 1349892905,
               1349979305, 1350065705], unit='ns')

DatetimeIndex(['1970-01-01 00:00:01.349720105',
               '1970-01-01 00:00:01.349806505',
               '1970-01-01 00:00:01.349892905',
               '1970-01-01 00:00:01.349979305',
               '1970-01-01 00:00:01.350065705'],
              dtype='datetime64[ns]', freq=None)


# Create daily time periods
period_daily = dates.to_period('D')
period_daily

PeriodIndex(['2020-12-25', '2020-07-04', '2018-10-06', '2017-07-07',
             '2020-05-08', '2020-04-22'],
            dtype='period[D]', freq='D')


# Start time of a Period
period_daily.start_time

DatetimeIndex(['2020-12-25', '2020-07-04', '2018-10-06', '2017-07-07',
               '2020-05-08', '2020-04-22'],
              dtype='datetime64[ns]', freq=None)


# End time of a Period
period_daily.end_time

DatetimeIndex(['2020-12-25 23:59:59.999999999',
               '2020-07-04 23:59:59.999999999',
               '2018-10-06 23:59:59.999999999',
               '2017-07-07 23:59:59.999999999',
               '2020-05-08 23:59:59.999999999',
               '2020-04-22 23:59:59.999999999'],
              dtype='datetime64[ns]', freq=None)


# Create time period
p1 = pd.Period('2020-12-25')
print('Period is: ', p1)

# Create time stamp
t1 = pd.Timestamp('2020-12-25 18:12')
print('Timestamp is: ', t1)

# Test Time interval
p1.start_time < t1 < p1.end_time

Period is:  2020-12-25
Timestamp is:  2020-12-25 18:12:00

True


# Subtract 30 days 
period_daily - 30

PeriodIndex(['2020-11-25', '2020-06-04', '2018-09-06', '2017-06-07',
             '2020-04-08', '2020-03-23'],
            dtype='period[D]', freq='D')


# Add 10 days
period_daily + 10

PeriodIndex(['2021-01-04', '2020-07-14', '2018-10-16', '2017-07-17',
             '2020-05-18', '2020-05-02'],
            dtype='period[D]', freq='D')


# Create monthly frequency
period_monthly = dates.to_period('M')
period_monthly

PeriodIndex(['2020-12', '2020-07', '2018-10', '2017-07', '2020-05', '2020-04'], dtype='period[M]', freq='M')


# Subtract 12 months
period_monthly - 12

PeriodIndex(['2019-12', '2019-07', '2017-10', '2016-07', '2019-05', '2019-04'], dtype='period[M]', freq='M')


# Add 10 months
period_monthly + 10

PeriodIndex(['2021-10', '2021-05', '2019-08', '2018-05', '2021-03', '2021-02'], dtype='period[M]', freq='M')


# Subtract a specific date from dates
dates - pd.to_datetime('2020-05-15')

TimedeltaIndex([   '224 days 00:00:00',     '50 days 00:00:00',
                 '-587 days +00:00:00', '-1043 days +00:00:00',
                   '-7 days +00:00:00',   '-23 days +20:34:48'],
               dtype='timedelta64[ns]', freq=None)


# Subtract date using index
dates - dates[3]

TimedeltaIndex(['1267 days 00:00:00', '1093 days 00:00:00',
                 '456 days 00:00:00',    '0 days 00:00:00',
                '1036 days 00:00:00', '1020 days 20:34:48'],
               dtype='timedelta64[ns]', freq=None)


pd.date_range('2020-08-03', '2020-08-10')

DatetimeIndex(['2020-08-03', '2020-08-04', '2020-08-05', '2020-08-06',
               '2020-08-07', '2020-08-08', '2020-08-09', '2020-08-10'],
              dtype='datetime64[ns]', freq='D')


dt_rng = pd.date_range('2020-08-03', periods=8)
dt_rng

DatetimeIndex(['2020-08-03', '2020-08-04', '2020-08-05', '2020-08-06',
               '2020-08-07', '2020-08-08', '2020-08-09', '2020-08-10'],
              dtype='datetime64[ns]', freq='D')


dt_rng[0]

Timestamp('2020-08-03 00:00:00', freq='D')


# Date range with Hourly Frequency
pd.date_range('2020-08-03', periods=8, freq='H')

DatetimeIndex(['2020-08-03 00:00:00', '2020-08-03 01:00:00',
               '2020-08-03 02:00:00', '2020-08-03 03:00:00',
               '2020-08-03 04:00:00', '2020-08-03 05:00:00',
               '2020-08-03 06:00:00', '2020-08-03 07:00:00'],
              dtype='datetime64[ns]', freq='H')


# Date range with Month Start Frequency
pd.date_range('2020-02-03', periods=8, freq='MS')

DatetimeIndex(['2020-03-01', '2020-04-01', '2020-05-01', '2020-06-01',
               '2020-07-01', '2020-08-01', '2020-09-01', '2020-10-01'],
              dtype='datetime64[ns]', freq='MS')


# Period Range with Monthly Frequency
pd.period_range('2020-02-03', periods=8, freq='M')

PeriodIndex(['2020-02', '2020-03', '2020-04', '2020-05', '2020-06', '2020-07',
             '2020-08', '2020-09'],
            dtype='period[M]', freq='M')


prd_rng = pd.period_range('2020-02-03', periods=8, freq='D')
prd_rng

PeriodIndex(['2020-02-03', '2020-02-04', '2020-02-05', '2020-02-06',
             '2020-02-07', '2020-02-08', '2020-02-09', '2020-02-10'],
            dtype='period[D]', freq='D')


# Print start and end times
print('Start time for period at 0 index: ', prd_rng[0].start_time)
print('End time for period at 0 index: ', prd_rng[0].end_time)

Start time for period at 0 index:  2020-02-03 00:00:00
End time for period at 0 index:  2020-02-03 23:59:59.999999999


# Time Deltas with daily frequency
pd.timedelta_range(start='1 day', periods=6)

TimedeltaIndex(['1 days', '2 days', '3 days', '4 days', '5 days', '6 days'], dtype='timedelta64[ns]', freq='D')


# Time deltas with hourly frequency
pd.timedelta_range(0, periods=8, freq='H')

TimedeltaIndex(['00:00:00', '01:00:00', '02:00:00', '03:00:00', '04:00:00',
                '05:00:00', '06:00:00', '07:00:00'],
               dtype='timedelta64[ns]', freq='H')


# Time deltas with a 6 hour frequency
pd.timedelta_range(start='1 day', end='3 days', freq='6H')

TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00',
                '1 days 18:00:00', '2 days 00:00:00', '2 days 06:00:00',
                '2 days 12:00:00', '2 days 18:00:00', '3 days 00:00:00'],
               dtype='timedelta64[ns]', freq='6H')


pd.date_range('2020-08-03', periods=10, freq='1H30T')

DatetimeIndex(['2020-08-03 00:00:00', '2020-08-03 01:30:00',
               '2020-08-03 03:00:00', '2020-08-03 04:30:00',
               '2020-08-03 06:00:00', '2020-08-03 07:30:00',
               '2020-08-03 09:00:00', '2020-08-03 10:30:00',
               '2020-08-03 12:00:00', '2020-08-03 13:30:00'],
              dtype='datetime64[ns]', freq='90T')


pd.timedelta_range(0, periods=10, freq='1D5H30T')

TimedeltaIndex([ '0 days 00:00:00',  '1 days 05:30:00',  '2 days 11:00:00',
                 '3 days 16:30:00',  '4 days 22:00:00',  '6 days 03:30:00',
                 '7 days 09:00:00',  '8 days 14:30:00',  '9 days 20:00:00',
                '11 days 01:30:00'],
               dtype='timedelta64[ns]', freq='1770T')


# Get stock data
from pandas_datareader import data

ge = data.DataReader('GE', start='2010', end='2021',
                       data_source='yahoo')
ge.head()


# Check data type
ge.index.dtype

dtype('<M8[ns]')


# Check the first index value
ge.index[0]

Timestamp('2010-01-04 00:00:00')


ge.loc['2015-07-06',:]

High         2.561539e+01
Low          2.519231e+01
Open         2.550961e+01
Close        2.529808e+01
Volume       2.897240e+07
Adj Close    2.216700e+01
Name: 2015-07-06 00:00:00, dtype: float64


print(ge.loc['07/06/2015',:])
print(ge.loc['20150706',:])
print(ge.loc[datetime(2015, 7, 6),:])

High         2.561539e+01
Low          2.519231e+01
Open         2.550961e+01
Close        2.529808e+01
Volume       2.897240e+07
Adj Close    2.216700e+01
Name: 2015-07-06 00:00:00, dtype: float64
High         2.561539e+01
Low          2.519231e+01
Open         2.550961e+01
Close        2.529808e+01
Volume       2.897240e+07
Adj Close    2.216700e+01
Name: 2015-07-06 00:00:00, dtype: float64
High         2.561539e+01
Low          2.519231e+01
Open         2.550961e+01
Close        2.529808e+01
Volume       2.897240e+07
Adj Close    2.216700e+01
Name: 2015-07-06 00:00:00, dtype: float64


# Using year
ge.loc['2020']


# Using year-month
ge.loc['2020-10']


# Slice using Dates
ge.loc['2020-05-04':'2020-05-12']


# Slice using year-month
ge.loc['2020-05':'2020-07']


ge = ge['Close']
ge.head()

Date
2010-01-04    14.855769
2010-01-05    14.932693
2010-01-06    14.855769
2010-01-07    15.625000
2010-01-08    15.961538
Name: Close, dtype: float64


# Using resample()
ge.resample('BA').mean()

Date
2010-12-31    15.893658
2011-12-30    17.434333
2012-12-31    19.453038
2013-12-31    23.083371
2014-12-31    24.994277
2015-12-31    25.767819
2016-12-30    29.180174
2017-12-29    24.972418
2018-12-31    12.402774
2019-12-31     9.782701
2020-12-31     7.963930
Freq: BA-DEC, Name: Close, dtype: float64


# Using asfreq()
ge.asfreq('BA')

Date
2010-12-31    17.586538
2011-12-30    17.221153
2012-12-31    20.182692
2013-12-31    26.951923
2014-12-31    24.298077
2015-12-31    29.951923
2016-12-30    30.384615
2017-12-29    16.778847
2018-12-31     7.278846
2019-12-31    11.160000
Freq: BA-DEC, Name: Close, dtype: float64


# Import Plotting libraries
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn; seaborn.set()


# Plot data
plt.figure(figsize=(15,10))
ge.plot(alpha=0.5, style='-')
ge.resample('BA').mean().plot(style=':')
ge.asfreq('BA').plot(style='--');
plt.legend(['input', 'resample', 'asfreq'],
           loc='upper left');


# Subset Data
ge_up_data = ge.iloc[:10]
ge_up_data

Date
2010-01-04    14.855769
2010-01-05    14.932693
2010-01-06    14.855769
2010-01-07    15.625000
2010-01-08    15.961538
2010-01-11    16.115385
2010-01-12    16.125000
2010-01-13    16.182692
2010-01-14    16.057692
2010-01-15    15.807693
Name: Close, dtype: float64


# Upsample with Daily frequency
ge_up_data.asfreq('D')

Date
2010-01-04    14.855769
2010-01-05    14.932693
2010-01-06    14.855769
2010-01-07    15.625000
2010-01-08    15.961538
2010-01-09          NaN
2010-01-10          NaN
2010-01-11    16.115385
2010-01-12    16.125000
2010-01-13    16.182692
2010-01-14    16.057692
2010-01-15    15.807693
Freq: D, Name: Close, dtype: float64


# Using forward fill
ge_up_data.asfreq('D', method='ffill')

Date
2010-01-04    14.855769
2010-01-05    14.932693
2010-01-06    14.855769
2010-01-07    15.625000
2010-01-08    15.961538
2010-01-09    15.961538
2010-01-10    15.961538
2010-01-11    16.115385
2010-01-12    16.125000
2010-01-13    16.182692
2010-01-14    16.057692
2010-01-15    15.807693
Freq: D, Name: Close, dtype: float64


fig, ax = plt.subplots(2, figsize=(10,8), sharex=True)
ge_up_data.asfreq('D').plot(ax=ax[0])

ge_up_data.asfreq('D', method='bfill').plot(ax=ax[1], style='-o')
ge_up_data.asfreq('D', method='ffill').plot(ax=ax[1], style='--o')
ax[1].legend(["back-fill", "forward-fill"]);


# Print Data
ge_up_data

Date
2010-01-04    14.855769
2010-01-05    14.932693
2010-01-06    14.855769
2010-01-07    15.625000
2010-01-08    15.961538
2010-01-11    16.115385
2010-01-12    16.125000
2010-01-13    16.182692
2010-01-14    16.057692
2010-01-15    15.807693
Name: Close, dtype: float64


# Shift Forward
ge_up_data.shift(2)

Date
2010-01-04          NaN
2010-01-05          NaN
2010-01-06    14.855769
2010-01-07    14.932693
2010-01-08    14.855769
2010-01-11    15.625000
2010-01-12    15.961538
2010-01-13    16.115385
2010-01-14    16.125000
2010-01-15    16.182692
Name: Close, dtype: float64


# Shift Backward
ge_up_data.shift(-2)

Date
2010-01-04    14.855769
2010-01-05    15.625000
2010-01-06    15.961538
2010-01-07    16.115385
2010-01-08    16.125000
2010-01-11    16.182692
2010-01-12    16.057692
2010-01-13    15.807693
2010-01-14          NaN
2010-01-15          NaN
Name: Close, dtype: float64


# Shift backward with Index
# Series.tshift() was deprecated in pandas 1.1 and removed in 2.0.
# shift() with an explicit `freq` does the same thing: it moves the index
# by two business days and leaves the values untouched.
ge_up_data.shift(-2, freq='B')

Date
2009-12-31    14.855769
2010-01-01    14.932693
2010-01-04    14.855769
2010-01-05    15.625000
2010-01-06    15.961538
2010-01-07    16.115385
2010-01-08    16.125000
2010-01-11    16.182692
2010-01-12    16.057692
2010-01-13    15.807693
Freq: B, Name: Close, dtype: float64


# Compare the input series with a value shift and an index shift of 900 days
fig, ax = plt.subplots(3, figsize=(15,8), sharey=True)

# apply a daily frequency to the data, forward-filling the missing days
ge = ge.asfreq('D', method='pad')

# shift the data
ge.plot(ax=ax[0])
# shift(900) moves the values down 900 rows and keeps the index in place
ge.shift(900).plot(ax=ax[1])
# shift(900, freq='D') moves the index by 900 days and keeps the values;
# it replaces Series.tshift(900), which was removed in pandas 2.0
ge.shift(900, freq='D').plot(ax=ax[2])

# legends and annotations
local_max = pd.to_datetime('2012-05-05')
offset = pd.Timedelta(900, 'D')

ax[0].legend(['input'], loc=2)
ax[0].get_xticklabels()[2].set(weight='heavy', color='red')
ax[0].axvline(local_max, alpha=0.3, color='red')

ax[1].legend(['shift(900)'], loc=2)
ax[1].get_xticklabels()[2].set(weight='heavy', color='red')
ax[1].axvline(local_max + offset, alpha=0.3, color='red')

ax[2].legend(["shift(900, freq='D')"], loc=2)
ax[2].get_xticklabels()[1].set(weight='heavy', color='red')
ax[2].axvline(local_max + offset, alpha=0.3, color='red');


plt.figure(figsize=(15,8))

# plot data
ge.plot()

# plot 250 day rolling mean
ge.rolling(250).mean().plot(style='--');


# print data
ge.head()

Date
2010-01-04    14.855769
2010-01-05    14.932693
2010-01-06    14.855769
2010-01-07    15.625000
2010-01-08    15.961538
Freq: D, Name: Close, dtype: float64


# check timezone
print(ge.index.tz)

None


# localize timezone
ge_utc = ge.tz_localize('UTC')
ge_utc.index

DatetimeIndex(['2010-01-04 00:00:00+00:00', '2010-01-05 00:00:00+00:00',
               '2010-01-06 00:00:00+00:00', '2010-01-07 00:00:00+00:00',
               '2010-01-08 00:00:00+00:00', '2010-01-09 00:00:00+00:00',
               '2010-01-10 00:00:00+00:00', '2010-01-11 00:00:00+00:00',
               '2010-01-12 00:00:00+00:00', '2010-01-13 00:00:00+00:00',
               ...
               '2020-11-16 00:00:00+00:00', '2020-11-17 00:00:00+00:00',
               '2020-11-18 00:00:00+00:00', '2020-11-19 00:00:00+00:00',
               '2020-11-20 00:00:00+00:00', '2020-11-21 00:00:00+00:00',
               '2020-11-22 00:00:00+00:00', '2020-11-23 00:00:00+00:00',
               '2020-11-24 00:00:00+00:00', '2020-11-25 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='Date', length=3979, freq='D')


ge_ny = ge_utc.tz_convert('America/New_York')
ge_ny.index

DatetimeIndex(['2010-01-03 19:00:00-05:00', '2010-01-04 19:00:00-05:00',
               '2010-01-05 19:00:00-05:00', '2010-01-06 19:00:00-05:00',
               '2010-01-07 19:00:00-05:00', '2010-01-08 19:00:00-05:00',
               '2010-01-09 19:00:00-05:00', '2010-01-10 19:00:00-05:00',
               '2010-01-11 19:00:00-05:00', '2010-01-12 19:00:00-05:00',
               ...
               '2020-11-15 19:00:00-05:00', '2020-11-16 19:00:00-05:00',
               '2020-11-17 19:00:00-05:00', '2020-11-18 19:00:00-05:00',
               '2020-11-19 19:00:00-05:00', '2020-11-20 19:00:00-05:00',
               '2020-11-21 19:00:00-05:00', '2020-11-22 19:00:00-05:00',
               '2020-11-23 19:00:00-05:00', '2020-11-24 19:00:00-05:00'],
              dtype='datetime64[ns, America/New_York]', name='Date', length=3979, freq='D')


# Localize epoch as a timestamp with US/Pacific timezone
pd.Timestamp(1262347200000000000).tz_localize('US/Pacific')

Timestamp('2010-01-01 12:00:00-0800', tz='US/Pacific')


# Localize epoch as a DatetimeIndex with UTC timezone
pd.DatetimeIndex([1262347200000000000]).tz_localize('UTC')

DatetimeIndex(['2010-01-01 12:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None)


# create two time series with different time zones
ge_la = ge_utc.tz_convert('America/Los_Angeles')

ts1 = ge_ny[:7]
print(ts1)
print('Timezone of ts1: ', ts1.index.tz)
print()

ts2 = ge_la[:7]
print(ts2)
print('Timezone of ts2: ', ts2.index.tz)

Date
2010-01-03 19:00:00-05:00    14.855769
2010-01-04 19:00:00-05:00    14.932693
2010-01-05 19:00:00-05:00    14.855769
2010-01-06 19:00:00-05:00    15.625000
2010-01-07 19:00:00-05:00    15.961538
2010-01-08 19:00:00-05:00    15.961538
2010-01-09 19:00:00-05:00    15.961538
Freq: D, Name: Close, dtype: float64
Timezone of ts1:  America/New_York

Date
2010-01-03 16:00:00-08:00    14.855769
2010-01-04 16:00:00-08:00    14.932693
2010-01-05 16:00:00-08:00    14.855769
2010-01-06 16:00:00-08:00    15.625000
2010-01-07 16:00:00-08:00    15.961538
2010-01-08 16:00:00-08:00    15.961538
2010-01-09 16:00:00-08:00    15.961538
Freq: D, Name: Close, dtype: float64
Timezone of ts2:  America/Los_Angeles


# add two time series
ts3 = ts1 + ts2
print(ts3)
print('Timezone of ts3: ', ts3.index.tz)

Date
2010-01-04 00:00:00+00:00    29.711538
2010-01-05 00:00:00+00:00    29.865385
2010-01-06 00:00:00+00:00    29.711538
2010-01-07 00:00:00+00:00    31.250000
2010-01-08 00:00:00+00:00    31.923077
2010-01-09 00:00:00+00:00    31.923077
2010-01-10 00:00:00+00:00    31.923077
Freq: D, Name: Close, dtype: float64
Timezone of ts3:  UTC


# import library
import pytz


# common time zones
pytz.common_timezones[-10:]

['Pacific/Wake',
 'Pacific/Wallis',
 'US/Alaska',
 'US/Arizona',
 'US/Central',
 'US/Eastern',
 'US/Hawaii',
 'US/Mountain',
 'US/Pacific',
 'UTC']


tz = pytz.timezone('US/Pacific')
tz

<DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>


# Get item with data
import os
from arcgis import GIS

gis = GIS()
earthquake_item = gis.content.get('008fcafa23a24351b4f37f7c8a542cb1')
earthquake_item


# Download data item

# Create folder for file download (exist_ok guards against the folder
# appearing between the existence check and the creation call)
data_folder = '../../samples_data'
if not os.path.exists(data_folder):
    os.makedirs(data_folder, exist_ok=True)
    print(f'Created data folder at: {data_folder}')
else:
    print(f'Using existing data folder at: {data_folder}')

# Download file only when it is not already present locally
filename = 'earthquakes_data.csv'
csv_path = os.path.join(data_folder, filename)
if not os.path.exists(csv_path):
    # Pass `filename` rather than repeating the literal so the existence
    # check and the download always refer to the same file
    earthquake_item.download(data_folder, filename)
    print(f'{filename} downloaded')
else:
    print(f'{filename} exists')

Created data folder at: ../../samples_data
earthquakes_data.csv downloaded


# Read data from the folder the file was downloaded to.
# The original path ('./samples_data/...') did not match `data_folder`
# ('../../samples_data'), so the read could miss the downloaded file.
quake_data = pd.read_csv(os.path.join(data_folder, filename), parse_dates=['datetime'])
quake_data.head()


# Check Data Types
quake_data.dtypes

datetime     datetime64[ns]
latitude            float64
longitude           float64
depth               float64
magnitude           float64
dtype: object


# Split the datetime column into one new column per component.
# Each component name is both the `.dt` attribute read and the column written.
dt_accessor = quake_data['datetime'].dt
for part in ('date', 'time', 'year', 'month', 'day', 'hour', 'minute', 'second'):
    quake_data[part] = getattr(dt_accessor, part)


# Check dataset
quake_data.head()


# Check data types
quake_data.dtypes

datetime     datetime64[ns]
latitude            float64
longitude           float64
depth               float64
magnitude           float64
date                 object
time                 object
year                  int64
month                 int64
day                   int64
hour                  int64
minute                int64
second                int64
dtype: object


quake_data['date'] = pd.to_datetime(quake_data['date'])
quake_data.dtypes

datetime     datetime64[ns]
latitude            float64
longitude           float64
depth               float64
magnitude           float64
date         datetime64[ns]
time                 object
year                  int64
month                 int64
day                   int64
hour                  int64
minute                int64
second                int64
dtype: object


# Drop the original datetime column
quake_data.drop(columns=['datetime'], inplace=True)
quake_data.head()


# Create new datetime column
quake_data['new_datetime'] = pd.to_datetime(quake_data[["year", "month", "day", "hour", "minute", "second"]])
quake_data.head()


# Check data types
quake_data.dtypes

latitude               float64
longitude              float64
depth                  float64
magnitude              float64
date            datetime64[ns]
time                    object
year                     int64
month                    int64
day                      int64
hour                     int64
minute                   int64
second                   int64
new_datetime    datetime64[ns]
dtype: object

	High	Low	Open	Close	Volume	Adj Close
Date
2010-01-04	15.038462	14.567307	14.634615	14.855769	69763000.0	10.840267
2010-01-05	15.067307	14.855769	14.865385	14.932693	67132600.0	10.896401
2010-01-06	15.019231	14.846154	14.932693	14.855769	57683400.0	10.840267
2010-01-07	15.846154	14.836538	14.884615	15.625000	192891100.0	11.401575
2010-01-08	16.048077	15.644231	15.682693	15.961538	119717100.0	11.647147

	High	Low	Open	Close	Volume	Adj Close
Date
2020-01-02	11.96	11.23	11.23	11.93	87421800.0	11.880686
2020-01-03	12.00	11.53	11.57	11.97	85885800.0	11.920521
2020-01-06	12.21	11.84	11.84	12.14	111948700.0	12.089818
2020-01-07	12.24	11.92	12.15	12.05	70579300.0	12.000189
2020-01-08	12.05	11.87	11.99	11.94	55402500.0	11.890644
...	...	...	...	...	...	...
2020-11-19	9.76	9.51	9.62	9.66	87177500.0	9.660000
2020-11-20	9.83	9.59	9.64	9.76	79923400.0	9.760000
2020-11-23	10.27	9.86	9.86	10.07	108197300.0	10.070000
2020-11-24	10.85	10.40	10.71	10.45	175891500.0	10.450000
2020-11-25	10.56	10.34	10.53	10.50	107645396.0	10.500000

	High	Low	Open	Close	Volume	Adj Close
Date
2020-10-01	6.29	6.11	6.27	6.24	79175600.0	6.24
2020-10-02	6.40	6.05	6.05	6.39	90076400.0	6.39
2020-10-05	6.45	6.32	6.39	6.41	58283600.0	6.41
2020-10-06	6.58	6.11	6.43	6.17	170066200.0	6.17
2020-10-07	6.40	6.21	6.22	6.31	83286100.0	6.31
2020-10-08	6.67	6.34	6.36	6.65	103167300.0	6.65
2020-10-09	7.07	6.70	7.07	6.84	171507500.0	6.84
2020-10-12	6.92	6.74	6.92	6.83	89036400.0	6.83
2020-10-13	6.82	6.66	6.79	6.72	75287600.0	6.72
2020-10-14	6.89	6.72	6.72	6.82	98076200.0	6.82
2020-10-15	6.88	6.61	6.70	6.87	89252700.0	6.87
2020-10-16	7.35	6.94	6.96	7.29	169147300.0	7.29
2020-10-19	7.47	7.23	7.39	7.29	130837100.0	7.29
2020-10-20	7.42	7.27	7.35	7.34	98420100.0	7.34
2020-10-21	7.41	7.27	7.28	7.32	73811100.0	7.32
2020-10-22	7.75	7.32	7.33	7.72	95766900.0	7.72
2020-10-23	8.03	7.56	7.93	7.63	132563200.0	7.63
2020-10-26	7.56	7.28	7.46	7.38	104254400.0	7.38
2020-10-27	7.40	7.09	7.40	7.10	98170000.0	7.10
2020-10-28	7.86	7.41	7.51	7.42	253494100.0	7.42
2020-10-29	7.74	7.31	7.66	7.37	123298000.0	7.37
2020-10-30	7.54	7.29	7.34	7.42	102370100.0	7.42

	High	Low	Open	Close	Volume	Adj Close
Date
2020-05-04	6.31	6.15	6.30	6.21	136852400.0	6.190472
2020-05-05	6.46	6.16	6.28	6.20	116998500.0	6.180502
2020-05-06	6.25	5.97	6.20	5.98	117253600.0	5.961195
2020-05-07	6.26	6.06	6.06	6.11	100663300.0	6.090786
2020-05-08	6.33	6.16	6.21	6.29	93934600.0	6.270220
2020-05-11	6.25	6.13	6.24	6.19	71843000.0	6.170535
2020-05-12	6.28	6.00	6.22	6.00	95652200.0	5.981132

	High	Low	Open	Close	Volume	Adj Close
Date
2020-05-01	6.74	6.41	6.67	6.50	120376500.0	6.479559
2020-05-04	6.31	6.15	6.30	6.21	136852400.0	6.190472
2020-05-05	6.46	6.16	6.28	6.20	116998500.0	6.180502
2020-05-06	6.25	5.97	6.20	5.98	117253600.0	5.961195
2020-05-07	6.26	6.06	6.06	6.11	100663300.0	6.090786
...	...	...	...	...	...	...
2020-07-27	6.85	6.69	6.84	6.71	70704000.0	6.698927
2020-07-28	6.96	6.69	6.70	6.89	76033600.0	6.878630
2020-07-29	7.00	6.52	6.99	6.59	148442400.0	6.579125
2020-07-30	6.51	6.26	6.50	6.26	127526900.0	6.249670
2020-07-31	6.29	6.00	6.25	6.07	142731700.0	6.059984

Part 5 - Working with Time Series Data¶

Table of Contents

Date and Time in Python¶

Building `datetime` object¶

Converting between String and DateTime¶

Using `strftime` and `strptime`¶

Using `parser.parse`¶

Date and Time in Pandas¶

The Basics¶

`DatetimeIndex`¶

`PeriodIndex`¶

Arithmetic Operations¶

`TimedeltaIndex`¶

Date Range and Frequency¶

Combining Frequency Codes¶

Indexing and Selection¶

Resampling, Shifting, and Windowing¶

Resampling¶

Downsample Plot¶

Upsample Plot¶

Shifting¶

Plot the Data¶

Rolling Window¶

Time Zones¶

Localization and Conversion¶

Operating between Time Zones¶

Common Time Zones¶

Common Use Cases¶

Import data with date/time¶

Split into multiple columns¶

Combine columns with Date/Time information¶

Conclusion¶

References¶

	datetime	latitude	longitude	depth	magnitude
0	1973-08-09 02:18:00	40.260	-124.233	2.0	5.1
1	1976-11-27 02:49:00	40.998	-120.447	5.0	5.0
2	1977-02-22 06:24:00	38.478	-119.287	5.0	5.0
3	1978-09-04 21:54:00	38.814	-119.811	14.0	5.2
4	1979-10-07 20:54:00	38.224	-119.348	11.0	5.2

	latitude	longitude	depth	magnitude	date	time	year	month	day	hour	minute
0	40.260	-124.233	2.0	5.1	1973-08-09	02:18:00	1973	8	9	2	18
1	40.998	-120.447	5.0	5.0	1976-11-27	02:49:00	1976	11	27	2	49
2	38.478	-119.287	5.0	5.0	1977-02-22	06:24:00	1977	2	22	6	24
3	38.814	-119.811	14.0	5.2	1978-09-04	21:54:00	1978	9	4	21	54
4	38.224	-119.348	11.0	5.2	1979-10-07	20:54:00	1979	10	7	20	54

Part 5 - Working with Time Series Data¶

Table of Contents

Date and Time in Python¶

Building datetime object¶

Converting between String and DateTime¶

Using strftime and strptime¶

Using parser.parse¶

Date and Time in Pandas¶

The Basics¶

DatetimeIndex¶

PeriodIndex¶

Arithmetic Operations¶

TimedeltaIndex¶

Date Range and Frequency¶

Combining Frequency Codes¶

Indexing and Selection¶

Resampling, Shifting, and Windowing¶

Resampling¶

Downsample Plot¶

Upsample Plot¶

Shifting¶

Plot the Data¶

Rolling Window¶

Time Zones¶

Localization and Conversion¶

Operating between Time Zones¶

Common Time Zones¶

Common Use Cases¶

Import data with date/time¶

Split into multiple columns¶

Combine columns with Date/Time information¶

Conclusion¶

References¶

Building `datetime` object¶

Using `strftime` and `strptime`¶

Using `parser.parse`¶

`DatetimeIndex`¶

`PeriodIndex`¶

`TimedeltaIndex`¶