Module marketools.stqscraper.stockquotes
Expand source code
from . import get_storage_status, get_storage_dir
import pandas as pd
import numpy as np
from os import path
from urllib.request import urlretrieve
from datetime import datetime, timedelta
import warnings
def read_ohlcv_from_csv(file_path):
"""
Reads and returns OHLCV data from CSV file.
Parameters
----------
file_path : str or os.path
path to CSV file with OHLCV data
Returns
-------
pandas.DataFrame
"""
output = pd.read_csv(file_path,
index_col='Date',
parse_dates=['Date'],
date_parser=lambda x: datetime.strptime(x, '%Y-%m-%d'))
output['Volume'] = output['Volume'].astype(np.float64)
return output
class StockQuotes:
    """
    OHLC quotes for a single ticker, downloaded from Stooq.com.

    Data for each interval are loaded on first access and kept in memory.
    When local storage is enabled (see ``get_storage_status``) the downloaded
    data are also written to a CSV file that is reused on later runs.

    Parameters
    ----------
    ticker : str
        ticker symbol, inserted as-is into the Stooq download URL
    """

    check_for_update = True  # if True OHLC data will be checked for updates
    update_period = 24  # time in hours, how often data are checked for updates
    update_hour = 20  # full hour after that the data are checked for update

    def __init__(self, ticker):
        self.ticker = ticker
        # one lazily filled slot per supported interval
        self._historical_ohlc = dict(d=None, w=None, m=None, q=None, y=None)

    @property
    def data(self):
        """Daily OHLC data. Deprecated alias of ``ohlc_d``."""
        # stacklevel=2 attributes the warning to the caller's line
        warnings.warn('data is depracted, use ohlc_d instead',
                      DeprecationWarning, stacklevel=2)
        return self.ohlc(interval='d')

    def ohlc(self, interval='d'):
        """
        Returns OHLC data for the given interval, loading them on first use.

        Parameters
        ----------
        interval : str
            single letter defining the interval for OHLC data:
            d - day (default), w - weekly, m - monthly, q - quarterly,
            y - yearly

        Returns
        -------
        pandas.DataFrame

        Raises
        ------
        KeyError
            if `interval` is not one of the supported letters
        """
        if self._historical_ohlc[interval] is None:
            self._historical_ohlc[interval] = self._get_data(interval=interval)
        return self._historical_ohlc[interval]

    @property
    def ohlc_d(self):
        """Daily OHLC data."""
        return self.ohlc(interval='d')

    @property
    def ohlc_w(self):
        """Weekly OHLC data."""
        return self.ohlc(interval='w')

    @property
    def ohlc_m(self):
        """Monthly OHLC data."""
        return self.ohlc(interval='m')

    @property
    def ohlc_q(self):
        """Quarterly OHLC data."""
        return self.ohlc(interval='q')

    @property
    def ohlc_y(self):
        """Yearly OHLC data."""
        return self.ohlc(interval='y')

    def csv_file_path(self, interval='d'):
        """
        Returns the path of the CSV cache file for the given interval, or
        None when local storage is disabled.

        Parameters
        ----------
        interval : str
            single letter defining the interval for OHLC data

        Returns
        -------
        str or None
        """
        if not get_storage_status():
            return None
        # NOTE: 'ohcl' (sic) is the established file name; changing it would
        # orphan CSV files that were already written.
        return path.join(get_storage_dir(),
                         f'{self.ticker}_ohcl_{interval}.csv')

    def download_ohlc_from_stooq(self, interval='d'):
        """
        Downloads CSV file with OHLC data from Stooq.com and reads the data
        into DataFrame. Returns None if daily hits limit for Stooq is exceeded.

        Parameters
        ----------
        interval : str
            single letter defining the interval for OHLC data:
            d - day (default), w - weekly, m - monthly, q - quarterly,
            y - yearly

        Returns
        -------
        pandas.DataFrame or None
        """
        import os  # local import: only os.path is imported at module level

        url = f'http://stooq.com/q/d/l/?i={interval}&s={self.ticker}'
        file_path, _ = urlretrieve(url)
        try:
            output = read_ohlcv_from_csv(file_path)
        except ValueError:  # Stooq: Exceeded the daily hits limit
            output = None
        finally:
            # urlretrieve stores the response in a temporary file; remove it
            # so repeated downloads do not pile up files. Best effort only.
            try:
                os.remove(file_path)
            except OSError:
                pass
        return output

    def _get_data(self, interval='d'):
        """
        Loads OHLC data from the CSV cache and/or Stooq.

        The cached CSV (if any) is always loaded. A download is attempted when
        `check_for_update` is True and the cache is missing or not considered
        current. If the download fails, the cached data are returned.

        Parameters
        ----------
        interval : str
            single letter defining the interval for OHLC data

        Returns
        -------
        pandas.DataFrame
            data sorted by date ascending; empty if nothing could be loaded
        """
        update_required = self.check_for_update  # assuming that update will be required
        output = pd.DataFrame()

        # time info
        time_now = datetime.now()
        weekday_now = time_now.weekday()
        is_weekend = weekday_now in (5, 6)
        # Expected date of the newest row: today on Mo-Fr, the preceding
        # Friday on Saturday (5 - 4 = 1 day back) and Sunday (6 - 4 = 2).
        delta_days = (weekday_now - 4) if is_weekend else 0
        expected_ohlc_date = (time_now - timedelta(days=delta_days)).date()

        csv_path = self.csv_file_path(interval=interval)  # None if storage is off
        if csv_path is not None and path.exists(csv_path):
            try:
                output = read_ohlcv_from_csv(csv_path)
            except ValueError:
                # unreadable cache file: ignore it and rely on the download
                output = pd.DataFrame()
            # age since the file was last written (modification time)
            file_age = time_now.timestamp() - path.getmtime(csv_path)
            recently_saved = file_age < self.update_period * 3600
            # CSV updated within the update period or it is weekend (no new data)
            if (recently_saved or is_weekend) and not output.empty:
                updated_data = output.index[-1].date() == expected_ohlc_date
                # before update_hour on a weekday the current day's row is
                # not expected yet
                session_time = time_now.hour < self.update_hour and not is_weekend
                if updated_data or session_time:
                    update_required = False

        if update_required:
            new_output = self.download_ohlc_from_stooq(interval=interval)
            # None signals an update error (Stooq: Exceeded the daily hits
            # limit); in that case keep whatever was read from the cache
            if new_output is not None and not new_output.empty:
                output = new_output
                if csv_path is not None:
                    new_output.to_csv(csv_path)

        if not output.empty:
            output.sort_index(ascending=True, inplace=True)
        return output
# Script entry point: intentionally a no-op, the module only provides definitions.
if __name__ == '__main__':
    pass
Functions
def read_ohlcv_from_csv(file_path)
-
Reads and returns OHLCV data from CSV file.
Parameters
file_path
:str
or os.path
- path to CSV file with OHLCV data
Returns
pandas.DataFrame
Expand source code
def read_ohlcv_from_csv(file_path): """ Reads and returns OHLCV data from CSV file. Parameters ---------- file_path : str or os.path path to CSV file with OHLCV data Returns ------- pandas.DataFrame """ output = pd.read_csv(file_path, index_col='Date', parse_dates=['Date'], date_parser=lambda x: datetime.strptime(x, '%Y-%m-%d')) output['Volume'] = output['Volume'].astype(np.float64) return output
Classes
class StockQuotes (ticker)
-
Expand source code
class StockQuotes: check_for_update = True # if True OHLC data will be checked for updates update_period = 24 # time in hours, how often data are checked for updates update_hour = 20 # full hour after that the data are checked for update def __init__(self, ticker): self.ticker = ticker self._historical_ohlc = dict(d=None, w=None, m=None, q=None, y=None) @property def data(self): warnings.warn('data is depracted, use ohlc_d instead', DeprecationWarning) if self._historical_ohlc['d'] is None: self._historical_ohlc['d'] = self._get_data(interval='d') return self._historical_ohlc['d'] def ohlc(self, interval='d'): if self._historical_ohlc[interval] is None: self._historical_ohlc[interval] = self._get_data(interval=interval) return self._historical_ohlc[interval] @property def ohlc_d(self): return self.ohlc(interval='d') @property def ohlc_w(self): return self.ohlc(interval='w') @property def ohlc_m(self): return self.ohlc(interval='m') @property def ohlc_q(self): return self.ohlc(interval='q') @property def ohlc_y(self): return self.ohlc(interval='y') def csv_file_path(self, interval='d'): if get_storage_status(): output = path.join(get_storage_dir(), f'{self.ticker}_ohcl_{interval}.csv') else: output = None return output def download_ohlc_from_stooq(self, interval='d'): """ Download CSV file with OHLC data from Stooq.com and reads the data into DataFrame. Returns None if daily hits limit for Stooq is exceeded. 
Parameters ---------- interval : str single letter defining the interval for OHLC data: d - day (default), w - weekly, m - monthly, q - quarterly, y - yearly Returns ------- pandas.DataFrame """ url = f'http://stooq.com/q/d/l/?i={interval}&s={self.ticker}' file_path, _ = urlretrieve(url) try: output = read_ohlcv_from_csv(file_path) except ValueError: # Stooq: Exceeded the daily hits limit output = None return output def _get_data(self, interval='d'): update_required = self.check_for_update # assuming that update will be required output = pd.DataFrame() # time info time_now = datetime.now() weekday_now = datetime.weekday(time_now) is_weekend = True if weekday_now in (5, 6) else False # calculate expected date for last OHLC data, consider only weekdays # from Mo-Fr, assume that last update was one day earlier delta_days = (weekday_now - 4) if is_weekend else 0 expected_ohlc_time = time_now - timedelta(days=delta_days) # file with data for ticker exists if get_storage_status() and path.exists(self.csv_file_path(interval=interval)): timestamp_now = datetime.timestamp(time_now) timestamp_up = path.getatime(self.csv_file_path(interval=interval)) # CSV file modification time # CSV updated within last 24 hours or it is weekend (no new data) if (timestamp_now - timestamp_up < StockQuotes.update_period * 3600) or is_weekend: output = read_ohlcv_from_csv(self.csv_file_path(interval=interval)) last_ohlc_time = output.iloc[-1].name updated_data = last_ohlc_time.date() == expected_ohlc_time.date() session_time = time_now.hour < StockQuotes.update_hour and not is_weekend if updated_data or session_time: update_required = False if update_required: # update CSV file and read data new_output = self.download_ohlc_from_stooq(interval=interval) if not new_output.empty: # Updated data downloaded - update output output = new_output # save to CSV if get_storage_status(): new_output.to_csv(self.csv_file_path(interval=interval)) else: # Update error (Stooq: Exceeded the daily hits limit) 
pass if not output.empty: output.sort_index(ascending=True, inplace=True) return output
Class variables
var check_for_update
var update_hour
var update_period
Instance variables
var data
-
Expand source code
@property def data(self): warnings.warn('data is depracted, use ohlc_d instead', DeprecationWarning) if self._historical_ohlc['d'] is None: self._historical_ohlc['d'] = self._get_data(interval='d') return self._historical_ohlc['d']
var ohlc_d
-
Expand source code
@property def ohlc_d(self): return self.ohlc(interval='d')
var ohlc_m
-
Expand source code
@property def ohlc_m(self): return self.ohlc(interval='m')
var ohlc_q
-
Expand source code
@property def ohlc_q(self): return self.ohlc(interval='q')
var ohlc_w
-
Expand source code
@property def ohlc_w(self): return self.ohlc(interval='w')
var ohlc_y
-
Expand source code
@property def ohlc_y(self): return self.ohlc(interval='y')
Methods
def csv_file_path(self, interval='d')
-
Expand source code
def csv_file_path(self, interval='d'): if get_storage_status(): output = path.join(get_storage_dir(), f'{self.ticker}_ohcl_{interval}.csv') else: output = None return output
def download_ohlc_from_stooq(self, interval='d')
-
Download CSV file with OHLC data from Stooq.com and reads the data into DataFrame. Returns None if daily hits limit for Stooq is exceeded.
Parameters
interval
:str
- single letter defining the interval for OHLC data: d - day (default), w - weekly, m - monthly, q - quarterly, y - yearly
Returns
pandas.DataFrame
Expand source code
def download_ohlc_from_stooq(self, interval='d'): """ Download CSV file with OHLC data from Stooq.com and reads the data into DataFrame. Returns None if daily hits limit for Stooq is exceeded. Parameters ---------- interval : str single letter defining the interval for OHLC data: d - day (default), w - weekly, m - monthly, q - quarterly, y - yearly Returns ------- pandas.DataFrame """ url = f'http://stooq.com/q/d/l/?i={interval}&s={self.ticker}' file_path, _ = urlretrieve(url) try: output = read_ohlcv_from_csv(file_path) except ValueError: # Stooq: Exceeded the daily hits limit output = None return output
def ohlc(self, interval='d')
-
Expand source code
def ohlc(self, interval='d'): if self._historical_ohlc[interval] is None: self._historical_ohlc[interval] = self._get_data(interval=interval) return self._historical_ohlc[interval]