Module marketools.stqscraper.scrapers

Expand source code
import requests
import pandas as pd
import re


def get_raw_summary_table(ticker, timeout=10):
    """
    Downloads and returns raw summary table from Stooq.

    The page ``https://stooq.pl/q/g/?s=<ticker>`` is fetched and the first
    HTML table found in it is returned, indexed by the values of its first
    column.

    Parameters
    ----------
    ticker : str
        ticker of a stock
    timeout : float or tuple, optional
        timeout in seconds handed to ``requests.get`` (default 10), so an
        unresponsive server cannot block the caller forever
    Returns
    -------
    pd.DataFrame
        first table of the page, with its first column used as the index
    Raises
    ------
    requests.exceptions.RequestException
        if the request fails or the timeout expires
    """
    # Local import: keeps this function usable without touching the
    # module-level import block.
    from io import StringIO

    url = f'https://stooq.pl/q/g/?s={ticker}'
    html = requests.get(url, timeout=timeout).text

    # extracting table with summary; the text is wrapped in StringIO because
    # passing literal HTML to pd.read_html is deprecated in recent pandas
    raw_table = pd.read_html(StringIO(html))[0]
    idx = raw_table.iloc[:, 0]
    raw_table.set_index(idx, inplace=True)

    return raw_table


def scrap_summary_table(ticker):
    """
    Scrapes summary table with information about given stock ticker. Returns
    dictionary with keys: Last - last price, Open - last open price, Volume -
    last volume, EPS - EPS, P/E - P/E, P/BV - P/BV, Dividend yield % - dividend
    yield in percents.

    Non-empty values are converted to float. A key whose row is missing from
    the Stooq table is set to None; 'Last' is also None when its cell holds
    text without any number.

    Parameters
    ----------
    ticker : str
        ticker of a stock
    Returns
    -------
    dict
    """

    # get raw table
    raw_table = get_raw_summary_table(ticker)

    # Stooq is Polish - translations needed
    keys = {
        'Last': 'Kurs',
        'Open': 'Otwarcie',
        'Volume': 'Wolumen',
        'EPS': 'EPS (ttm)',
        'P/E': 'C/Z (ttm)',
        'P/BV': 'C/WK',
        'Dividend yield %': 'Stopa dywidendy'
    }

    # creating and filling output dict; rows absent from the table become None
    output_dict = {}
    for name, label in keys.items():
        try:
            output_dict[name] = raw_table.loc[label, 1]
        except KeyError:
            output_dict[name] = None

    # remove currency from price using regex; only text is searched, so a
    # missing row (None) no longer raises, and text without a number
    # yields None instead of failing on m.group
    last = output_dict['Last']
    if isinstance(last, str):
        m = re.search(r'\d+(\.\d+)?', last)
        output_dict['Last'] = m.group(0) if m else None

    # remove % from dividend yield; only a trailing percent sign is stripped,
    # never a digit
    dividend = output_dict['Dividend yield %']
    if isinstance(dividend, str):
        output_dict['Dividend yield %'] = dividend.strip().rstrip('%')

    # convert every non-empty value to float (values are replaced in place,
    # the set of keys does not change during iteration)
    for name, value in output_dict.items():
        if value:
            output_dict[name] = float(value)

    return output_dict


# Script entry point: running this module directly intentionally does nothing.
if __name__ == '__main__':
    pass

Functions

def get_raw_summary_table(ticker)

Downloads and returns raw summary table from Stooq.

Parameters

ticker : str
ticker of a stock

Returns

pd.DataFrame
 
Expand source code
def get_raw_summary_table(ticker, timeout=10):
    """
    Downloads and returns raw summary table from Stooq.

    The page ``https://stooq.pl/q/g/?s=<ticker>`` is fetched and the first
    HTML table found in it is returned, indexed by the values of its first
    column.

    Parameters
    ----------
    ticker : str
        ticker of a stock
    timeout : float or tuple, optional
        timeout in seconds handed to ``requests.get`` (default 10), so an
        unresponsive server cannot block the caller forever
    Returns
    -------
    pd.DataFrame
        first table of the page, with its first column used as the index
    Raises
    ------
    requests.exceptions.RequestException
        if the request fails or the timeout expires
    """
    # Local import: keeps this function usable without touching the
    # module-level import block.
    from io import StringIO

    url = f'https://stooq.pl/q/g/?s={ticker}'
    html = requests.get(url, timeout=timeout).text

    # extracting table with summary; the text is wrapped in StringIO because
    # passing literal HTML to pd.read_html is deprecated in recent pandas
    raw_table = pd.read_html(StringIO(html))[0]
    idx = raw_table.iloc[:, 0]
    raw_table.set_index(idx, inplace=True)

    return raw_table
def scrap_summary_table(ticker)

Scrapes the summary table with information about the given stock ticker. Returns a dictionary with keys: Last - last price, Open - last open price, Volume - last volume, EPS - EPS, P/E - P/E, P/BV - P/BV, Dividend yield % - dividend yield in percent.

Parameters

ticker : str
ticker of a stock

Returns

dict
 
Expand source code
def scrap_summary_table(ticker):
    """
    Scrapes summary table with information about given stock ticker. Returns
    dictionary with keys: Last - last price, Open - last open price, Volume -
    last volume, EPS - EPS, P/E - P/E, P/BV - P/BV, Dividend yield % - dividend
    yield in percents.

    Non-empty values are converted to float. A key whose row is missing from
    the Stooq table is set to None; 'Last' is also None when its cell holds
    text without any number.

    Parameters
    ----------
    ticker : str
        ticker of a stock
    Returns
    -------
    dict
    """

    # get raw table
    raw_table = get_raw_summary_table(ticker)

    # Stooq is Polish - translations needed
    keys = {
        'Last': 'Kurs',
        'Open': 'Otwarcie',
        'Volume': 'Wolumen',
        'EPS': 'EPS (ttm)',
        'P/E': 'C/Z (ttm)',
        'P/BV': 'C/WK',
        'Dividend yield %': 'Stopa dywidendy'
    }

    # creating and filling output dict; rows absent from the table become None
    output_dict = {}
    for name, label in keys.items():
        try:
            output_dict[name] = raw_table.loc[label, 1]
        except KeyError:
            output_dict[name] = None

    # remove currency from price using regex; only text is searched, so a
    # missing row (None) no longer raises, and text without a number
    # yields None instead of failing on m.group
    last = output_dict['Last']
    if isinstance(last, str):
        m = re.search(r'\d+(\.\d+)?', last)
        output_dict['Last'] = m.group(0) if m else None

    # remove % from dividend yield; only a trailing percent sign is stripped,
    # never a digit
    dividend = output_dict['Dividend yield %']
    if isinstance(dividend, str):
        output_dict['Dividend yield %'] = dividend.strip().rstrip('%')

    # convert every non-empty value to float (values are replaced in place,
    # the set of keys does not change during iteration)
    for name, value in output_dict.items():
        if value:
            output_dict[name] = float(value)

    return output_dict