Skip to content

Commit

Permalink
version update & minor changes
Browse files Browse the repository at this point in the history
- extra data added to funds
- updated todo list
  • Loading branch information
alvarob96 committed May 20, 2019
1 parent 8684f33 commit 73032f6
Show file tree
Hide file tree
Showing 10 changed files with 774 additions and 550 deletions.
972 changes: 569 additions & 403 deletions .idea/workspace.xml

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ install:
- pip install requests==2.22.0
- pip install lxml==4.3.3
- pip install unidecode==1.0.23
- pip install tqdm==4.32.1
- pip install investpy==0.8.4.5
- pip install pytest==4.5

Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ To conclude this section, I am in the need to specify that this is not the final

In order to get this package working you will need to install [**investpy**](https://pypi.org/project/investpy/) from PyPi via Terminal typing:

``pip install investpy==0.8.4.5``
``pip install investpy==0.8.4.6``

All the dependencies are already listed on the setup file of the package, but to sum them up, you will need the following requirements:

Expand Down Expand Up @@ -51,6 +51,8 @@ If needed you can open an [issue](https://github.com/alvarob96/investpy/issues)
* Updated docstrings as reStructuredText (via PyCharm)
* Modified JSON output to fit current standard for historical data
* Added function to retrieve information from listed ETFs (id, name, symbol and tag)
* Added additional information to funds
* Loading bar tests
* ...

## Additional Information
Expand Down
3 changes: 0 additions & 3 deletions investpy/Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
__author__ = "Alvaro Bartolome <[email protected]>"


# TODO: all lower case in objects to access it via dot operator (.)
# look for a proper justification of it

class Data(object):
"""
A class used to store the historical data of an equity, fund or etf
Expand Down
36 changes: 12 additions & 24 deletions investpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import datetime
import json
from random import randint
from tqdm import tqdm

import pandas as pd
import pkg_resources
Expand All @@ -22,8 +23,6 @@
# TODO: add country/market param and mapping of ‘resources/available_markets’ in order to allow users retrieve
# historical data from different markets.

# DONE: available_languages replaced by available_markets

# TODO: create thread pools to increase scraping efficiency and improve ‘investpy’ performance => CHECK BOOK DOC

# TODO: generate sphinx documentation for version 1.0
Expand All @@ -34,32 +33,21 @@

# TODO: consider moving from es.investing to www.investing (long task - develop on developer branch)

# DONE: create API project built on Flask => 0.8.5

# TODO: add additional markets for equities/funds/etfs

# DONE: redefine JSON output for ETFs => 0.8.5
# https://eodhistoricaldata.com/api/eod/AAPL.US?api_token=OeAFFmMliFG5orCUuwAKQ8l4WWFQ67YX&period=d.&fmt=json

# TODO: keep HTML doc structure (remove get_text() functions or similar)

# TODO: improve project as described in 'Web Scraping with Python'

# TODO: modify __init__ structure as functions are not supposed to be defined here?

# DONE: get etfs listed as dictionary with specified params

# DONE: updated docstrings
# TODO: update equities.py and equity data retrieval

# TODO: fix docstrings and unify structure with Google docstrings or similar

# WARNING: RE-GENERATE MARKET FILES BEFORE EVERY RELEASE
# TODO: handle all errors => 0.9

# TODO: add 'clase de activo', 'isin' and 'emisor' to funds
# TODO: add filtering when retrieving list of equities/funds/etfs

# DONE: updated equities, funds and etfs retrieval functions
# TODO: add function to download information from market (es/en/ge/..) investpy.load('market name')

# TODO: handle all errors => 0.9
# TODO: handle market without data exceptions (loaded by default?)


def get_equities_list():
Expand Down Expand Up @@ -139,7 +127,7 @@ def get_recent_data(equity, as_json=False, order='ascending'):
result = list()

if path_:
for elements_ in path_:
for elements_ in tqdm(path_, ascii=True, ncols=80):
info = []
for nested_ in elements_.xpath(".//td"):
info.append(nested_.text_content())
Expand Down Expand Up @@ -331,7 +319,7 @@ def get_historical_data(equity, start, end, as_json=False, order='ascending'):
result = list()

if path_:
for elements_ in path_:
for elements_ in tqdm(path_, ascii=True, ncols=80):
info = []
for nested_ in elements_.xpath(".//td"):
info.append(nested_.text_content())
Expand Down Expand Up @@ -578,7 +566,7 @@ def get_fund_recent_data(fund, as_json=False, order='ascending'):
result = list()

if path_:
for elements_ in path_:
for elements_ in tqdm(path_, ascii=True, ncols=80):
info = []
for nested_ in elements_.xpath(".//td"):
info.append(nested_.text_content())
Expand Down Expand Up @@ -745,7 +733,7 @@ def get_fund_historical_data(fund, start, end, as_json=False, order='ascending')
result = list()

if path_:
for elements_ in path_:
for elements_ in tqdm(path_, ascii=True, ncols=80):
info = []
for nested_ in elements_.xpath(".//td"):
info.append(nested_.text_content())
Expand Down Expand Up @@ -1020,7 +1008,7 @@ def get_etf_recent_data(etf, as_json=False, order='ascending'):
result = list()

if path_:
for elements_ in path_:
for elements_ in tqdm(path_, ascii=True, ncols=80):
info = []
for nested_ in elements_.xpath(".//td"):
info.append(nested_.text_content())
Expand Down Expand Up @@ -1187,7 +1175,7 @@ def get_etf_historical_data(etf, start, end, as_json=False, order='ascending'):
result = list()

if path_:
for elements_ in path_:
for elements_ in tqdm(path_, ascii=True, ncols=80):
info = []
for nested_ in elements_.xpath(".//td"):
info.append(nested_.text_content())
Expand Down
3 changes: 2 additions & 1 deletion investpy/equities.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pkg_resources
import requests
from lxml.html import fromstring
from tqdm import tqdm

from investpy import user_agent as ua

Expand Down Expand Up @@ -56,7 +57,7 @@ def get_equity_names():
results = list()

if path_:
for elements_ in path_:
for elements_ in tqdm(path_, ascii=True, ncols=80):
id_ = elements_.get('id').replace('pair_', '')

for element_ in elements_.xpath('.//a'):
Expand Down
8 changes: 4 additions & 4 deletions investpy/etfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@

__author__ = "Alvaro Bartolome <[email protected]>"

import time
import json

import pandas as pd
import requests
import json
import pkg_resources
import requests
from lxml.html import fromstring
from tqdm import tqdm

from investpy import user_agent as ua

Expand Down Expand Up @@ -49,7 +49,7 @@ def get_etf_names():
results = list()

if path_:
for elements_ in path_:
for elements_ in tqdm(path_, ascii=True, ncols=80):
id_ = elements_.get('id').replace('pair_', '')
symbol = elements_.xpath(".//td[contains(@class, 'symbol')]")[0].get('title')

Expand Down
84 changes: 74 additions & 10 deletions investpy/funds.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import pandas as pd
import requests
from tqdm import tqdm
import json
from lxml.html import fromstring
import pkg_resources
Expand Down Expand Up @@ -47,29 +48,37 @@ def get_fund_names():
results = list()

if path_:
for elements_ in path_:
for elements_ in tqdm(path_, ascii=True, ncols=80):
id_ = elements_.get('id').replace('pair_', '')
symbol = elements_.xpath(".//td[contains(@class, 'symbol')]")[0].get('title')

nested = elements_.xpath(".//a")[0].get('title').rstrip()
info = elements_.xpath(".//a")[0].get('href').replace('/funds/', '')

data = get_fund_data(info)

if symbol:
data = {
obj = {
"name": nested,
"symbol": symbol,
"tag": info,
"id": id_
"id": id_,
"issuer": data['issuer'],
"isin": data['isin'],
"asset class": data['asset class'],
}
else:
data = {
obj = {
"name": nested,
"symbol": "undefined",
"tag": info,
"id": id_
"id": id_,
"issuer": data['issuer'],
"isin": data['isin'],
"asset class": data['asset class'],
}

results.append(data)
results.append(obj)

resource_package = __name__
resource_path = '/'.join(('resources', 'es', 'funds.csv'))
Expand All @@ -81,6 +90,65 @@ def get_fund_names():
return results


def get_fund_data(fund_tag):
    """
    This function retrieves the issuer, ISIN code and asset class of a fund by
    scraping its overview page at https://www.investing.com/funds/<fund_tag>.

    :param fund_tag: tag of the fund, appended to the funds base URL
    :returns: dict with the keys 'issuer', 'isin' and 'asset class'; a key keeps
        the value None when no row of the summary box matches its label
    :raises ConnectionError: if the response status code is not 200
    :raises IndexError: if a summary row has no label span (ERR#017), or if a
        matched row has no value span (ERR#023, ERR#024 or ERR#025)
    """
    url = "https://www.investing.com/funds/" + fund_tag

    head = {
        "User-Agent": ua.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    req = requests.get(url, headers=head, timeout=5)

    if req.status_code != 200:
        # status_code is an int: convert it explicitly, otherwise the string
        # concatenation raises TypeError instead of the intended ConnectionError.
        raise ConnectionError("ERR#015: error " + str(req.status_code) + ", try again later.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//div[contains(@class, 'overViewBox')]"
                        "/div[@id='quotes_summary_current_data']"
                        "/div[@class='right']"
                        "/div")

    # (label text, result key, error message), checked in this order for every row.
    fields = (
        ('Issuer', 'issuer', "ERR#023: fund issuer unavailable or not found."),
        ('ISIN', 'isin', "ERR#024: fund isin code unavailable or not found."),
        ('Asset Class', 'asset class', "ERR#025: fund asset class unavailable or not found."),
    )

    result = {
        'issuer': None,
        'isin': None,
        'asset class': None,
    }

    for p in path_:
        # Evaluate the label lookup once per row instead of once per candidate field.
        labels = p.xpath("span[not(@class)]")
        if not labels:
            raise IndexError("ERR#017: isin code unavailable or not found.")
        label = labels[0].text_content()

        for text, key, error in fields:
            if text not in label:
                continue

            # The field-specific error is raised outside any enclosing handler,
            # so it reaches the caller instead of being replaced by ERR#017.
            values = p.xpath("span[@class='elp']")
            if not values:
                raise IndexError(error)

            result[key] = values[0].get('title').rstrip()
            break

    return result


def fund_information_to_json(df):
"""
This function converts a pandas.DataFrame, containing all the information from a fund, into a JSON
Expand Down Expand Up @@ -136,7 +204,3 @@ def list_funds():
raise IOError("ERR#005: fund list not found or unable to retrieve.")
else:
return funds['name'].tolist()


if __name__ == '__main__':
get_fund_names()
Loading

0 comments on commit 73032f6

Please sign in to comment.