version update & minor changes

- extra data added to funds - updated todo list
alvarobartt · May 20, 2019 · 73032f6 · 73032f6
1 parent 8684f33
commit 73032f6
Show file tree

Hide file tree

Showing 10 changed files with 774 additions and 550 deletions.
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
diff --git a/.travis.yml b/.travis.yml
@@ -12,6 +12,7 @@ install:
   - pip install requests==2.22.0
   - pip install lxml==4.3.3
   - pip install unidecode==1.0.23
+  - pip install tqdm==4.32.1
   - pip install investpy==0.8.4.5
   - pip install pytest==4.5
 

diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ To conclude this section, I am in the need to specify that this is not the final
 
 In order to get this package working you will need to install [**investpy**](https://pypi.org/project/investpy/) from PyPi via Terminal typing:
 
-``pip install investpy==0.8.4.5``
+``pip install investpy==0.8.4.6``
 
 All the dependencies are already listed on the setup file of the package, but to sum them up, you will need the following requirements:
 
@@ -51,6 +51,8 @@ If needed you can open an [issue](https://github.com/alvarob96/investpy/issues)
 * Updated docstrings as reStructuredText (via PyCharm)
 * Modified JSON output to fit current standard for historical data
 * Added function to retrieve information from listed ETFs (id, name, symbol and tag)
+* Added additional information to funds (issuer, ISIN and asset class)
+* Loading bar tests
 * ...
 
 ## Additional Information

diff --git a/investpy/Data.py b/investpy/Data.py
@@ -6,9 +6,6 @@
 __author__ = "Alvaro Bartolome <[email protected]>"
 
 
-# TODO: all lower case in objects to access it via dot operator (.)
-#  look for a proper justification of it
-
 class Data(object):
     """
     A class used to store the historical data of an equity, fund or etf

diff --git a/investpy/__init__.py b/investpy/__init__.py
@@ -8,6 +8,7 @@
 import datetime
 import json
 from random import randint
+from tqdm import tqdm
 
 import pandas as pd
 import pkg_resources
@@ -22,8 +23,6 @@
 # TODO: add country/market param and mapping of ‘resources/available_markets’ in order to allow users retrieve
 #  historical data from different markets.
 
-# DONE: available_languages replaced by available_markets
-
 # TODO: create thread pools to increase scraping efficiency and improve ‘investpy’ performance => CHECK BOOK DOC
 
 # TODO: generate sphinx documentation for version 1.0
@@ -34,32 +33,21 @@
 
 # TODO: consider moving from es.investing to www.investing (long task - develop on developer branch)
 
-# DONE: create API project built on Flask => 0.8.5
-
 # TODO: add additional markets for equities/funds/etfs
 
-# DONE: redefine JSON output for ETFs => 0.8.5
-#  https://eodhistoricaldata.com/api/eod/AAPL.US?api_token=OeAFFmMliFG5orCUuwAKQ8l4WWFQ67YX&period=d.&fmt=json
-
-# TODO: keep HTML doc structure (remove get_text() functions or similar)
-
-# TODO: improve project as described in ‘’Web Scraping with Python’'
-
 # TODO: modify __init__ structure as functions are not supposed to be defined here?
 
-# DONE: get etfs listed as dictionary with specified params
-
-# DONE: updated docstrings
+# TODO: update equities.py and equity data retrieval
 
 # TODO: fix docstrings and unify structure with Google docstrings or similar
 
-# WARNING: RE-GENERATE MARKET FILES BEFORE EVERY RELEASE
+# TODO: handle all errors => 0.9
 
-# TODO: add 'clase de activo', 'isin' and 'emisor' to funds
+# TODO: add filtering when retrieving list of equities/funds/etfs
 
-# DONE: updated equities, funds and etfs retrieval functions
+# TODO: add function to download information from market (es/en/ge/..) investpy.load('market name')
 
-# TODO: handle all errors => 0.9
+# TODO: handle market without data exceptions (loaded by default?)
 
 
 def get_equities_list():
@@ -139,7 +127,7 @@ def get_recent_data(equity, as_json=False, order='ascending'):
             result = list()
 
             if path_:
-                for elements_ in path_:
+                for elements_ in tqdm(path_, ascii=True, ncols=80):
                     info = []
                     for nested_ in elements_.xpath(".//td"):
                         info.append(nested_.text_content())
@@ -331,7 +319,7 @@ def get_historical_data(equity, start, end, as_json=False, order='ascending'):
                 result = list()
 
                 if path_:
-                    for elements_ in path_:
+                    for elements_ in tqdm(path_, ascii=True, ncols=80):
                         info = []
                         for nested_ in elements_.xpath(".//td"):
                             info.append(nested_.text_content())
@@ -578,7 +566,7 @@ def get_fund_recent_data(fund, as_json=False, order='ascending'):
             result = list()
 
             if path_:
-                for elements_ in path_:
+                for elements_ in tqdm(path_, ascii=True, ncols=80):
                     info = []
                     for nested_ in elements_.xpath(".//td"):
                         info.append(nested_.text_content())
@@ -745,7 +733,7 @@ def get_fund_historical_data(fund, start, end, as_json=False, order='ascending')
                 result = list()
 
                 if path_:
-                    for elements_ in path_:
+                    for elements_ in tqdm(path_, ascii=True, ncols=80):
                         info = []
                         for nested_ in elements_.xpath(".//td"):
                             info.append(nested_.text_content())
@@ -1020,7 +1008,7 @@ def get_etf_recent_data(etf, as_json=False, order='ascending'):
             result = list()
 
             if path_:
-                for elements_ in path_:
+                for elements_ in tqdm(path_, ascii=True, ncols=80):
                     info = []
                     for nested_ in elements_.xpath(".//td"):
                         info.append(nested_.text_content())
@@ -1187,7 +1175,7 @@ def get_etf_historical_data(etf, start, end, as_json=False, order='ascending'):
                 result = list()
 
                 if path_:
-                    for elements_ in path_:
+                    for elements_ in tqdm(path_, ascii=True, ncols=80):
                         info = []
                         for nested_ in elements_.xpath(".//td"):
                             info.append(nested_.text_content())

diff --git a/investpy/equities.py b/investpy/equities.py
@@ -11,6 +11,7 @@
 import pkg_resources
 import requests
 from lxml.html import fromstring
+from tqdm import tqdm
 
 from investpy import user_agent as ua
 
@@ -56,7 +57,7 @@ def get_equity_names():
     results = list()
 
     if path_:
-        for elements_ in path_:
+        for elements_ in tqdm(path_, ascii=True, ncols=80):
             id_ = elements_.get('id').replace('pair_', '')
 
             for element_ in elements_.xpath('.//a'):

diff --git a/investpy/etfs.py b/investpy/etfs.py
@@ -5,13 +5,13 @@
 
 __author__ = "Alvaro Bartolome <[email protected]>"
 
-import time
+import json
 
 import pandas as pd
-import requests
-import json
 import pkg_resources
+import requests
 from lxml.html import fromstring
+from tqdm import tqdm
 
 from investpy import user_agent as ua
 
@@ -49,7 +49,7 @@ def get_etf_names():
     results = list()
 
     if path_:
-        for elements_ in path_:
+        for elements_ in tqdm(path_, ascii=True, ncols=80):
             id_ = elements_.get('id').replace('pair_', '')
             symbol = elements_.xpath(".//td[contains(@class, 'symbol')]")[0].get('title')
 

diff --git a/investpy/funds.py b/investpy/funds.py
@@ -7,6 +7,7 @@
 
 import pandas as pd
 import requests
+from tqdm import tqdm
 import json
 from lxml.html import fromstring
 import pkg_resources
@@ -47,29 +48,37 @@ def get_fund_names():
     results = list()
 
     if path_:
-        for elements_ in path_:
+        for elements_ in tqdm(path_, ascii=True, ncols=80):
             id_ = elements_.get('id').replace('pair_', '')
             symbol = elements_.xpath(".//td[contains(@class, 'symbol')]")[0].get('title')
 
             nested = elements_.xpath(".//a")[0].get('title').rstrip()
             info = elements_.xpath(".//a")[0].get('href').replace('/funds/', '')
 
+            data = get_fund_data(info)
+
             if symbol:
-                data = {
+                obj = {
                     "name": nested,
                     "symbol": symbol,
                     "tag": info,
-                    "id": id_
+                    "id": id_,
+                    "issuer": data['issuer'],
+                    "isin": data['isin'],
+                    "asset class": data['asset class'],
                 }
             else:
-                data = {
+                obj = {
                     "name": nested,
                     "symbol": "undefined",
                     "tag": info,
-                    "id": id_
+                    "id": id_,
+                    "issuer": data['issuer'],
+                    "isin": data['isin'],
+                    "asset class": data['asset class'],
                 }
 
-            results.append(data)
+            results.append(obj)
 
     resource_package = __name__
     resource_path = '/'.join(('resources', 'es', 'funds.csv'))
@@ -81,6 +90,65 @@ def get_fund_names():
     return results
 
 
+def get_fund_data(fund_tag):
+    """
+    This function retrieves the issuer, ISIN code and asset class of a fund from its Investing.com page.
+
+    :param fund_tag: tag of the fund, appended to the Investing.com funds URL to build the request
+    :return: dict with the keys 'issuer', 'isin' and 'asset class'; a value is None if its row is not listed
+    :raises ConnectionError: if the response status code is not 200
+    :raises IndexError: if a row has no label span or a matched row has no value span
+    """
+
+    url = "https://www.investing.com/funds/" + fund_tag
+
+    head = {
+        "User-Agent": ua.get_random(),
+        "X-Requested-With": "XMLHttpRequest",
+        "Accept": "text/html",
+        "Accept-Encoding": "gzip, deflate, br",
+        "Connection": "keep-alive",
+    }
+
+    req = requests.get(url, headers=head, timeout=5)
+
+    if req.status_code != 200:
+        # status_code is an int: convert it, otherwise building the message raises TypeError
+        raise ConnectionError("ERR#015: error " + str(req.status_code) + ", try again later.")
+
+    root_ = fromstring(req.text)
+    path_ = root_.xpath(".//div[contains(@class, 'overViewBox')]"
+                        "/div[@id='quotes_summary_current_data']"
+                        "/div[@class='right']"
+                        "/div")
+
+    # (label text, result key, error raised when the value is missing), checked in this order
+    fields = (
+        ('Issuer', 'issuer', "ERR#023: fund issuer unavailable or not found."),
+        ('ISIN', 'isin', "ERR#024: fund isin code unavailable or not found."),
+        ('Asset Class', 'asset class', "ERR#025: fund asset class unavailable or not found."),
+    )
+
+    result = {key: None for _, key, _ in fields}
+
+    for p in path_:
+        labels = p.xpath("span[not(@class)]")
+        if not labels:
+            raise IndexError("ERR#017: isin code unavailable or not found.")
+        label = labels[0].text_content()
+
+        for text, key, error in fields:
+            if text in label:
+                values = p.xpath("span[@class='elp']")
+                if not values:
+                    # raised directly so the field-specific code is not replaced by ERR#017
+                    raise IndexError(error)
+                result[key] = values[0].get('title').rstrip()
+                break
+
+    return result
+
+
 def fund_information_to_json(df):
     """
     This function converts a pandas.DataFrame, containing all the information from a fund, into a JSON
@@ -136,7 +204,3 @@ def list_funds():
         raise IOError("ERR#005: fund list not found or unable to retrieve.")
     else:
         return funds['name'].tolist()
-
-
-if __name__ == '__main__':
-    get_fund_names()