From d599a54e42509b96e2ac540c3f08edd51d97c76d Mon Sep 17 00:00:00 2001
From: miro <jarbasai@mailfence.com>
Date: Wed, 27 Mar 2024 06:38:06 +0000
Subject: [PATCH] v2

---
 audiobooker/__init__.py                       |   2 +-
 audiobooker/base.py                           | 251 +---------
 audiobooker/scrappers/__init__.py             | 200 +++-----
 audiobooker/scrappers/audioanarchy.py         | 101 ++--
 audiobooker/scrappers/darkerprojects.py       | 146 ++----
 audiobooker/scrappers/goldenaudiobooks.py     | 256 ++--------
 audiobooker/scrappers/hpaudiotales.py         |  48 ++
 audiobooker/scrappers/librivox.py             | 208 ++++----
 audiobooker/scrappers/loyalbooks.py           | 446 +++---------------
 audiobooker/scrappers/sharedaudiobooks.py     |  69 +++
 .../scrappers/stephenkingaudiobooks.py        | 169 +++----
 audiobooker/scrappers/storynory.py            | 178 +++----
 audiobooker/scrappers/thoughtaudio.py         | 148 +++---
 audiobooker/utils.py                          |  58 +++
 examples/search_librivox.py                   |   2 +-
 setup.py                                      |   2 +-
 test/test_utils.py                            |  75 +++
 17 files changed, 765 insertions(+), 1594 deletions(-)
 create mode 100644 audiobooker/scrappers/hpaudiotales.py
 create mode 100644 audiobooker/scrappers/sharedaudiobooks.py
 create mode 100644 test/test_utils.py

diff --git a/audiobooker/__init__.py b/audiobooker/__init__.py
index 769d9f9..7a28859 100644
--- a/audiobooker/__init__.py
+++ b/audiobooker/__init__.py
@@ -1,4 +1,4 @@
-from audiobooker.base import BookTag, BookAuthor, AudioBook
+from audiobooker.base import BookAuthor, AudioBook
 from audiobooker.exceptions import UnknownAuthorIdException, \
     UnknownBookIdException, UnknownDurationError, ScrappingError, \
     UnknownGenreIdException, UnknownAuthorException, UnknownBookException, \
diff --git a/audiobooker/base.py b/audiobooker/base.py
index 2e9f5ed..a2fc96a 100644
--- a/audiobooker/base.py
+++ b/audiobooker/base.py
@@ -1,245 +1,34 @@
-import json
-import subprocess
-from bs4 import BeautifulSoup
-from requests_cache import CachedSession
-from datetime import timedelta
-
+from typing import List
+from dataclasses import dataclass, field
 from audiobooker.exceptions import UnknownAuthorIdException, \
     UnknownBookIdException, UnknownDurationError, ScrappingError, \
     UnknownGenreIdException, UnknownAuthorException, UnknownBookException, \
     UnknownGenreException, ParseErrorException
 
-expire_after = timedelta(hours=1)
-session = CachedSession(backend='memory', expire_after=expire_after)
-
-
-class BookTag:
-    def __init__(self, name="", tag_id="", url="", from_data=None):
-        self.name = name
-        self.tag_id = tag_id or name
-        self.url = url
-        if from_data:
-            self.from_json(from_data)
-
-    @property
-    def as_json(self):
-        return {"name": self.name,
-                "id": self.tag_id,
-                "url": self.url}
-
-    def from_json(self, json_data):
-        if isinstance(json_data, str):
-            json_data = json.loads(json_data)
-        if isinstance(json_data, BookTag):
-            json_data = json_data.as_json
-        if not isinstance(json_data, dict):
-            raise TypeError
-        self.name = json_data.get("name", self.name)
-        self.tag_id = json_data.get("id", self.tag_id) or self.name
-        self.url = json_data.get("url", self.url)
-
-    def __str__(self):
-        return self.name
-
-    def __repr__(self):
-        return "BookGenre(" + str(self) + ", " + self.tag_id + ")"
-
 
+@dataclass
 class BookAuthor:
-    def __init__(self, first_name="", last_name="", author_id="", url="",
-                 from_data=None):
-        self.first_name = first_name
-        self.last_name = last_name
-        self.first_name, self.last_name = self.normalize_name()
-        self.author_id = author_id
-        self.url = url
-        if from_data:
-            self.from_json(from_data)
+    first_name: str = ""  # optional; matched case-insensitively by AudioBookSource.search_by_author
+    last_name: str = ""  # optional; scrapers in this patch store site-level names (e.g. "Audio Anarchy") here
 
-    def normalize_name(self):
-        author = " ".join([self.first_name, self.last_name])
-        names = author.split(" ")
-        last_name = " ".join(names[1:])
-        first_name = names[0]
-        return first_name, last_name
 
-    def from_json(self, json_data):
-        if isinstance(json_data, str):
-            try:
-                json_data = json.loads(json_data)
-            except:
-                json_data = {"last_name": json_data}
-        if isinstance(json_data, BookAuthor):
-            json_data = json_data.as_json
-        if not isinstance(json_data, dict):
-            print(json_data, type(json_data))
-            raise TypeError
-        self.first_name = json_data.get("first_name", self.first_name)
-        self.last_name = json_data.get("last_name", self.last_name)
-        self.first_name, self.last_name = self.normalize_name()
-        self.author_id = json_data.get("id", self.author_id)
-        self.url = json_data.get("url", self.url)
-
-    @property
-    def as_json(self):
-        return {"first_name": self.first_name, "last_name": self.last_name,
-                "id": self.author_id, "url": self.url}
-
-    def __str__(self):
-        return (self.first_name + " " + self.last_name).strip()
-
-    def __repr__(self):
-        return "BookAuthor(" + str(self) + ", " + self.author_id + ")"
+@dataclass
+class AudiobookNarrator:  # value type stored in AudioBook.narrator
+    first_name: str = ""  # not consulted by AudioBookSource.search_by_narrator
+    last_name: str = ""  # the only field AudioBookSource.search_by_narrator matches on
 
 
+@dataclass
 class AudioBook:
-    def __init__(self, title="", authors=None, description="", tags=None,
-                 book_id="", runtime=0, url="", img="", language='english',
-                 from_data=None, stream_list=None, parse=False):
-        self.img = img
-        self.url = url
-        self.title = title
-        self._authors = authors or []
-        self._description = description
-        self._tags = tags or []
-        self.book_id = book_id
-        self.runtime = runtime
-        self.lang = language.lower()
-        self._stream_list = stream_list or []
-        if not self.book_id and "/" in self.url:
-            self.book_id = self.url.split("/")[-1]
-        elif not self.book_id:
-            self.book_id = title
-        if from_data:
-            self.from_json(from_data)
-        self.raw = from_data or {}
-        if parse:
-            try:
-                self.from_page()
-            except:
-                pass
-
-    def calc_runtime(self, data=None):
-        raise UnknownDurationError
-
-    def parse_page(self):
-        raise ParseErrorException
-
-    def from_page(self):
-        self.raw = self.parse_page()
-
-    @property
-    def html(self):
-        try:
-            return session.get(self.url).text
-        except Exception as e:
-            try:
-                return session.get(self.url, verify=False).text
-            except:
-                return None
-
-    @property
-    def soup(self):
-        return BeautifulSoup(self.html, "html.parser")
-
-    @property
-    def description(self):
-        return self._description.strip()
-
-    @property
-    def streamer(self):
-        for s in self._stream_list:
-            yield s
-
-    @property
-    def streams(self):
-        return [s for s in self.streamer]
-
-    def play_sox(self):
-        self.play("play %1")
-
-    def play_mplayer(self):
-        self.play("mplayer %1")
-
-    def play_vlc(self):
-        self.play("cvlc %1 --play-and-exit")
-
-    def play(self, cmd="cvlc %1 --play-and-exit"):
-        for stream_url in self.streamer:
-            print("playing", stream_url)
-            if isinstance(cmd, str):
-                cmd = cmd.split(" ")
-            if isinstance(cmd, list):
-                play_cmd = cmd
-                for idx, c in enumerate(cmd):
-                    if c == "%1":
-                        play_cmd[idx] = stream_url
-                subprocess.call(" ".join(play_cmd), shell=True)
-            else:
-                raise TypeError
-
-    @property
-    def authors(self):
-        authors = []
-        for a in self._authors:
-            if isinstance(a, str):
-                try:
-                    a = json.loads(a)
-                except Exception as e:
-                    a = {"last_name": a}
-            if isinstance(a, dict):
-                authors += [a]
-        return [BookAuthor(from_data=a) for a in authors]
-
-    @property
-    def tags(self):
-        return [BookTag(from_data=a) for a in self._tags]
-
-    @property
-    def as_json(self):
-        bucket = self.raw
-        bucket["url"] = self.url
-        bucket["img"] = self.img
-        bucket["title"] = self.title
-        bucket["authors"] = self._authors
-        bucket["description"] = self._description
-        bucket["tags"] = self._tags
-        bucket["id"] = self.book_id
-        bucket["runtime"] = self.runtime
-        bucket["language"] = self.lang
-        bucket["streams"] = self.streams
-        return bucket
-
-    def from_json(self, json_data):
-        if isinstance(json_data, str):
-            json_data = json.loads(json_data)
-        if not isinstance(json_data, dict):
-            raise TypeError
-        json_data = json_data or {}
-        self.url = json_data.get("url", self.url)
-        self.img = json_data.get("img",
-                                 json_data.get("pic",
-                                               json_data.get("image",
-                                                             self.img)))
-        self.title = json_data.get("title", json_data.get("name", self.title))
-        self._authors = json_data.get("authors", self._authors)
-        self._authors = self._authors or [json_data.get("author", "")]
-        self._description = json_data.get("description", self._description)
-        self._tags = json_data.get("tags", self._tags)
-        self.book_id = json_data.get("id")
-        self.runtime = json_data.get("runtime", self.runtime)
-        self.lang = json_data.get('language',
-                                  json_data.get('lang', self.lang)).lower()
-        self._stream_list = json_data.get("streams", self._stream_list)
-        self.raw = json_data
-        if not self.book_id and "/" in self.url:
-            self.book_id = self.url.split("/")[-1]
-
-    def __str__(self):
-        return self.title
-
-    def __repr__(self):
-        return "AudioBook(" + str(self) + ", " + self.book_id + ")"
-
+    title: str = ""  # matched case-insensitively by AudioBookSource.search_by_title
+    description: str = ""
+    image: str = ""  # image URL (scrapers in this patch pass an <img> src)
+    language: str = ""  # scrapers in this patch pass a short code such as "en"
+    authors: List[BookAuthor] = field(default_factory=list)  # fresh list per instance
+    tags: List[str] = field(default_factory=list)  # free-form labels, e.g. "Anarchy", "audio drama"
+    streams: List[str] = field(default_factory=list)  # audio URLs (the scrapers collect .mp3 links)
+    narrator: AudiobookNarrator = None  # defaults to None although the annotation omits Optional; callers test truthiness
+    year: int = 0  # 0 is the default when nothing is supplied
+    runtime: int = 0  # NOTE(review): unit is not established anywhere in this patch — confirm
 
 
diff --git a/audiobooker/scrappers/__init__.py b/audiobooker/scrappers/__init__.py
index a14d02b..e293003 100644
--- a/audiobooker/scrappers/__init__.py
+++ b/audiobooker/scrappers/__init__.py
@@ -1,149 +1,63 @@
-from threading import Thread
-
-from bs4 import BeautifulSoup
-from rapidfuzz import process
-
-from audiobooker.base import session, BookTag
+import abc
+from requests_cache import CachedSession
+from datetime import timedelta
 from audiobooker.exceptions import UnknownAuthorIdException, \
     UnknownBookIdException, ScrappingError, UnknownAuthorException, UnknownBookException
 from audiobooker.utils import random_user_agent
+from typing import List, Iterable
+from audiobooker.base import AudioBook, BookAuthor, AudiobookNarrator
 
 
 class AudioBookSource:
-    base_url = ""
-    popular_url = ""
-    tags_url = ""
-    authors_url = ""
-    search_url = ""
-    _cache = None
-    _tags = []
-    _tag_pages = {}
-
-    @classmethod
-    def populate_cache(self, books=None, threaded=False):
-        if self._cache is None:
-            if books:
-                self._cache = books
-                return
-            if threaded:
-                t = Thread(target=self.get_all_audiobooks,
-                           daemon=True).start()
-            else:
-                self._cache = self.get_all_audiobooks()
-        elif books:
-            self._cache += books
-
-    @property
-    def tags(self):
-        return sorted(self._tags) or []
-
-    @staticmethod
-    def _get_html(url):
-        user_agent = random_user_agent()
-        try:
-            return session.get(url, headers={'User-Agent': user_agent}).text
-        except Exception as e:
-            return session.get(url, verify=False,
-                               headers={'User-Agent': user_agent}).text
-
-    @staticmethod
-    def _get_soup(html):
-        return BeautifulSoup(html, "html.parser")
-
-    @classmethod
-    def scrap_popular(cls, limit=-1, offset=0):
-        raise ScrappingError
-
-    @property
-    def tag_pages(self):
-        return self._tag_pages or {}
-
-    @classmethod
-    def scrap_tags(cls):
-        return cls._tag_pages
-
-    @staticmethod
-    def scrap_all_audiobooks(limit=-1, offset=0):
-        raise ScrappingError
-
-    @classmethod
-    def scrap_by_tag(cls, tag, limit=-1, offset=0):
-        for book in cls.search_audiobooks(tag=tag):
-            yield book
-
-    @classmethod
-    def get_all_audiobooks(self, limit=2000, offset=0):
-        if self._cache is not None:
-            return self._cache
-        self._cache = [book for book in self.scrap_all_audiobooks(limit,
-                                                                  offset)]
-        return self._cache
-
-    @classmethod
-    def get_tag_id(cls, tag):
-        if tag in cls._tags:
-            return str(cls._tags.index(tag))
-        tags = []
-        for gen in cls.scrap_tags():
-            tags.append(gen)
-        tags = sorted(tags)
-        return str(tags.index(tag))
-
-    @classmethod
-    def get_tag(cls, tag_id):
-        if tag_id <= len(cls._tags):
-            tag = cls._tags[tag_id]
-        else:
-            tags = []
-            for tag in cls.scrap_tags():
-                tags.append(tag)
-            tags = sorted(tags)
-            tag = tags[tag_id]
-        return BookTag(tag_id=tag_id, name=tag)
-
-    @staticmethod
-    def get_audiobook(book_id):
-        raise UnknownBookIdException
-
-    @staticmethod
-    def get_author(author_id):
-        raise UnknownAuthorIdException
-
-    @staticmethod
-    def get_audiobook_id(book):
-        raise UnknownBookException
-
-    @staticmethod
-    def get_author_id(author):
-        raise UnknownAuthorException
-
-    @classmethod
-    def search_audiobooks(self, since=None, author=None, title=None,
-                          tag=None, limit=25):
-        """
-        Args:
-            since: a UNIX timestamp; returns all projects cataloged since that time
-            author: all records by that author last name
-            title: all matching titles
-            tag: all projects of the matching tag
-            limit: max entries to return (int)
-
-        Returns:
-            list : list of AudioBook objects
-        """
-        # priority for title matches
-        alll = self.get_all_audiobooks()
-        if title:
-            for res in process.extract(title, alll, limit=limit):
-                match, score = res
-                yield match
-                alll.remove(match)
-
-        # second author matches
-        if author:
-            choices = [" ".join([str(a) for a in b.authors]) for b in alll]
-            for res in process.extract(author, choices, limit=limit):
-                match, score = res
-                match = alll[choices.index(match)]
-                yield match
-                alll.remove(match)
+    expire_after = timedelta(hours=1)  # cache lifetime handed to the session on the next line
+    session = CachedSession(backend='memory', expire_after=expire_after)  # class-level, in-memory cached HTTP session
+
+    def search(self, query) -> Iterable[AudioBook]:
+        """Yield title, then author, then tag matches for query (no de-duplication)."""
+        # TODO fuzzy match instead
+        passes = (
+            self.search_by_title, self.search_by_author, self.search_by_tag,
+        )
+        for lookup in passes:
+            yield from lookup(query)
+
+    def search_by_narrator(self, query) -> Iterable[AudioBook]:
+        """Yield books whose narrator's non-empty last name occurs in query (case-insensitive)."""
+        for b in self.iterate_all():
+            if b.narrator and b.narrator.last_name and b.narrator.last_name.lower() in query.lower():
+                yield b
+
+    def search_by_author(self, query) -> Iterable[AudioBook]:
+        """Yield each book once when any author's non-empty first or last name occurs in query."""
+        q = query.lower()
+        for b in self.iterate_all():
+            if any(n and n.lower() in q for a in b.authors for n in (a.last_name, a.first_name)):
+                yield b
+
+    def search_by_title(self, query) -> Iterable[AudioBook]:
+        """Yield books whose title contains query, ignoring case."""
+        titled = (b for b in self.iterate_all() if query.lower() in b.title.lower())
+        yield from titled
+
+    def search_by_tag(self, query) -> Iterable[AudioBook]:
+        """Yield books that carry a tag equal to query, ignoring case."""
+        tagged = (b for b in self.iterate_all() if query.lower() in [t.lower() for t in b.tags])
+        yield from tagged
+
+    @abc.abstractmethod
+    def iterate_all(self) -> Iterable[AudioBook]:
+        """Return an iterable of every AudioBook of the source (overridden per source)."""
+
+    def iterate_popular(self) -> Iterable[AudioBook]:
+        return self.iterate_all()  # default: no popularity ranking, falls back to the full listing
+
+    def iterate_by_author(self, author) -> Iterable[AudioBook]:
+        """Yield each book once when an author's non-empty last name occurs in author."""
+        for b in self.iterate_all():
+            if any(a.last_name and a.last_name.lower() in author.lower() for a in b.authors):
+                yield b
+
+    def iterate_by_tag(self, tag) -> Iterable[AudioBook]:
+        """Yield books whose tags contain tag (exact, case-sensitive membership)."""
+        matching = (b for b in self.iterate_all() if tag in b.tags)
+        yield from matching
diff --git a/audiobooker/scrappers/audioanarchy.py b/audiobooker/scrappers/audioanarchy.py
index 43d3b40..50135d5 100644
--- a/audiobooker/scrappers/audioanarchy.py
+++ b/audiobooker/scrappers/audioanarchy.py
@@ -1,102 +1,57 @@
-import requests
+from dataclasses import dataclass
 
 from audiobooker.base import AudioBook, BookAuthor
 from audiobooker.scrappers import AudioBookSource
+from audiobooker.utils import get_soup
 
 
-class AudioAnarchyAudioBook(AudioBook):
-    base_url = "http://www.audioanarchy.org/"
+@dataclass
+class AudioAnarchyAudioBook:
+    url: str
+    image: str = ""
 
-    def parse_page(self):
+    def parse_page(self) -> AudioBook:
+        base_url = "http://www.audioanarchy.org/"
+        soup = get_soup(self.url)
         streams = []
-        for url in self.soup.find_all("a"):
+        for url in soup.find_all("a"):
             try:
                 if not url["href"].endswith(".mp3"):
                     continue
-                streams.append(self.base_url + url["href"])
+                streams.append(base_url + url["href"])
             except:
                 continue
-        title = self.soup.find("title").text
-        author_name = "Audio Anarchy"
-        authors = [BookAuthor(first_name=author_name)]
-        img = self.img
-        return {"authors": authors,
-                "title": title.strip(),
-                "streams": streams,
-                "rating": 0,
-                "tags": [],
-                "img": img}
-
-    def from_page(self):
-        data = self.parse_page()
-        if not self.title:
-            self.title = data["title"]
-        if not self._description:
-            self._description = data.get("description") or self.title
-        self.img = data.get("img", self.img)
-        for tag in data["tags"]:
-            if tag.as_json not in self._tags:
-                self._tags.append(tag.as_json)
-        for author in data["authors"]:
-            if author.as_json not in self._authors:
-                self._authors.append(author.as_json)
-        self._stream_list = data["streams"]
-        self.raw.update(data)
-
-    def __repr__(self):
-        return "AudioAnarchyAudioBook(" + str(
-            self) + ", " + self.book_id + ")"
+        title = soup.find("title").text.split(" - ")[-1].split(" :: ")[-1]
+        return AudioBook(
+            title=title,
+            streams=streams,
+            image=self.image,
+            tags=["Anarchy"],
+            authors=[BookAuthor(last_name="Audio Anarchy")],
+            language="en"
+        )
 
 
 class AudioAnarchy(AudioBookSource):
     base_url = "http://www.audioanarchy.org"
-    _tags = ["Anarchy"]
-    _tag_pages = {"Anarchy": 'http://www.audioanarchy.org'}
 
-    @staticmethod
-    def _parse_page(html, limit=-1):
-        soup = AudioAnarchy._get_soup(html)
+    def iterate_all(self):
+        soup = get_soup(self.base_url)
         for entry in soup.find_all("div", {"id": "album"}):
             try:
                 a = entry.find("a")
                 img = entry.find("img")
-                book = AudioAnarchyAudioBook(from_data={
-                    "title": img["alt"],
-                    "url": "https://www.audioanarchy.org/" + a["href"],
-                    "img": "https://www.audioanarchy.org/" + img["src"]
-                })
-                book.from_page()  # parse url
-                yield book
+                yield AudioAnarchyAudioBook(
+                    url="https://www.audioanarchy.org/" + a["href"],
+                    image="https://www.audioanarchy.org/" + img["src"]
+                ).parse_page()
             except:
-                raise
                 continue
 
-    def scrap_popular(self, limit=-1, offset=0):
-        html = requests.get(self.base_url).text
-        return AudioAnarchy._parse_page(html)
-
-    @classmethod
-    def search_audiobooks(cls, since=None, author=None, title=None, tag=None,
-                          limit=25):
-        html = requests.get(AudioAnarchy.base_url).text
-        return AudioAnarchy._parse_page(html)
-
-    @classmethod
-    def get_audiobook(cls, book_id):
-        url = cls.base_url + '/' + book_id
-        book = AudioAnarchyAudioBook(url=url)
-        return book
-
-    def scrap_all_audiobooks(self, limit=-1, offset=0):
-        return self.scrap_popular()
-
 
 if __name__ == "__main__":
     from pprint import pprint
 
-    # for book in AudioAnarchy.search_audiobooks(title="Dark Tower"):
-    #     pprint(book.as_json)
-
     scraper = AudioAnarchy()
-    for book in scraper.scrap_all_audiobooks():
-        pprint(book.as_json)
+    for book in scraper.iterate_all():
+        pprint(book)
diff --git a/audiobooker/scrappers/darkerprojects.py b/audiobooker/scrappers/darkerprojects.py
index b838100..5a12867 100644
--- a/audiobooker/scrappers/darkerprojects.py
+++ b/audiobooker/scrappers/darkerprojects.py
@@ -1,146 +1,56 @@
-import requests
 from sitemapparser import SiteMapParser
-
-from audiobooker.base import AudioBook
+from dataclasses import dataclass
+from audiobooker.base import AudioBook, BookAuthor
 from audiobooker.scrappers import AudioBookSource
+from audiobooker.utils import get_soup
 
 
-class DarkerProjectsAudioBook(AudioBook):
-    base_url = "http://darkerprojects.com"
+@dataclass
+class DarkerProjectsAudioBook:
+    url: str
 
     def parse_page(self):
         streams = []
-        img = self.img
-        desc = self.description
+        soup = get_soup(self.url)
+        img = ""
+        desc = ""
 
-        for d in self.soup.find("div", {"class": "inner-entry-content"}).find_all("i"):
+        for d in soup.find("div", {"class": "inner-entry-content"}).find_all("i"):
             desc = d.text
             break
 
-        for url in self.soup.find_all("img")[1:]:
+        for url in soup.find_all("img")[1:]:
             img = url["src"]
             break
 
-        for url in self.soup.find_all("a"):
+        for url in soup.find_all("a"):
             if not url.get("href"):
                 continue
             if url["href"].endswith(".mp3"):
                 if url["href"] not in streams:
                     streams.append(url["href"])
-        title = self.soup.find("title").text
-
-        return {"title": title.strip(),
-                "streams": streams,
-                "description": desc,
-                "img": img}
+        title = soup.find("title").text
 
-    def from_page(self):
-        data = self.parse_page()
-        self.title = data["title"]
-        self.img = data.get("img", self.img)
-        self._stream_list = data["streams"]
-        self.raw.update(data)
-
-    def __repr__(self):
-        return "DarkerProjectsAudioBook(" + str(
-            self) + ", " + self.book_id + ")"
+        return AudioBook(
+            title=title,
+            streams=streams,
+            image=img,
+            tags=["audio drama"],
+            description=desc,
+            authors=[BookAuthor(last_name="Darker Projects")],
+            language="en"
+        )
 
 
 class DarkerProjects(AudioBookSource):
-    base_url = "http://darkerprojects.com"
-
-    @classmethod
-    def _parse_page(cls, html, limit=-1):
-        soup = cls._get_soup(html)
-        for entry in soup.find_all("article"):
-            try:
-                if not entry.find("div", {"class": "powerpress_player"}):
-                    continue  # no audio streams, text only post
-                a = entry.find("a")
-                desc = ""
-                for p in entry.find_all("p"):
-                    desc = p.text
-
-                tags = []
-                try:
-                    cat = entry.find("span", {"class": "cat-links"}).find("a")
-                    tags.append({"name": cat.text, "url": cat["href"]})
-                except:
-                    pass
-                dl = entry.find("a", {"class": "powerpress_link_d"})
-                yield DarkerProjectsAudioBook(
-                    title=a.text,
-                    description=desc,
-                    stream_list=[dl["href"]],
-                    tags=tags,
-                    url=a["href"]
-                )
-            except:
-                continue
-
-    @classmethod
-    def scrap_popular(cls, limit=-1, offset=0):
-        html = requests.get(cls.base_url).text
-        return cls._parse_page(html)
-
-    @classmethod
-    def scrap_tags(cls):
-        bucket = {}
-        sm = SiteMapParser('https://darkerprojects.com/wp-sitemap-taxonomies-category-1.xml')  # reads /sitemap.xml
-        urls = sm.get_urls()  # returns iterator of sitemapper.Url instances
-        for url in urls:
-            url = str(url)
-            title = url.strip("/").split("/")[-1].replace("-", " ").title()
-            bucket[title] = url
-        return bucket
-
-    @classmethod
-    def scrap_collections(cls, limit=-1, offset=0):
-        for tag in cls.scrap_tags():
-            yield cls.get_collection(tag)
 
-    @classmethod
-    def get_collection(cls, collection):
-        for tag, url in cls.scrap_tags().items():
-            if tag == collection:
-                html = requests.get(url).text
-                streams = []
-                for book in cls._parse_page(html):
-                    streams += book.streams
-                streams.reverse()
-                return DarkerProjectsAudioBook(title=tag,
-                                               stream_list=streams,
-                                               url=url)
-
-    @classmethod
-    def search_audiobooks(cls, since=None, author=None, title=None, tag=None,
-                          limit=25):
-        query = ""
-        if title:
-            query += title + " "
-        if tag:
-            query += tag + " "
-        if author:
-            query += author + " "
-        html = requests.get(cls.base_url, params={"s": query}).text
-        return cls._parse_page(html)
-
-    @classmethod
-    def get_audiobook(cls, book_id):
-        url = cls.base_url + '/' + book_id
-        book = DarkerProjectsAudioBook(url=url)
-        return book
-
-    @classmethod
-    def scrap_all_audiobooks(cls, limit=-1, offset=0):
+    def iterate_all(self):
         sm = SiteMapParser('https://darkerprojects.com/wp-sitemap-posts-post-1.xml')  # reads /sitemap.xml
         urls = sm.get_urls()  # returns iterator of sitemapper.Url instances
         for url in urls:
             url = str(url)
-            title = url.strip("/").split("/")[-1].replace("-", " ").title()
-            book = DarkerProjectsAudioBook(url=url, title=title)
-            book.from_page()
-            yield book
+            book = DarkerProjectsAudioBook(url=url)
+            yield book.parse_page()
 
 
 if __name__ == "__main__":
@@ -148,7 +58,5 @@ def scrap_all_audiobooks(cls, limit=-1, offset=0):
 
     scraper = DarkerProjects()
 
-    print(scraper.scrap_tags())
-
-    for book in scraper.scrap_all_audiobooks():
-        pprint(book.as_json)
+    for book in scraper.iterate_all():
+        pprint(book)
diff --git a/audiobooker/scrappers/goldenaudiobooks.py b/audiobooker/scrappers/goldenaudiobooks.py
index 5f5b3f8..6c5a8df 100644
--- a/audiobooker/scrappers/goldenaudiobooks.py
+++ b/audiobooker/scrappers/goldenaudiobooks.py
@@ -1,240 +1,60 @@
-import requests
+from dataclasses import dataclass
+
 from sitemapparser import SiteMapParser
 
 from audiobooker.base import AudioBook, BookAuthor
 from audiobooker.scrappers import AudioBookSource
+from audiobooker.utils import get_soup, normalize_name
 
 
-class GoldenAudioBooksAudioBook(AudioBook):
-    base_url = "https://goldenaudiobooks.com"
+@dataclass
+class GoldenAudioBooksAudioBook:
+    url: str
 
     def parse_page(self):
-        author_name = "goldenaudiobooks"
-        title = self.soup.find("h1", {"class": "entry-title"}).text
-        content = self.soup.find("div", {"class": "entry-content"})
-        desc = content.find("p").text
+        soup = get_soup(self.url)
+        title = soup.find("h1", {"class": "title-page"}).text.replace(" Audiobook", "")
+        tags = [t for t in soup.find("span", {"class": "post-meta-category"}).text.split(" ") if len(t) > 2]
+        img = soup.find("figure").find("img")["src"]
+
+        authors = []
+
         if "–" in title:
             pts = title.split("–")
             author_name = pts[0]
             title = " ".join(pts[1:])
 
-        img = content.find("img")["data-src"]
-        names = author_name.strip().split(" ")
-        if len(names):
-            first_name = names[0].strip()
-            last_name = " ".join(names[1:]).strip()
-            if not last_name:
-                last_name = first_name
-                first_name = ""
-        else:
-            first_name = ""
-            last_name = author_name.strip()
-
-        authors = [BookAuthor(first_name=first_name, last_name=last_name)]
-
-        streams = [s.find("a").text for s in content.find_all("audio")]
-
-        return {"description": desc,
-                "authors": authors,
-                "title": title.strip(),
-                "streams": streams,
-                "rating": 0,
-                "tags": [],
-                "img": img}
-
-    def from_page(self):
-        data = self.parse_page()
-        if not self.title:
-            self.title = data["title"]
-        if not self._description:
-            self._description = data["description"]
-
-        self.img = data.get("img", self.img)
-        for tag in data["tags"]:
-            if tag.as_json not in self._tags:
-                self._tags.append(tag.as_json)
-        for author in data["authors"]:
-            if author.as_json not in self._authors:
-                self._authors.append(author.as_json)
-        self._stream_list = data["streams"]
-        self.raw.update(data)
-
-    def __repr__(self):
-        return "GoldenAudioBooksAudioBook(" + str(
-            self) + ", " + self.book_id + ")"
+            f, l = normalize_name(author_name)
 
+            authors = [BookAuthor(first_name=f, last_name=l)]
 
-class GoldenAudioBooks(AudioBookSource):
-    base_url = "https://goldenaudiobooks.com"
-    popular_url = "https://goldenaudiobooks.com/category/bestsellers"
-
-    @classmethod
-    def scrap_tags(cls):
-        bucket = {}
-        sm = SiteMapParser('https://goldenaudiobook.co/category-sitemap.xml')  # reads /sitemap.xml
-        urls = sm.get_urls()  # returns iterator of sitemapper.Url instances
-        for url in urls:
-            url = str(url)
-            title = url.strip("/").split("/")[-1].replace("-", " ").title()
-
-            bucket[title] = url
-
-        return bucket
-
-    @property
-    def tag_pages(self):
-        if self._tag_pages is None:
-            try:
-                self._tag_pages = self.scrap_tags()
-            except Exception as e:
-                self._tag_pages = {
-                    'Action': 'https://goldenaudiobooks.com/category/action/',
-                    'Adults': 'https://goldenaudiobooks.com/category/adults-audios/',
-                    'Adventure': 'https://goldenaudiobooks.com/category/adventure/',
-                    'Autobiography & Biographies': 'https://goldenaudiobooks.com/category/autobiography-biographies/',
-                    'Bestsellers': 'https://goldenaudiobooks.com/category/bestsellers/',
-                    'Business': 'https://goldenaudiobooks.com/category/business/',
-                    'Children': 'https://goldenaudiobooks.com/category/children/',
-                    'Classic': 'https://goldenaudiobooks.com/category/classic/',
-                    'Crime': 'https://goldenaudiobooks.com/category/crime/',
-                    'Fantasy': 'https://goldenaudiobooks.com/category/audio-fantasy/',
-                    'General Fiction': 'https://goldenaudiobooks.com/category/general-fiction/',
-                    'Historical Fiction': 'https://goldenaudiobooks.com/category/historical-fiction/',
-                    'History': 'https://goldenaudiobooks.com/category/history/',
-                    'Horror': 'https://goldenaudiobooks.com/category/horror/',
-                    'Humor': 'https://goldenaudiobooks.com/category/humors/',
-                    'Literary': 'https://goldenaudiobooks.com/category/literary/',
-                    'Literature & Fiction': 'https://goldenaudiobooks.com/category/literature-fiction/',
-                    'Mystery': 'https://goldenaudiobooks.com/category/mystery/',
-                    'Nonfiction': 'https://goldenaudiobooks.com/category/nonfiction/',
-                    'Novel': 'https://goldenaudiobooks.com/category/novel/',
-                    'Other': 'https://goldenaudiobooks.com/category/other/',
-                    'Paranormal': 'https://goldenaudiobooks.com/category/paranormal-audiobooks/',
-                    'Philosophy': 'https://goldenaudiobooks.com/category/philosophy/',
-                    'Romance': 'https://goldenaudiobooks.com/category/audiobooks-romance/',
-                    'Sci-Fi': 'https://goldenaudiobooks.com/category/science-fiction-audiobooks/',
-                    'Science': 'https://goldenaudiobooks.com/category/science/',
-                    'Self-help': 'https://goldenaudiobooks.com/category/self-help/',
-                    'Short Story': 'https://goldenaudiobooks.com/category/short-story/',
-                    'Spiritual & Religious': 'https://goldenaudiobooks.com/category/spiritual-religious/',
-                    'Sports': 'https://goldenaudiobooks.com/category/sports/',
-                    'Suspense': 'https://goldenaudiobooks.com/category/suspense/',
-                    'Teen & Young Adult': 'https://goldenaudiobooks.com/category/teen-and-young-adult/',
-                    'Thriller': 'https://goldenaudiobooks.com/category/thriller/',
-                    'Uncategorized': 'https://goldenaudiobooks.com/category/uncategorized/',
-                    'Westerns': 'https://goldenaudiobooks.com/category/westerns/'}
-        return self._tag_pages or {}
-
-    @classmethod
-    def _parse_page(cls, html, limit=-1):
-        soup = cls._get_soup(html)
-        for entry in soup.find_all("div", {"class": "columns postbox"}):
-            a = entry.find("a")
-            img = entry.find("img")["data-src"]
-            url = a["href"]
-            title = a["title"]
-            tags = []
-            for a in entry.find("span", {"class": "cat-links"}). \
-                    find_all("a"):
-                tags.append({"name": a.text, "url": a["href"]})
-            yield GoldenAudioBooksAudioBook(from_data={
-                "title": title,
-                "url": url,
-                "img": img,
-                "tags": tags
-            })
-        if limit == -1 or limit > 0:
-            limit -= 1
-            next_page = soup.find("a", {"class": "next page-numbers"})
-            if next_page:
-                html = requests.get(next_page["href"]).text
-                for ntry in cls._parse_page(html, limit=limit):
-                    yield ntry
-
-    @classmethod
-    def scrap_by_tag(cls, tag, limit=-1, offset=0):
-        if tag in cls._tag_pages:
-            url = cls._tag_pages[tag]
-            html = requests.get(url).text
-            for book in cls._parse_page(html):
-                # TODO inject tag in book obj
-                yield book
-        else:
-            for book in cls.search_audiobooks(tag=tag):
-                yield book
-
-    @classmethod
-    def scrap_popular(cls, limit=-1, offset=0):
-        html = requests.get(cls.popular_url).text
-        return cls._parse_page(html)
-
-    @classmethod
-    def search_audiobooks(cls, since=None, author=None, title=None, tag=None,
-                          limit=25):
-        """
-        Args:
-            since: a UNIX timestamp; returns all projects cataloged since that time
-            author: all records by that author last name
-            title: all matching titles
-            tag: all projects of the matching tag
-        Yields:
-            AudioBook objects
-        """
-        query = ""
-        if title:
-            query += title + " "
-        if tag:
-            query += tag + " "
-        if author:
-            query += author + " "
-        html = requests.get(cls.base_url,
-                            params={"s": query}).text
-        return cls._parse_page(html)
-
-    @classmethod
-    def get_audiobook(cls, book_id):
-        url = cls.base_url + '/' + book_id
-        book = GoldenAudioBooksAudioBook(url=url)
-        return book
-
-    @classmethod
-    def scrap_all_audiobooks(cls, limit=-1, offset=0):
-        sm = SiteMapParser('https://goldenaudiobook.co/post-sitemap.xml')  # reads /sitemap.xml
-        urls = sm.get_urls()  # returns iterator of sitemapper.Url instances
-        for url in urls:
-            url = str(url)
-            title = url.strip("/").split("/")[-1].replace("-", " ").title()
-            yield GoldenAudioBooksAudioBook(url=url, title=title)
-
-        sm = SiteMapParser('https://goldenaudiobook.co/post-sitemap2.xml')  # reads /sitemap.xml
-        urls = sm.get_urls()  # returns iterator of sitemapper.Url instances
-        for url in urls:
-            url = str(url)
-            title = url.strip("/").split("/")[-1].replace("-", " ").title()
-            yield GoldenAudioBooksAudioBook(url=url, title=title)
-
+        streams = [s.find("a").text for s in soup.find_all("audio")]
 
-if __name__ == "__main__":
-    from pprint import pprint
+        return AudioBook(
+            title=title.strip(),
+            streams=streams,
+            image=img,
+            tags=tags,
+            authors=authors,
+            language="en"
+        )
 
-    book = GoldenAudioBooks.get_audiobook('andy-weir-artemis-audiobook/')
-    # pprint(book.parse_page())
-    for a in book.authors:
-        # print(a.as_json)
-        pass
 
-    tags = GoldenAudioBooks.scrap_tags()
-    print(tags)
+class GoldenAudioBooks(AudioBookSource):
+    base_url = "https://goldenaudiobook.co/"
 
-    for book in GoldenAudioBooks.search_audiobooks(author="Lovecraft"):
-        pprint(book.as_json)
+    def iterate_all(self):
+        for u in ['https://goldenaudiobook.co/post-sitemap.xml',
+                  'https://goldenaudiobook.co/post-sitemap2.xml']:
+            sm = SiteMapParser(u)  # reads /sitemap.xml
+            urls = sm.get_urls()  # returns iterator of sitemapper.Url instances
+            for url in urls:
+                yield GoldenAudioBooksAudioBook(url=str(url)).parse_page()
 
-    scraper = GoldenAudioBooks()
-    for book in scraper.scrap_popular():
-        pprint(book.as_json)
 
-    for book in scraper.scrap_by_tag("science-fiction-audiobooks"):
-        pprint(book.as_json)
+if __name__ == "__main__":
+    from pprint import pprint
 
-    for book in scraper.scrap_all_audiobooks():
-        pprint(book.as_json)
+    scraper = GoldenAudioBooks()
+    for book in scraper.iterate_all():
+        pprint(book)
diff --git a/audiobooker/scrappers/hpaudiotales.py b/audiobooker/scrappers/hpaudiotales.py
new file mode 100644
index 0000000..9d55043
--- /dev/null
+++ b/audiobooker/scrappers/hpaudiotales.py
@@ -0,0 +1,48 @@
+from dataclasses import dataclass
+
+from sitemapparser import SiteMapParser
+
+from audiobooker.base import AudioBook
+from audiobooker.scrappers import AudioBookSource
+from audiobooker.utils import get_soup
+
+
+@dataclass
+class HPTalesAudioBook:
+    """A single hpaudiotales.com post, identified by its page ``url``."""
+    url: str
+
+    def parse_page(self):
+        """Scrape the post at ``self.url`` and return it as an AudioBook."""
+        soup = get_soup(self.url)
+        title = soup.find("h1", {"class": "entry-title"}).text
+        # the player div points at a JSON document listing the tracks
+        d = soup.find("div", {"class": "audioigniter-root"})["data-tracks-url"]
+        data = AudioBookSource.session.get(d).json()
+        # dict.fromkeys de-duplicates while keeping first-seen order, so the
+        # tag list is stable between runs (set() iteration order is not)
+        subtitles = dict.fromkeys(s["subtitle"] for s in data)
+        return AudioBook(
+            title=title.strip(),
+            streams=[s["audio"] for s in data],
+            tags=["Harry Potter", "Fantasy", "Magic", *subtitles],
+            language="en"
+        )
+
+
+class HPTalesAudioBooks(AudioBookSource):
+    @classmethod
+    def iterate_all(cls, limit=-1, offset=0):
+        """Yield AudioBooks, skipping ``offset`` posts; ``limit`` < 0 means all."""
+        from itertools import islice  # local import: module import block untouched
+        sm = SiteMapParser('https://hpaudiotales.com/wp-sitemap-posts-post-1.xml')  # posts sitemap
+        for url in islice(sm.get_urls(), offset, None if limit < 0 else offset + limit):
+            yield HPTalesAudioBook(url=str(url)).parse_page()
+
+
+if __name__ == "__main__":
+    # manual smoke test: scrape the whole site and dump each parsed book
+    from pprint import pprint
+
+    for book in HPTalesAudioBooks().iterate_all():
+        pprint(book)
diff --git a/audiobooker/scrappers/librivox.py b/audiobooker/scrappers/librivox.py
index 2352a29..bdcd7cf 100644
--- a/audiobooker/scrappers/librivox.py
+++ b/audiobooker/scrappers/librivox.py
@@ -1,116 +1,116 @@
+from typing import Iterable
+
 import feedparser
 
-from audiobooker.base import AudioBook, BookAuthor, session
+from audiobooker.base import AudioBook, BookAuthor, AudiobookNarrator
 from audiobooker.scrappers import AudioBookSource
-
-
-class LibrivoxAudioBook(AudioBook):
-    def __init__(self, title="", authors=None, description="", tags=None,
-                 book_id="", runtime=0, url="", img="", rss_url="",
-                 copyright_year=0, language='english', from_data=None):
-        self.rss_url = rss_url
-        self.copyright_year = copyright_year
-        AudioBook.__init__(self, title, authors, description, tags,
-                           book_id, runtime, url, img, language, from_data=from_data)
-
-    @property
-    def description(self):
-        return self._description.replace("<p>", "").replace("</p>", "") \
-            .replace("(summary from Wikipedia)", "").strip().rstrip("\"") \
-            .lstrip("\"")
-
-    @property
-    def rss_data(self):
-        return feedparser.parse(self.rss_url)
-
-    @property
-    def streamer(self):
-        for stream in self.rss_data["entries"]:
-            try:
-                yield stream['media_content'][0]["url"]
-            except Exception as e:
-                print(e)
-                continue
-
-    def from_json(self, json_data):
-        AudioBook.from_json(self, json_data)
-        self.url = json_data.get("url_librivox", self.url)
-        self.runtime = json_data.get("totaltimesecs", self.runtime)
-        self.copyright_year = json_data.get("copyright_year",
-                                            self.copyright_year)
-        self.rss_url = json_data.get("url_rss", self.rss_url)
-
-    def __repr__(self):
-        return "LibrivoxAudioBook(" + str(self) + ", " + self.book_id + ")"
+from audiobooker.utils import normalize_name
 
 
 class Librivox(AudioBookSource):
     base_url = "https://librivox.org/api/feed/audiobooks/?%s&format=json"
     authors_url = "https://librivox.org/api/feed/authors/?%s&format=json"
 
-    @classmethod
-    def scrap_all_audiobooks(cls, limit=2000, offset=0):
-        """
-        Generator, yields LibrivoxAudioBook objects
-        Args:
-            limit:
-            offset:
-        """
-        url = cls.base_url % \
-              ("limit=" + str(limit) + "offset=" + str(offset) + "&extended=1")
-        json_data = session.get(url).json()['books']
-        for k in json_data:
-            yield LibrivoxAudioBook(from_data=json_data[k])
-
-    @classmethod
-    def get_audiobook(cls, book_id):
-        url = cls.base_url % ("id=" + str(book_id),)
-        json_data = session.get(url).json()['books']
-        return LibrivoxAudioBook(from_data=json_data[0])
-
-    @classmethod
-    def get_author(cls, author_id):
-        url = cls.authors_url % ("id=" + str(author_id),)
-        json_data = session.get(url).json()["authors"]
-        return BookAuthor(from_data=json_data[0])
-
-    @classmethod
-    def search_audiobooks(cls, since=None, author=None, title=None, tag=None,
-                          limit=25):
-        """
-        Args:
-            since: a UNIX timestamp; returns all projects cataloged since that time
-            author: all records by that author last name
-            title: all matching titles
-            tag: all projects of the matching tag
-
-        Returns:
-            list : list of LibrivoxAudioBook objects
-        """
-        searchterm = []
-        if limit:
-            # TODO validate
-            searchterm.append("limit=" + str(limit))
-        if since:
-            # TODO validate
-            searchterm.append("since=" + since)
-        if author:
-            searchterm.append("author=" + author)
-        if title:
-            searchterm.append("title=" + title)
-        if tag:
-            # TODO validate
-            searchterm.append("tag=" + tag)
-        if not searchterm:
-            raise TypeError
-        searchterm = "&".join(searchterm)
-        url = cls.base_url % (searchterm,)
-        json_data = session.get(url).json()
-        if "error" in json_data:
-            return []
-        return [LibrivoxAudioBook(from_data=a) for a in json_data["books"]]
+    def iterate_all(self, offset=0, max_offset=100000):
+        """Yield every catalogued book, fetching the API in pages of 50.
+
+        Pages are walked in a loop rather than recursively, so ``max_offset``
+        is honoured on every page; a response without books ends the walk.
+        """
+        url = "https://librivox.org/api/feed/audiobooks"
+        while True:
+            params = {"limit": 50, "offset": offset,
+                      "extended": 1, "format": "json"}
+            books = AudioBookSource.session.get(url, params=params).json().get("books")
+            for k in books or []:
+                yield from self._parse_res(k)
+            if not books or offset >= max_offset:
+                return
+            offset += 50
+
+    def search_by_author(self, query) -> Iterable[AudioBook]:
+        """Yield an AudioBook per section of each book by author ``query``.
+
+        A response without a "books" key (e.g. the API's "error" payload for
+        a search with no match) yields nothing instead of raising KeyError.
+        """
+        url = "https://librivox.org/api/feed/audiobooks"
+        params = {"author": query, "limit": 50,
+                  "extended": 1, "format": "json"}
+        json_data = AudioBookSource.session.get(url, params=params).json()
+        for k in json_data.get("books", []):
+            yield from self._parse_res(k)
+
+    def search_by_narrator(self, query) -> Iterable[AudioBook]:
+        """Yield an AudioBook per section of each book read by ``query``.
+
+        A response without a "books" key (e.g. the API's "error" payload for
+        a search with no match) yields nothing instead of raising KeyError.
+        """
+        url = "https://librivox.org/api/feed/audiobooks"
+        params = {"reader": query, "limit": 50,
+                  "extended": 1, "format": "json"}
+        json_data = AudioBookSource.session.get(url, params=params).json()
+        for k in json_data.get("books", []):
+            yield from self._parse_res(k)
+
+    def search_by_tag(self, query) -> Iterable[AudioBook]:
+        """Yield an AudioBook per section of each book tagged ``query``.
+
+        A response without a "books" key (e.g. the API's "error" payload for
+        a search with no match) yields nothing instead of raising KeyError.
+        """
+        url = "https://librivox.org/api/feed/audiobooks"
+        params = {"tag": query, "limit": 50,
+                  "extended": 1, "format": "json"}
+        json_data = AudioBookSource.session.get(url, params=params).json()
+        for k in json_data.get("books", []):
+            yield from self._parse_res(k)
+
+    def search_by_title(self, query) -> Iterable[AudioBook]:
+        """Yield an AudioBook per section of each book titled like ``query``.
+
+        A response without a "books" key (e.g. the API's "error" payload for
+        a search with no match) yields nothing instead of raising KeyError.
+        """
+        url = "https://librivox.org/api/feed/audiobooks"
+        params = {"title": query, "limit": 50,
+                  "extended": 1, "format": "json"}
+        json_data = AudioBookSource.session.get(url, params=params).json()
+        for k in json_data.get("books", []):
+            yield from self._parse_res(k)
+
+    def _parse_res(self, k):
+        """Yield one AudioBook per section of the API book record ``k``.
+
+        Stream URLs come from the book's RSS feed and are paired with the
+        sections by position; a section without a feed entry is skipped.
+        """
+        rss = feedparser.parse(k['url_rss'])
+        streams = [stream['media_content'][0]["url"]
+                   for stream in rss["entries"]]
+        for stream, s in zip(streams, k["sections"]):
+            if len(s["readers"]) == 1:
+                f, l = normalize_name(s["readers"][0]['display_name'])
+                narrator = AudiobookNarrator(last_name=l, first_name=f)
+            else:  # several readers, or none listed
+                narrator = AudiobookNarrator(last_name="Various")
+            yield AudioBook(
+                streams=[stream], narrator=narrator,
+                tags=[g["name"] for g in k["genres"]],
+                authors=[BookAuthor(first_name=a["first_name"],
+                                    last_name=a["last_name"])
+                         for a in k["authors"]],
+                title=k["title"] + " | " + s["title"],
+                description=k["description"],
+                year=int(k['copyright_year']), runtime=s['playtime'],
+                language=k["language"]  # TODO - convert to lang code
+            )
 
 
 if __name__ == "__main__":
-    book = Librivox.search_audiobooks(title="War of the worlds")[0]
-    book.play_mplayer()
+    l = Librivox()
+    for book in l.search_by_title("Art of War"):
+        print(book)
+    for book in l.search_by_author("Lovecraft"):
+        print(book)
diff --git a/audiobooker/scrappers/loyalbooks.py b/audiobooker/scrappers/loyalbooks.py
index d9c0c90..5d633ed 100644
--- a/audiobooker/scrappers/loyalbooks.py
+++ b/audiobooker/scrappers/loyalbooks.py
@@ -1,417 +1,83 @@
 import feedparser
 from sitemapparser import SiteMapParser
 
-from audiobooker.base import AudioBook, BookTag, BookAuthor
+from audiobooker.base import AudioBook, BookAuthor
 from audiobooker.scrappers import AudioBookSource
-
-
-class LoyalBooksAudioBook(AudioBook):
-    base_url = "http://www.loyalbooks.com"
-
-    def __init__(self, title="", authors=None, description="", tags=None,
-                 book_id="", runtime=0, url="", rss_url="", img="", rating=0,
-                 language='english', from_data=None):
-        self.rss_url = rss_url or url + "/feed"
-        self.rating = rating
-        AudioBook.__init__(self, title, authors, description, tags,
-                           book_id, runtime, url, img, language)
-        self.from_rss()
-
-    def parse_page(self):
-        title = self.soup.find("span", {"itemprop": "name"}).text
-        description = self.soup.find("font",
-                                     {"class": "book-description"}).text
-        if self.soup.find(id="star1") is not None:
-            rating = 1
-        elif self.soup.find(id="star2") is not None:
-            rating = 2
-        elif self.soup.find(id="star3") is not None:
-            rating = 3
-        elif self.soup.find(id="star4") is not None:
-            rating = 4
-        elif self.soup.find(id="star5") is not None:
-            rating = 5
-        else:
-            rating = 0
-        author = self.soup.find("font", {"class": "book-author"})
-        author_name = author.text.replace("By: ", "")
-
-        names = author_name.split(" ")
-        if len(names):
-            first_name = names[0].strip()
-            last_name = " ".join(names[1:]).strip()
-            if not last_name:
-                last_name = first_name
-                first_name = ""
-        else:
-            first_name = ""
-            last_name = author_name.strip()
-
-        author_url = author.find("a")
-        if author_url:
-            author_url = self.base_url + author_url["href"]
-
-        authors = [BookAuthor(url=author_url, first_name=first_name,
-                              last_name=last_name)]
-
-        tags = []
-        tags_table = self.soup.find(summary="Genres for this book")
-        if tags_table:
-            tags_urls = tags_table.find_all("a")
-            for a in tags_urls:
-                url = self.base_url + a["href"]
-                tag = a.text.strip()
-                tag_id = LoyalBooks.get_tag_id(tag)
-                tags.append(BookTag(name=tag, url=url,
-                                    tag_id=tag_id))
-
-        img = self.soup.find("img", {"itemprop": "image", "class": "cover"})
-        if img:
-            img = self.base_url + img["src"]
-        return {"description": description, "rating": rating, "tags": tags,
-                "authors": authors, "title": title, "img": img}
-
-    @property
-    def rss_data(self):
-        return feedparser.parse(self.rss_url)
-
-    @property
-    def streamer(self):
-        for stream in self.rss_data["entries"]:
-            try:
-                for url in stream["links"]:
-                    if url["type"] == 'audio/mpeg':
-                        yield url["href"]
-            except Exception as e:
+from audiobooker.utils import normalize_name
+
+
+def calc_runtime(rss_data):
+    """Return the entry's ``itunes_duration`` (S, M:S or H:M:S) in seconds."""
+    runtime = rss_data["itunes_duration"].split(":")
+    if len(runtime) == 1:  # seconds
+        return int(runtime[0])
+    if len(runtime) == 2:  # minutes : seconds
+        return int(runtime[1]) + int(runtime[0]) * 60
+    if len(runtime) == 3:  # hours : minutes : seconds; an hour is 3600 s
+        return int(runtime[2]) + int(runtime[1]) * 60 + int(runtime[0]) * 3600
+    return 0
+
+
+def from_rss(rss_url):
+    data = feedparser.parse(rss_url)
+    for rss in data["entries"]:
+        authors = []
+        streams = [s['href'] for s in rss["links"]
+                   if "audio" in s["type"]]
+        for rss_data in rss["authors"]:
+            if not rss_data:
                 continue
-
-    def from_json(self, json_data):
-        AudioBook.from_json(self, json_data)
-        self.rss_url = json_data.get("url_rss", self.rss_url)
-        self.rating = json_data.get("rating", self.rating)
-
-    def calc_runtime(self, data=None):
-        data = data or self.rss_data["entries"]
-        for rss_data in data:
-            runtime = rss_data["itunes_duration"].split(":")
-            if len(runtime) == 1:  # seconds
-                self.runtime += int(runtime[0])
-            elif len(runtime) == 2:  # minutes : seconds
-                self.runtime += int(runtime[1]) + (int(runtime[0]) * 60)
-            elif len(runtime) == 3:  # hours : minutes : seconds
-                self.runtime += int(runtime[2]) + (int(runtime[1]) * 60) + \
-                                (int(runtime[0]) * 120)
-
-    def from_rss(self):
-        rss = self.rss_data["entries"]
-
-        if self.runtime < 1:
-            self.calc_runtime()
-
-        if not self.url:
-            self.url = rss[0]["link"]
-
-        for rss_data in rss:
-            first_name = ""
-            last_name = rss_data["author"]
-            names = last_name.split(" ")
-            if len(names) > 1:
-                first_name = names[0].strip()
-                last_name = " ".join(names[1:]).strip()
-                if not last_name:
-                    last_name = first_name
-                    first_name = ""
-            author = BookAuthor(from_data={"first_name": first_name,
-                                           "last_name": last_name})
-            if author.as_json not in self._authors:
-                self._authors.append(author.as_json)
-
-    def from_page(self):
-        data = self.parse_page()
-        if self.rating < 1:
-            self.rating = data["rating"]
-        if not self.title:
-            self.title = data["title"]
-        if not self._description:
-            self._description = data["description"]
-
-        self.img = data.get("img", self.img)
-        for tag in data["tags"]:
-            if tag.as_json not in self._tags:
-                self._tags.append(tag.as_json)
-        for author in data["authors"]:
-            if author.as_json not in self._authors:
-                self._authors.append(author.as_json)
-
-    def __repr__(self):
-        return "LoyalBooksAudioBook(" + str(self) + ", " + self.book_id + ")"
+            f, l = normalize_name(rss_data["name"])
+            author = BookAuthor(first_name=f, last_name=l)
+            authors.append(author)
+        yield AudioBook(
+            language=data["feed"]["language"],
+            description=data["feed"]["summary"],
+            tags=[t['term'] for t in data["feed"]["tags"]],
+            image=data["feed"]["image"]["href"],
+            streams=streams,
+            title=data["feed"]["title"] + " | " + rss["title"],
+            runtime=calc_runtime(rss),
+            authors=authors
+        )
 
 
 class LoyalBooks(AudioBookSource):
-    base_url = "https://www.loyalbooks.com"
-    popular_url = "https://www.loyalbooks.com"
-    tags_url = "https://www.loyalbooks.com/tag-menu"
-    search_url = "https://www.loyalbooks.com/search?q=%s"
-
-    @classmethod
-    def scrap_tags(cls):
-        soup = cls._get_soup(cls._get_html(cls.tags_url))
-        urls = soup.find("div", {"class": "left"}).find_all("a")
-        bucket = {}
-        for url in urls:
-            tag = url.text
-            url = url["href"]
-            if url.startswith("/tag"):
-                url = "http://www.loyalbooks.com" + url
-                bucket[tag] = url
-        cls._tags = list(bucket.keys())
-        return bucket
-
-    @property
-    def tag_pages(self):
-        if LoyalBooks._tag_pages is None:
-            try:
-                LoyalBooks._tag_pages = LoyalBooks.scrap_tags()
-            except Exception as e:
-                LoyalBooks._tag_pages = {
-                    'Adventure': 'http://www.loyalbooks.com/tag/Adventure',
-                    'Advice': 'http://www.loyalbooks.com/tag/Advice',
-                    'Ancient Texts': 'http://www.loyalbooks.com/tag/Ancient_Texts',
-                    'Animals': 'http://www.loyalbooks.com/tag/Animals',
-                    'Art': 'http://www.loyalbooks.com/tag/Art',
-                    'Biography': 'http://www.loyalbooks.com/tag/Biography',
-                    'Children': 'http://www.loyalbooks.com/tag/Children',
-                    'Classics (antiquity)': 'http://www.loyalbooks.com/tag/Classics_antiquity',
-                    'Comedy': 'http://www.loyalbooks.com/tag/Comedy',
-                    'Cookery': 'http://www.loyalbooks.com/tag/Cookery',
-                    'Dramatic Works': 'http://www.loyalbooks.com/tag/Dramatic_Works',
-                    'Economics': 'http://www.loyalbooks.com/tag/Economics_Political_Economy',
-                    'Epistolary fiction': 'http://www.loyalbooks.com/tag/Epistolary_fiction',
-                    'Essay/Short nonfiction': 'http://www.loyalbooks.com/tag/Essay_Short_nonfiction',
-                    'Fairy tales': 'http://www.loyalbooks.com/tag/Fairy_tales',
-                    'Fantasy': 'http://www.loyalbooks.com/tag/Fantasy',
-                    'Fiction': 'http://www.loyalbooks.com/tag/Fiction',
-                    'Historical Fiction': 'http://www.loyalbooks.com/tag/Historical_Fiction',
-                    'History': 'http://www.loyalbooks.com/tag/History',
-                    'Holiday': 'http://www.loyalbooks.com/tag/Holiday',
-                    'Horror/Ghost stories': 'http://www.loyalbooks.com/tag/Horror_Ghost_stories',
-                    'Humor': 'http://www.loyalbooks.com/tag/Humor',
-                    'Instruction': 'http://www.loyalbooks.com/tag/Instruction',
-                    'Languages': 'http://www.loyalbooks.com/tag/Languages',
-                    'Literature': 'http://www.loyalbooks.com/tag/Literature',
-                    'Memoirs': 'http://www.loyalbooks.com/tag/Memoirs',
-                    'Music': 'http://www.loyalbooks.com/tag/Music',
-                    'Mystery': 'http://www.loyalbooks.com/tag/Mystery',
-                    'Myths/Legends': 'http://www.loyalbooks.com/tag/Myths_Legends',
-                    'Nature': 'http://www.loyalbooks.com/tag/Nature',
-                    'Non-fiction': 'http://www.loyalbooks.com/tag/Non-fiction',
-                    'Philosophy': 'http://www.loyalbooks.com/tag/Philosophy',
-                    'Play': 'http://www.loyalbooks.com/tag/Play',
-                    'Poetry': 'http://www.loyalbooks.com/tag/Poetry',
-                    'Politics': 'http://www.loyalbooks.com/tag/Politics',
-                    'Psychology': 'http://www.loyalbooks.com/tag/Psychology',
-                    'Religion': 'http://www.loyalbooks.com/tag/Religion',
-                    'Romance': 'http://www.loyalbooks.com/tag/Romance',
-                    'Satire': 'http://www.loyalbooks.com/tag/Satire',
-                    'Science': 'http://www.loyalbooks.com/tag/Science',
-                    'Science fiction': 'http://www.loyalbooks.com/tag/Science_fiction',
-                    'Sea stories': 'http://www.loyalbooks.com/tag/Sea_stories',
-                    'Self Published': 'http://www.loyalbooks.com/tag/Self-Published',
-                    'Short stories': 'http://www.loyalbooks.com/tag/Short_stories',
-                    'Spy stories': 'http://www.loyalbooks.com/tag/Spy_stories',
-                    'Teen/Young adult': 'http://www.loyalbooks.com/tag/Teen_Young_adult',
-                    'Tragedy': 'http://www.loyalbooks.com/tag/Tragedy',
-                    'Travel': 'http://www.loyalbooks.com/tag/Travel',
-                    'War stories': 'http://www.loyalbooks.com/tag/War_stories',
-                    'Westerns': 'http://www.loyalbooks.com/tag/Westerns'}
-        return self._tag_pages or {}
-
-    @property
-    def tags(self):
-        if LoyalBooks._tags is None:
-            try:
-                LoyalBooks._tags = list(self.tag_pages.keys())
-            except Exception as e:
-                LoyalBooks._tags = ['Advice', 'Instruction',
-                                    'Ancient Texts',
-                                    'Biography', 'Memoirs', 'Languages',
-                                    'Myths/Legends', 'Holiday', 'Art',
-                                    'Politics', 'Short stories', 'Romance',
-                                    'Essay/Short nonfiction', 'Fiction',
-                                    'Epistolary fiction', 'Science',
-                                    'Nature', 'Dramatic Works',
-                                    'Spy stories', 'History', 'Non-fiction',
-                                    'Historical Fiction', 'Play', 'Children',
-                                    'Satire', 'Humor',
-                                    'Classics (antiquity)', 'Travel',
-                                    'Religion', 'Adventure', 'Animals',
-                                    'Psychology', 'Sea stories',
-                                    'Horror/Ghost stories', 'Fantasy',
-                                    'Cookery', 'Poetry', 'Self Published',
-                                    'Westerns', 'Comedy', 'Music',
-                                    'Economics', 'Fairy tales', 'Tragedy',
-                                    'Teen/Young adult', 'Literature',
-                                    'War stories', 'Science fiction',
-                                    'Philosophy', 'Mystery']
-        return sorted(self._tags) or []
-
-    @classmethod
-    def _parse_book_div(cls, book):
-        try:
-            url = cls.base_url + book.find("a")[
-                "href"].strip()
-            img = book.find("img")
-            if img:
-                img = cls.base_url + img["src"].strip()
-            name = book.find("b")
-            if name:
-                name = name.text.strip()
-                author = book.text.replace(name, "").strip()
-            else:
-                name, author = book.find("div", {"class": "s-left"}) \
-                    .text.split(" By: ")
-            if book.find(id="star1") is not None:
-                rating = 1
-            elif book.find(id="star2") is not None:
-                rating = 2
-            elif book.find(id="star3") is not None:
-                rating = 3
-            elif book.find(id="star4") is not None:
-                rating = 4
-            elif book.find(id="star5") is not None:
-                rating = 5
-            else:
-                rating = 0
-            names = author.split(" ")
-            if len(names):
-                first_name = names[0].strip()
-                last_name = " ".join(names[1:]).strip()
-                if not last_name:
-                    last_name = first_name
-                    first_name = ""
-            else:
-                first_name = ""
-                last_name = author.strip()
-            return LoyalBooksAudioBook(title=name.strip(), url=url,
-                                       img=img or "", rating=rating,
-                                       authors=[BookAuthor(
-                                           first_name=first_name,
-                                           last_name=last_name).as_json])
-        except Exception as e:
-            pass  # probably an add
-        return None
 
-    @classmethod
-    def scrap_by_tag(cls, tag, limit=-1, offset=0):
-        """
-        Generator, yields AudioBook objects
-        """
-        if tag not in cls._tag_pages:
-            cls._tag_pages = cls.scrap_tags()
-        if tag not in cls._tag_pages:
-            return
-
-        url = cls._tag_pages[tag] + "?page=" + str(offset)
-        limit = int(limit)
-        soup = cls._get_soup(cls._get_html(url))
-        el = soup.find("table", {"class": "layout2-blue"})
-        if el is None:
-            el = soup.find("table", {"class": "layout3"})
-
-        books = el.find_all("td", {"class": "layout2-blue"})
-        if not len(books):
-            books = el.find_all("td", {"class": "layout3"})
-
-        for book in books:
-            book = cls._parse_book_div(book)
-            if book is None:
-                continue
-            book._tags = [BookTag(name=tag, url=cls._tag_pages[tag],
-                                  tag_id=cls.get_tag_id(tag)).as_json],
-            yield book
-
-        # check if last page reached
-        pages = soup.find("div", {"class": "result-pages"}).text
-        if ">" not in pages:
-            return
-
-        # check if limit crawled
-        if limit > 0 and int(offset) > limit:
-            return
-
-        # crawl next page
-        for book in cls.scrap_by_tag(tag, offset + 1, limit):
-            yield book
-
-    @classmethod
-    def scrap_popular(cls, limit=-1, offset=0):
-        """
-        Generator, yields AudioBook objects
-        """
-        soup = cls._get_soup(cls._get_html(cls.popular_url))
-        books = soup.find(summary="Audio books").find_all("td")
-        for b in books:
-            b = cls._parse_book_div(b)
-            if b is not None:
-                yield b
-
-    @classmethod
-    def search_audiobooks(cls, since=None, author=None, title=None, tag=None,
-                          limit=25):
-        """
-        Args:
-            since: a UNIX timestamp; returns all projects cataloged since that time
-            author: all records by that author last name
-            title: all matching titles
-            tag: all projects of the matching tag
-
-        Yields:
-            AudioBook objects
-        """
-        sm = SiteMapParser(f"{LoyalBooks.base_url}/sitemap.xml")  # reads /sitemap.xml
+    def search(self, query):
+        sm = SiteMapParser("https://www.loyalbooks.com/sitemap.xml")  # reads /sitemap.xml
         for url in sm.get_urls():
             url = str(url)
-            if not url.startswith(f"{LoyalBooks.base_url}/book/"):
+            if not url.startswith("https://www.loyalbooks.com/book/"):
                 continue
             t = url.split("/")[-1].replace("-", " ").lower()
-            if author and author.lower() in t:
-                yield LoyalBooksAudioBook(url=url, title=t)
-            elif title and title.lower() in t:
-                yield LoyalBooksAudioBook(url=url, title=t)
+            if query.lower() in t:
+                yield from from_rss(url + "/feed")
+
+    def search_by_narrator(self, query):
+        return []  # narrator info unavailable
+
+    def search_by_title(self, query):
+        return self.search(query)
 
-    @classmethod
-    def get_audiobook(cls, book_id):
-        url = cls.base_url + '/book/' + book_id
-        return LoyalBooksAudioBook(url=url)
+    def search_by_author(self, query):
+        return self.search(query)
 
-    def scrap_all_audiobooks(self, limit=-1, offset=0):
+    def iterate_all(self):
         """
         Generator, yields AudioBook objects
         """
-        sm = SiteMapParser('https://www.loyalbooks.com/sitemap.xml')  # reads /sitemap.xml
-        urls = sm.get_urls()  # returns iterator of sitemapper.Url instances
-        for url in urls:
+        sm = SiteMapParser("https://www.loyalbooks.com/sitemap.xml")  # reads /sitemap.xml
+        for url in sm.get_urls():
             url = str(url)
             if not url.startswith("https://www.loyalbooks.com/book/"):
                 continue
-            title = url.split("/")[-1].replace("-", " ").title()
-            yield LoyalBooksAudioBook(url=url, title=title)
+            yield from from_rss(url + "/feed")
 
 
 if __name__ == "__main__":
     from pprint import pprint
 
-    for book in LoyalBooks.search_audiobooks(author="Lovecraft"):
-        pprint(book.as_json)
-
-    scraper = LoyalBooks()
-    for book in scraper.scrap_popular():
-        pprint(book.as_json)
-
-    for book in scraper.scrap_by_tag("Science fiction"):
-        pprint(book.as_json)
+    for book in LoyalBooks().search_by_author("lovecraft"):
+        print(book)
 
-    for book in scraper.scrap_all_audiobooks():
-        pprint(book.as_json)
-    pprint(scraper.scrap_tags())
-    pprint(scraper.tags)
diff --git a/audiobooker/scrappers/sharedaudiobooks.py b/audiobooker/scrappers/sharedaudiobooks.py
new file mode 100644
index 0000000..cea4e09
--- /dev/null
+++ b/audiobooker/scrappers/sharedaudiobooks.py
@@ -0,0 +1,69 @@
+from dataclasses import dataclass
+
+from sitemapparser import SiteMapParser
+
+from audiobooker.base import AudioBook, BookAuthor
+from audiobooker.scrappers import AudioBookSource
+from audiobooker.utils import get_soup, normalize_name
+
+
+@dataclass
+class SharedAudioBook:
+    url: str  # address of the post page that parse_page() downloads
+    image: str = ""  # cover URL used when the page itself offers no usable <img>
+
+    def parse_page(self):
+        """Download self.url and build an AudioBook from its title, categories, image and players."""
+        soup = get_soup(self.url)
+        title = soup.find("h1", {"class": "entry-title"}).text
+        # one category per line of the list's text; strip each so tags carry no stray whitespace
+        categories = soup.find("ul", {"class": "post-categories"}).text.split("\n")
+        tags = [t.strip() for t in categories if t.strip()]
+
+        # the last <img> on the page is taken as the cover; otherwise keep the constructor's image
+        pics = soup.find_all("img")
+        img = pics[-1].get("src", self.image) if pics else self.image
+
+        authors = []
+        if "–" in title:
+            # text before the first dash is treated as the author, the remainder as the title
+            pts = title.split("–")
+            title = " ".join(pts[1:])
+            f, l = normalize_name(pts[0])
+            authors = [BookAuthor(first_name=f, last_name=l)]
+        streams = [s.find("a").text for s in soup.find_all("audio")]
+        return AudioBook(
+            title=title.strip(),
+            streams=streams,
+            image=img,
+            tags=tags,
+            authors=authors,
+            language="en"
+        )
+
+
+class SharedAudioBooks(AudioBookSource):
+    """AudioBookSource for sharedaudiobooks.com, enumerated through its post sitemaps."""
+    @classmethod
+    def iterate_all(cls, limit=-1, offset=0):
+        """Yield an AudioBook per sitemap URL; `limit` and `offset` are accepted but unused."""
+        # post-sitemap.xml first, then post-sitemap2.xml .. post-sitemap9.xml
+        sitemaps = ['https://sharedaudiobooks.com/post-sitemap.xml']
+        sitemaps += [f'https://sharedaudiobooks.com/post-sitemap{i}.xml' for i in range(2, 10)]
+        for sitemap in sitemaps:
+            for url in SiteMapParser(sitemap).get_urls():
+                url = str(url)
+                if url == "https://sharedaudiobooks.com/":
+                    continue  # the site root is not a book page, whichever sitemap lists it
+                try:
+                    yield SharedAudioBook(url=url).parse_page()
+                except (AttributeError, IndexError, KeyError, TypeError):
+                    continue  # markup not as expected: skip this page, keep iterating
+
+
+if __name__ == "__main__":
+    from pprint import pprint
+
+    scraper = SharedAudioBooks()
+    for book in scraper.iterate_all():
+        pprint(book)
diff --git a/audiobooker/scrappers/stephenkingaudiobooks.py b/audiobooker/scrappers/stephenkingaudiobooks.py
index a9b32a1..fbd8e40 100644
--- a/audiobooker/scrappers/stephenkingaudiobooks.py
+++ b/audiobooker/scrappers/stephenkingaudiobooks.py
@@ -1,146 +1,97 @@
-import requests
+from dataclasses import dataclass
+from typing import Iterable
 
-from audiobooker.base import AudioBook, BookAuthor
+from sitemapparser import SiteMapParser
+
+from audiobooker.exceptions import ParseErrorException
+from audiobooker.base import AudioBook, BookAuthor, AudiobookNarrator
 from audiobooker.scrappers import AudioBookSource
+from audiobooker.utils import get_soup, extractor_narrator, extract_year
 
 
-class StephenKingAudioBook(AudioBook):
-    base_url = "https://stephenkingaudiobooks.com/"
+@dataclass
+class StephenKingAudioBook:
+    url: str
 
     def parse_page(self):
-        author_name = "Stephen King"
-        title = self.soup.find("h1", {"class": "title-page"}).text
-        content = self.soup.find("div", {"class": "post-single clearfix"})
-        desc = content.find("p").text
-        if "–" in title:
-            pts = title.split("–")
-            author_name = pts[0]
-            title = " ".join(pts[1:]).strip().lstrip(",")
+        soup = get_soup(self.url)
+        tags = soup.find("span", {"class": "post-meta-category"})
+        title = soup.find("h1", {"class": "title-page"}).text.replace("\xa0", " ")
+        content = soup.find("div", {"class": "post-single clearfix"})
+        desc = content.find("p").text.replace("\xa0", " ")
 
         img = content.find("img")["src"]
-        names = author_name.strip().split(" ")
-        if len(names):
-            first_name = names[0].strip()
-            last_name = " ".join(names[1:]).strip()
-            if not last_name:
-                last_name = first_name
-                first_name = ""
+
+        if "Harry Potter" not in tags.text:
+            authors = [BookAuthor(first_name="Stephen", last_name="King")]
         else:
-            first_name = ""
-            last_name = author_name.strip()
+            authors = [BookAuthor(first_name="J.K.", last_name="Rowling")]
 
-        authors = [BookAuthor(first_name=first_name, last_name=last_name)]
+        if "Stephen Fry" in title and "Harry Potter" in tags.text:
+            narrator = AudiobookNarrator(first_name="Stephen",
+                                         last_name="Fry")
+        else:
+            narrator = (extractor_narrator(title) or
+                        extractor_narrator(desc))
 
         streams = [s.find("a").text for s in content.find_all("audio")]
 
-        return {"description": desc,
-                "authors": authors,
-                "title": title.strip(),
-                "streams": streams,
-                "rating": 0,
-                "tags": [],
-                "img": img}
-
-    def from_page(self):
-        data = self.parse_page()
-        if not self.title:
-            self.title = data["title"]
-        if not self._description:
-            self._description = data["description"]
-
-        self.img = data.get("img", self.img)
-        for tag in data["tags"]:
-            if tag.as_json not in self._tags:
-                self._tags.append(tag.as_json)
-        for author in data["authors"]:
-            if author.as_json not in self._authors:
-                self._authors.append(author.as_json)
-        self._stream_list = data["streams"]
-        self.raw.update(data)
-
-    def __repr__(self):
-        return "StephenKingAudioBook(" + str(
-            self) + ", " + self.book_id + ")"
+        if not streams:
+            raise ParseErrorException("No streams found")
+        return AudioBook(
+            title=title.replace(" Audiobook", ""),
+            streams=streams,
+            description=desc,
+            narrator=narrator,
+            image=img,
+            tags=[],
+            authors=authors,
+            year=extract_year(title) or
+                 extract_year(desc),
+            language="en"
+        )
 
 
 class StephenKingAudioBooks(AudioBookSource):
     base_url = "https://stephenkingaudiobooks.com"
-    _tags = ["Harry Potter", 'Stephen King']
-    _tag_pages = {
-        "Harry Potter": "https://stephenkingaudiobooks.com/category/harry-potter/",
-        'Stephen King': 'https://stephenkingaudiobooks.com/category/stephen-king/'}
-
     @classmethod
-    def _parse_page(cls, html, limit=-1):
-        soup = cls._get_soup(html)
+    def _parse_page(cls,url = "https://stephenkingaudiobooks.com", limit=-1, **params):
+        soup = get_soup(url, **params)
         for entry in soup.find_all("article"):
             try:
                 a = entry.find("a")
-                img = entry.find("img")["src"]
                 url = a["href"]
-                title = a["title"]
-                book = StephenKingAudioBook(title=title, url=url, img=img)
-                book.from_page()  # parse url
-                yield book
+                yield StephenKingAudioBook(url=url).parse_page()
             except:
                 continue
         if limit == -1 or limit > 0:
             limit -= 1
             next_page = soup.find("div", {"class": "nav-previous"})
             if next_page:
-                html = requests.get(next_page.find("a")["href"]).text
-                for ntry in cls._parse_page(html, limit=limit):
+                url = next_page.find("a")["href"]
+                for ntry in cls._parse_page(url=url, limit=limit, **params):
                     yield ntry
 
-    @classmethod
-    def scrap_by_tag(cls, tag, limit=-1, offset=0):
-        for book in cls.search_audiobooks(tag=tag):
-            yield book
-
-    @classmethod
-    def scrap_popular(cls, limit=-1, offset=0):
-        html = requests.get(cls.base_url).text
-        return cls._parse_page(html)
-
-    @classmethod
-    def search_audiobooks(cls, since=None, author=None, title=None, tag=None,
-                          limit=25):
-        """
-        Args:
-            since: a UNIX timestamp; returns all projects cataloged since that time
-            author: all records by that author last name
-            title: all matching titles
-            tag: all projects of the matching tag
-        Yields:
-            AudioBook objects
-        """
-        query = ""
-        if title:
-            query += title + " "
-        if tag:
-            query += tag + " "
-        if author:
-            query += author + " "
-        html = requests.get(cls.base_url, params={"s": query}).text
-        return cls._parse_page(html)
-
-    @classmethod
-    def get_audiobook(cls, book_id):
-        url = cls.base_url + '/' + book_id
-        book = StephenKingAudioBook(url=url)
-        return book
+    def search(self, query):
+        return self._parse_page(params={"s": query})
 
-    @classmethod
-    def scrap_all_audiobooks(cls, limit=-1, offset=0):
-        return cls.scrap_popular()
 
+    def iterate_all(self):
+        """Yield an AudioBook for every URL in the posts sitemap, skipping pages that fail to parse."""
+        sm = SiteMapParser('https://stephenkingaudiobook.net/wp-sitemap-posts-post-1.xml')
+        for url in sm.get_urls():  # entries are converted to plain URL strings with str()
+            try:
+                yield StephenKingAudioBook(url=str(url)).parse_page()
+            except Exception:  # not a bare except: GeneratorExit must propagate on close()
+                continue
 
 if __name__ == "__main__":
     from pprint import pprint
 
-    # for book in StephenKingAudioBooks.search_audiobooks(title="Dark Tower"):
-    #     pprint(book.as_json)
-
     scraper = StephenKingAudioBooks()
-    for book in scraper.scrap_popular():
-        pprint(book.as_json)
+    for book in scraper.search("Dark Tower"):
+        pprint(book)
+
+    exit()
+    for book in scraper.iterate_all():
+        pprint(book)
diff --git a/audiobooker/scrappers/storynory.py b/audiobooker/scrappers/storynory.py
index f065ada..97eecfc 100644
--- a/audiobooker/scrappers/storynory.py
+++ b/audiobooker/scrappers/storynory.py
@@ -1,82 +1,71 @@
+from dataclasses import dataclass
+
 import requests
+from sitemapparser import SiteMapParser
 
 from audiobooker.base import AudioBook
+from audiobooker.exceptions import ParseErrorException
 from audiobooker.scrappers import AudioBookSource
+from audiobooker.utils import get_soup, extractor_narrator
 
 
-class StoryNoryAudioBook(AudioBook):
-    base_url = "https://www.storynory.com/"
+@dataclass
+class StoryNoryAudioBook:
+    url: str
+    image: str = ""
 
     def parse_page(self):
+        soup = get_soup(self.url)
         streams = []
-        for url in self.soup.find_all("a"):
-            if url["href"].endswith(".mp3"):
-                if url["href"] not in streams:
-                    streams.append(url["href"])
-
-        title = self.soup.find("title").text
-        img = self.soup.find("img")
-        if img.get("data-ezsrc"):
-            img = img["data-ezsrc"]
-        elif img.get("src"):
-            img = img["src"]
+        for a in soup.find_all("a"):
+            href = (a.get("href") or "").strip()
+            if not href or not (a.get("download") or href.endswith(".mp3")):
+                continue  # anchor without a target, or not a download / mp3 link
+            if href.startswith("//"):
+                href = "https:" + href  # protocol-relative link
+            if href not in streams:  # list every file only once
+                streams.append(href)
+
+        title = soup.find("title").text.strip().replace(" - Storynory", "")
+        img = soup.find("img")
+        if img and img.get("src"):
+            img = img["src"].strip()
+            if img.startswith("//"):
+                img = "https:" + img
         else:
-            img = self.img
-        print(streams)
-        return {"title": title.strip(),
-                "streams": streams,
-                "img": img}
-
-    def from_page(self):
-        data = self.parse_page()
-        self.title = data["title"]
-        self.img = data.get("img", self.img)
-        self.raw.update(data)
-        self._stream_list = data["streams"]
-
-    def __repr__(self):
-        return "StoryNoryAudioBook(" + str(
-            self) + ", " + self.book_id + ")"
+            img = self.image
+        if not streams:
+            raise ParseErrorException("No streams found")
+        for d in soup.find_all("p"):
+            if d.text.lower().startswith("download"):
+                continue
+            desc = d.text.split("\n")[0][:100]
+            break
+        else:
+            desc = ""
+        return AudioBook(
+            title=title.strip(),
+            description=desc,
+            streams=streams,
+            narrator=extractor_narrator(desc),
+            image=img,
+            language="en"
+        )
 
 
 class StoryNory(AudioBookSource):
-    # TODO categories / tags
-    base_url = "https://www.storynory.com"
 
     @classmethod
-    def _parse_page(cls, html, limit=-1):
-        soup = cls._get_soup(html)
-        for entry in soup.find_all("div", {"class": "bf-item"}):
-            try:
-                a = entry.find("a")
-                img = entry.find("img")
-                book = StoryNoryAudioBook(from_data={
-                    "title": entry.text,
-                    "url": a["href"],
-                    "img": img["src"]
-                })
-                book.from_page()  # parse url
-                yield book
-            except:
-                continue
+    def _parse_search_page(cls, url="https://www.storynory.com",
+                           limit=-1, **params):
+        soup = get_soup(url, **params)
 
-    @classmethod
-    def _parse_search_page(cls, html, limit=-1):
-        soup = cls._get_soup(html)
         for entry in soup.find_all("div", {"class": "panel-body"}):
             try:
                 a = entry.find("a")
                 img = entry.find("img")
-                book = StoryNoryAudioBook(from_data={
-                    "title": a.text,
-                    "description": entry.find("p").text,
-                    "url": a["href"],
-                    "img": img["src"] if img else ""
-                })
-                print(book)
-                book.from_page()  # parse url
-                print(book)
-                yield book
+                yield StoryNoryAudioBook(url=a["href"],
+                                         image= img["src"] if img else "").parse_page()
             except:
                 continue
 
@@ -84,62 +73,29 @@ def _parse_search_page(cls, html, limit=-1):
             limit -= 1
             next_page = soup.find("li", {"class": "bpn-next-link"})
             if next_page:
-                html = requests.get(next_page.find("a")["href"]).text
-                for ntry in cls._parse_search_page(html, limit=limit):
+                url = next_page.find("a")["href"]
+                for ntry in cls._parse_search_page(url=url, limit=limit, **params):
                     yield ntry
 
-    @classmethod
-    def scrap_popular(cls, limit=-1, offset=0):
-        html = requests.get(cls.base_url).text
-        soup = cls._get_soup(html)
-        for a in soup.find_all("a"):
-            url = a["href"]
-            if not url.startswith("https://www.storynory.com/"):
-                continue
-            img = a.find("img")
-            if not img:
-                continue
-            p = a.find("p")
-            desc = ""
-            if p:
-                desc = p.text
-            try:
-                book = StoryNoryAudioBook(description=desc,
-                                          url=url,
-                                          title=img["alt"],
-                                          img=img["data-ezsrc"])
-            except:
-                book = StoryNoryAudioBook(description=desc,
-                                          url=url,
-                                          img=img["src"])
-            book.from_page()  # parse book url for streams
-            yield book
-
-    @classmethod
-    def search_audiobooks(cls, since=None, author=None, title=None, tag=None,
-                          limit=25):
-        query = ""
-        if title:
-            query += title + " "
-        if tag:
-            query += tag + " "
-        if author:
-            query += author + " "
-        html = requests.get(cls.base_url, params={"s": query}).text
-        return cls._parse_search_page(html)
-
-    @classmethod
-    def get_audiobook(cls, book_id):
-        url = cls.base_url + '/' + book_id
-        book = StoryNoryAudioBook(url=url)
-        return book
+    def search(self, query):
+        return self._parse_search_page(params={"s": query})
 
-    @classmethod
-    def scrap_all_audiobooks(cls, limit=-1, offset=0):
-        return cls.scrap_popular()
+    def iterate_all(self):
+        """Yield an AudioBook for each URL in the two post sitemaps that has audio streams."""
+        sitemaps = (
+            'https://www.storynory.com/post-sitemap1.xml',
+            'https://www.storynory.com/post-sitemap2.xml',
+        )
+        for sitemap in sitemaps:
+            # each sitemap entry is converted to a plain URL string with str()
+            for entry in SiteMapParser(sitemap).get_urls():
+                try:
+                    yield StoryNoryAudioBook(url=str(entry)).parse_page()
+                except ParseErrorException:
+                    continue  # no streams on the page: a blog post rather than a story
 
 
 if __name__ == "__main__":
     scraper = StoryNory()
-    for book in scraper.scrap_popular():
-        print(book.as_json)
+    for book in scraper.search("snow white"):
+        print(book)
diff --git a/audiobooker/scrappers/thoughtaudio.py b/audiobooker/scrappers/thoughtaudio.py
index 74361a0..77c449f 100644
--- a/audiobooker/scrappers/thoughtaudio.py
+++ b/audiobooker/scrappers/thoughtaudio.py
@@ -1,123 +1,85 @@
-import requests
+from dataclasses import dataclass
+
 from sitemapparser import SiteMapParser
 
-from audiobooker.base import AudioBook
+from audiobooker.base import AudioBook, BookAuthor, AudiobookNarrator
 from audiobooker.scrappers import AudioBookSource
+from audiobooker.utils import get_soup, extract_year, normalize_name
 
 
-class ThoughtAudioAudioBook(AudioBook):
-    base_url = "http://thoughtaudio.com/"
+@dataclass
+class ThoughtAudioAudioBook:
+    url: str
 
     def parse_page(self):
+        soup = get_soup(self.url)
         streams = []
-        for url in self.soup.find_all("a"):
+        img = None
+        for url in soup.find_all("a"):
             if url["href"].endswith(".mp3"):
                 streams.append(url["href"])
-        for url in self.soup.find_all("iframe"):
+        for url in soup.find_all("iframe"):
             if "youtube" not in url["src"]:
                 continue
+            vid = url["src"].split("/")[-1].split("?")[0]
+            img = f"https://img.youtube.com/vi/{vid}/0.jpg"
             streams.append(
                 url["src"].split("?feature=oembed")[0].
                 replace("https://www.youtube.com/embed/", "https://www.youtube.com/watch?v=")
             )
-        title = self.soup.find("title").text
-        img = self.img
-
-        return {"title": title.strip(),
-                "streams": streams,
-                "img": img}
-
-    def from_page(self):
-        data = self.parse_page()
-        self.title = data["title"]
-        self.img = data.get("img", self.img)
-        self._stream_list = data["streams"]
-        self.raw.update(data)
-
-    def __repr__(self):
-        return "ThoughtAudioAudioBook(" + str(
-            self) + ", " + self.book_id + ")"
+        title = soup.find("title").text.split(" – ThoughtAudio")[0].split(": ")[-1]
+
+        if not title:
+            title = soup.find("span", {"class": "Text-Head"}).text
+
+        narrator = None
+        author = None
+        desc = ""
+        for s in soup.find_all("p"):
+            if "WRITTEN BY:" in s.text:
+                name = s.text.split("WRITTEN BY:")[-1]
+                f, l = normalize_name(name)
+                author = BookAuthor(first_name=f, last_name=l)
+
+            elif "NARRATED BY:" in s.text:
+                name = s.text.split("NARRATED BY:")[-1]
+                f, l = normalize_name(name)
+                narrator = AudiobookNarrator(first_name=f, last_name=l)
+            elif s.text.strip() and narrator and author:
+                desc = s.text.split("\n")[0]
+                break
+        if not img:
+            # no YouTube thumbnail was derived: fall back to a page image, preferring the second <img>
+            pics = soup.find_all("img")
+            pic = pics[1] if len(pics) > 1 else (pics[0] if pics else None)
+            # keep the URL string, not the bs4 Tag; "" when the page has no usable image
+            img = pic.get("src", "") if pic is not None else ""
+        return AudioBook(
+            title=title.strip(),
+            streams=streams,
+            image=img or "",
+            description=desc,
+            narrator=narrator,
+            year=extract_year(desc),
+            authors=[author] if author else [],
+            tags=["ThoughtAudio"],
+            language="en"
+        )
 
 
 class ThoughtAudio(AudioBookSource):
-    base_url = "http://thoughtaudio.com"
-    _tags = ["Philosophy"]
-    _tag_pages = {"Philosophy": 'http://thoughtaudio.com'}
-
-    @classmethod
-    def _parse_page(cls, html, limit=-1):
-        soup = cls._get_soup(html)
-        for entry in soup.find_all("div", {"class": "bf-item"}):
-            try:
-                a = entry.find("a")
-                img = entry.find("img")
-                book = ThoughtAudioAudioBook(from_data={
-                    "title": entry.text,
-                    "url": a["href"],
-                    "img": img["src"]
-                })
-                book.from_page()  # parse url
-                yield book
-            except:
-                continue
 
-    @classmethod
-    def _parse_search_page(cls, html, limit=-1):
-        soup = cls._get_soup(html)
-        for entry in soup.find_all("article"):
-            try:
-                a = entry.find("a")
-                img = entry.find("img")
-                book = ThoughtAudioAudioBook(from_data={
-                    "title": a.text,
-                    "url": a["href"],
-                    "img": img["src"]
-                })
-                book.from_page()  # parse url
-                yield book
-            except:
-                continue
-
-    @classmethod
-    def scrap_popular(cls, limit=-1, offset=0):
-        html = requests.get(cls.base_url).text
-        return cls._parse_page(html)
-
-    @classmethod
-    def search_audiobooks(cls, since=None, author=None, title=None, tag=None,
-                          limit=25):
-        query = ""
-        if title:
-            query += title + " "
-        if tag:
-            query += tag + " "
-        if author:
-            query += author + " "
-        html = requests.get(cls.base_url, params={"s": query}).text
-        return cls._parse_search_page(html)
-
-    @classmethod
-    def get_audiobook(cls, book_id):
-        url = cls.base_url + '/' + book_id
-        book = ThoughtAudioAudioBook(url=url)
-        return book
-
-    @classmethod
-    def scrap_all_audiobooks(cls, limit=-1, offset=0):
+    def iterate_all(self):
         sm = SiteMapParser('http://thoughtaudio.com/wp-sitemap-posts-post-1.xml')  # reads /sitemap.xml
         urls = sm.get_urls()  # returns iterator of sitemapper.Url instances
         for url in urls:
             url = str(url)
-            title = url.strip("/").split("/")[-1].replace("-", " ").title()
-            yield ThoughtAudioAudioBook(url=url, title=title)
+            yield ThoughtAudioAudioBook(url=url).parse_page()
 
 
 if __name__ == "__main__":
     from pprint import pprint
 
     scraper = ThoughtAudio()
-    for book in scraper.search_audiobooks(title="machine"):
-        pprint(book.as_json)
-
-    for book in scraper.scrap_all_audiobooks():
-        pprint(book.as_json)
+    for book in scraper.iterate_all():
+        pprint(book)
diff --git a/audiobooker/utils.py b/audiobooker/utils.py
index 6405502..56179a8 100644
--- a/audiobooker/utils.py
+++ b/audiobooker/utils.py
@@ -1,4 +1,7 @@
 import random
+import re
+
+from bs4 import BeautifulSoup
 
 USER_AGENTS = [
     ('Mozilla/5.0 (X11; Linux x86_64) '
@@ -32,3 +35,94 @@
 
 def random_user_agent():
     return random.choice(USER_AGENTS)
+
+
+def get_html(url, **kwargs):
+    """Fetch ``url`` through the shared scraper session and return its text.
+
+    Extra keyword arguments are forwarded to ``session.get``. If the first
+    request raises, it is retried once with ``verify=False``; ``None`` is
+    returned when the retry fails as well.
+    """
+    # function-scope import, presumably to avoid a circular import - TODO confirm
+    from audiobooker.scrappers import AudioBookSource
+    try:
+        return AudioBookSource.session.get(url, **kwargs).text
+    except Exception:
+        # NOTE(security): the retry disables TLS certificate verification,
+        # presumably to cope with hosts serving broken certificates.
+        # Merging into one dict (rather than verify=False next to **kwargs)
+        # keeps a caller-supplied ``verify`` from raising a duplicate-keyword
+        # TypeError on the retry.
+        retry_kwargs = {**kwargs, "verify": False}
+        try:
+            return AudioBookSource.session.get(url, **retry_kwargs).text
+        except Exception:
+            return None
+
+
+def get_soup(url, **kwargs):
+    """Parse the page at ``url`` with ``BeautifulSoup``; ``None`` if no HTML.
+
+    Keyword arguments are passed straight through to ``get_html``.
+    """
+    html = get_html(url, **kwargs)
+    if not html:
+        return None
+    return BeautifulSoup(html, "html.parser")
+
+
+def extract_year(title: str) -> int:
+    """Return the first standalone four-digit number in ``title``, or 0.
+
+    Any run of exactly four digits between word boundaries counts; the value
+    is not checked for being a plausible year.
+    """
+    match = re.search(r'\b\d{4}\b', title)
+    return int(match.group()) if match else 0
+
+
+def extractor_narrator(title):
+    """Extract the narrator named in an audiobook title.
+
+    Looks for the first "read by", "audiobook by" or "narrated by" marker
+    (case-insensitive) and keeps at most the two words that follow it.
+
+    Returns an ``AudiobookNarrator``, or ``None`` when there is no marker or
+    no name after it.
+    """
+    from audiobooker.base import AudiobookNarrator
+    title = title.replace("\xa0", " ").strip()  # non-breaking spaces -> spaces
+    # capture lazily up to an en dash separator or the end of the title
+    matches = re.findall(
+        r'\b(?:read by|audiobook by|narrated by)\b\s*(.*?)(?:\s*–|$)',
+        title, flags=re.IGNORECASE)
+    if not matches:
+        return None
+
+    # only the first marker is used; initials such as "H.P." form one token
+    names = re.findall(r'(?:[A-Z]\.)+|\S+', matches[0].strip())[:2]
+    if not names:
+        return None
+
+    first_name = names[0]
+    last_name = names[1] if len(names) > 1 else ""
+    # a lowercase word after a capitalised first name is ordinary text
+    # ("read by Natasha now has ..."), not a surname
+    if last_name and first_name[0].isupper() and not last_name[0].isupper():
+        last_name = ""
+    return AudiobookNarrator(first_name=first_name.title(),
+                             last_name=last_name.title())
+
+
+def normalize_name(name):
+    """Split a name string into a ``(first_name, last_name)`` tuple.
+
+    Parentheses are dropped and the name is title-cased; everything after the
+    first space becomes the last name ("" for single-word names).
+    """
+    name = name.replace("(", "").replace(")", "").title().strip()
+    # str.partition always yields both parts, so the result is a tuple
+    # whether or not the name contains a space
+    first_name, _, last_name = name.partition(" ")
+    return first_name, last_name.strip()
diff --git a/examples/search_librivox.py b/examples/search_librivox.py
index 4fe3274..281d3d2 100644
--- a/examples/search_librivox.py
+++ b/examples/search_librivox.py
@@ -17,7 +17,7 @@
 pprint(book.url)
 pprint(book.streams)
 pprint(book.runtime)
-pprint(book.rss_data)
+#pprint(book.rss_data)
 #book.play()
 a = ", ".join([au.first_name + au.last_name for au in book.authors])
 pprint(a)
diff --git a/setup.py b/setup.py
index bbddb4a..a0a12b5 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='audiobooker',
-    version='0.3.1',
+    version='0.5.0',
     packages=['audiobooker', 'audiobooker.scrappers'],
     install_requires=["requests", "bs4", "feedparser", "rapidfuzz",
                       "requests-cache", "site-map-parser"],
diff --git a/test/test_utils.py b/test/test_utils.py
new file mode 100644
index 0000000..692ec70
--- /dev/null
+++ b/test/test_utils.py
@@ -0,0 +1,66 @@
+"""Unit tests for the title-parsing helpers in ``audiobooker.utils``."""
+import unittest
+
+from audiobooker.utils import extract_year
+from audiobooker.utils import extractor_narrator
+
+
+class TestExtractorNarrator(unittest.TestCase):
+    """Narrator extraction for each supported "... by <name>" marker."""
+
+    def _check(self, title, first_name, last_name):
+        """Assert that ``title`` yields a narrator with the given names."""
+        narrator = extractor_narrator(title)
+        self.assertEqual(narrator.first_name, first_name)
+        self.assertEqual(narrator.last_name, last_name)
+
+    def test_single_read_by(self):
+        self._check("1996 Stephen King – The Regulators Audiobook read by Frank Muller",
+                    "Frank", "Muller")
+        self._check("The Shining Audiobook read by Campbell Scott",
+                    "Campbell", "Scott")
+        # the lowercase words after the name must not become a surname
+        self._check("Alice In Wonderland read by Natasha now has its own podcast",
+                    "Natasha", "")
+
+    def test_audiobook_by(self):
+        self._check("Harry Potter and the Chamber of Secrets Audiobook by Jim Dale",
+                    "Jim", "Dale")
+        self._check("Pride and Prejudice Audiobook by Jane Austen",
+                    "Jane", "Austen")
+
+    def test_narrated_by(self):
+        self._check("The shadow over innsmouth by H.P. Lovecraft, narrated by Wayne June",
+                    "Wayne", "June")
+        self._check("The Catcher in the Rye by J.D. Salinger, narrated by Matt Damon",
+                    "Matt", "Damon")
+
+    def test_no_narrator(self):
+        self.assertIsNone(extractor_narrator("The Great Gatsby"))
+
+
+class TestExtractYear(unittest.TestCase):
+    """Year extraction from free-form titles."""
+
+    def test_year_present(self):
+        leading = "1996 Stephen King – The Regulators Audiobook read by Frank Muller"
+        self.assertEqual(extract_year(leading), 1996)
+
+        bracketed = "Harry Potter and the Chamber of Secrets (1998) Audiobook by Jim Dale"
+        self.assertEqual(extract_year(bracketed), 1998)
+
+    def test_no_year(self):
+        self.assertEqual(extract_year("The Great Gatsby"), 0)
+
+    def test_multiple_years(self):
+        # Only the first year should be extracted
+        two_years = "The Odyssey (2001) and Moby Dick (1954) Audiobook by Some Narrator"
+        self.assertEqual(extract_year(two_years), 2001)
+
+    def test_year_in_sentence(self):
+        sentence = "This is a sentence with the year 2022 in it."
+        self.assertEqual(extract_year(sentence), 2022)
+
+
+if __name__ == '__main__':
+    unittest.main()