diff --git a/CHANGELOG.md b/CHANGELOG.md
index 67eb58f..5a6f82c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,10 +9,16 @@
## Latest changes
+### Feature
+* Add `as_json` keyword parameter to `convert`
### Internal
+* Move the `classes.typeclass` functions to `transformer.py` and rename the `convert` function to `transform`. This lets us add a new `convert` function in `__init__.py` that accepts the new parameter, so this feature does not introduce a breaking change.
* Fix CI after main branch change
## 2.0.0
No new features in this release, but we now support python 3.9 and 3.10
diff --git a/setup.cfg b/setup.cfg
index 81afad0..16e905e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -88,3 +88,9 @@ warn_unused_ignores = True
warn_redundant_casts = True
warn_unused_configs = True
warn_unreachable = True
+exclude_lines =
+ pragma: no cover
diff --git a/soup2dict/__init__.py b/soup2dict/__init__.py
index cdc8b84..3e97d0d 100644
--- a/soup2dict/__init__.py
+++ b/soup2dict/__init__.py
@@ -1,95 +1,44 @@
-from typing import Any, Dict, Optional, Union
+import json
+from typing import Union, overload
-from bs4 import BeautifulSoup, element
-from classes import typeclass
+from bs4 import BeautifulSoup
+from typing_extensions import Literal
+from soup2dict.transformer import transform
-def _get_key_name(
- instance: Any,
- instance_type = type(instance)
- if instance_type == element.Tag:
- return instance.name
- return str(instance_type.__name__).lower()
-def _attribute_at_prepender(
- instance: dict,
+def convert(
+ soup: BeautifulSoup,
+ *,
+ as_json: Literal[False],
) -> dict:
- prefixed = {}
- for key, attribute_value in instance.items():
- prefixed['@{key}'.format(key=key)] = attribute_value
- return prefixed
-def convert(instance) -> dict:
- """Convert beautifulsoup to dict. This is a typeclass definition."""
+ """Return dict when as_json is false."""
-def _convert_bs(instance: BeautifulSoup) -> dict:
- """Handle The Soup."""
- return convert(instance.contents)
+def convert(
+ soup: BeautifulSoup,
+ *,
+ as_json: Literal[True],
+) -> str:
+ """Return json string when as_json is True."""
-def _convert_rs(instance: Union[element.ResultSet, list]) -> dict:
- """Handle list and ResultSet types."""
- transformed: Dict[str, Any] = {}
- for soup_element in instance:
- parsed = convert(soup_element)
- if not parsed:
- continue
- key_name = _get_key_name(soup_element)
- dict_value = transformed.get(key_name, [])
- dict_value.append(parsed)
- transformed[key_name] = dict_value
- return transformed
-@convert.instance(element.NavigableString) # type: ignore
-def _convert_ns(
- instance: Union[
- element.NavigableString,
- element.Comment,
- element.CData,
- element.ProcessingInstruction,
- element.XMLProcessingInstruction,
- element.Declaration,
- element.Doctype,
- element.Stylesheet,
- element.Script,
- element.TemplateString,
- ],
-) -> Optional[str]:
- """Handle NavigableString type."""
- return str(instance).strip().strip('\n') or None
+def convert(
+ soup: BeautifulSoup,
+) -> dict:
+ """When as_json is not specified return value is a dict."""
-def _convert_tag(instance: element.Tag) -> dict:
- """Handle Tag type."""
- tag_result = _attribute_at_prepender(instance.attrs)
- tag_result['#text'] = ' '.join(
- [text.replace('\n ', ' ') for text in instance.stripped_strings],
- )
- tag_result.update(convert(instance.contents))
+def convert(
+ soup: BeautifulSoup,
+ *,
+ as_json: bool = False,
+) -> Union[dict, str]:
+ """Run soup to dict transformer and dumps to json if as_json is True."""
+ if as_json:
+ return json.dumps(transform(soup))
- return tag_result
+ return transform(soup)
diff --git a/soup2dict/transformer.py b/soup2dict/transformer.py
new file mode 100644
index 0000000..efdabb5
--- /dev/null
+++ b/soup2dict/transformer.py
@@ -0,0 +1,95 @@
+from typing import Any, Dict, Optional, Union
+from bs4 import BeautifulSoup, element
+from classes import typeclass
+def _get_key_name(
+ instance: Any,
+ instance_type = type(instance)
+ if instance_type == element.Tag:
+ return instance.name
+ return str(instance_type.__name__).lower()
+def _attribute_at_prepender(
+ instance: dict,
+) -> dict:
+ prefixed = {}
+ for key, attribute_value in instance.items():
+ prefixed['@{key}'.format(key=key)] = attribute_value
+ return prefixed
+def transform(instance) -> dict:
+ """Convert beautifulsoup to dict. This is a typeclass definition."""
+@transform.instance(BeautifulSoup) # type: ignore
+def _transform_bs(instance: BeautifulSoup) -> dict:
+ """Handle The Soup."""
+ return transform(instance.contents)
+@transform.instance(element.ResultSet) # type: ignore
+def _transform_rs(instance: Union[element.ResultSet, list]) -> dict:
+ """Handle list and ResultSet types."""
+ transformed: Dict[str, Any] = {}
+ for soup_element in instance:
+ parsed = transform(soup_element)
+ if not parsed:
+ continue
+ key_name = _get_key_name(soup_element)
+ dict_value = transformed.get(key_name, [])
+ dict_value.append(parsed)
+ transformed[key_name] = dict_value
+ return transformed
+@transform.instance(element.NavigableString) # type: ignore
+def _transform_ns(
+ instance: Union[
+ element.NavigableString,
+ element.Comment,
+ element.CData,
+ element.ProcessingInstruction,
+ element.XMLProcessingInstruction,
+ element.Declaration,
+ element.Doctype,
+ element.Stylesheet,
+ element.Script,
+ element.TemplateString,
+ ],
+) -> Optional[str]:
+ """Handle NavigableString type."""
+ return str(instance).strip().strip('\n') or None
+@transform.instance(element.Tag) # type: ignore
+def _transform_tag(instance: element.Tag) -> dict:
+ """Handle Tag type."""
+ tag_result = _attribute_at_prepender(instance.attrs)
+ tag_result['#text'] = ' '.join(
+ [text.replace('\n ', ' ') for text in instance.stripped_strings],
+ )
+ tag_result.update(transform(instance.contents))
+ return tag_result
diff --git a/tests/test_attributes.py b/tests/test_attributes.py
index b68ffdb..b733721 100644
--- a/tests/test_attributes.py
+++ b/tests/test_attributes.py
@@ -8,7 +8,7 @@ def test_attributes_are_at_prepended():
html_doc = """
- soup = BeautifulSoup(html_doc, 'html.parser')
+ soup: BeautifulSoup = BeautifulSoup(html_doc, 'html.parser')
main_element = convert(soup)['main'][0]
assert main_element['@class'] == ['bob', 'arne']
assert main_element['@abc'] == 'test'
diff --git a/tests/test_is_dumped_to_json.py b/tests/test_is_dumped_to_json.py
new file mode 100644
index 0000000..143a8ec
--- /dev/null
+++ b/tests/test_is_dumped_to_json.py
@@ -0,0 +1,45 @@
+import json
+from bs4 import BeautifulSoup
+from soup2dict import convert
+html_doc = """
+ test
+ test
+ test
+expected_result = {
+ 'main': [
+ {'#text': ''},
+ {
+ '#text': 'test test test',
+ 'sub': [
+ {
+ '#text': 'test',
+ 'navigablestring': ['test'],
+ },
+ {
+ '#text': 'test',
+ 'navigablestring': ['test'],
+ },
+ {
+ '#text': 'test',
+ 'navigablestring': ['test'],
+ },
+ ],
+ },
+ ],
+def test_as_json_true_dumps_to_json():
+ """Convert result should be json when as_json is true."""
+ soup = BeautifulSoup(html_doc, 'html.parser')
+ assert convert(soup, as_json=True) == json.dumps(expected_result)