-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #47 from szegedai/duration_parser
Duration parser
- Loading branch information
Showing
6 changed files
with
166 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
from hun_date_parser.date_textualizer.datetime_textualizer import DatetimeTextualizer, datetime2text | ||
from hun_date_parser.date_parser.datetime_extractor import DatetimeExtractor, text2datetime, text2date, text2time | ||
from hun_date_parser.duration_parser.duration_parsers import parse_duration | ||
|
||
__all__ = ["DatetimeTextualizer", "DatetimeExtractor", "datetime2text", "text2datetime", "text2date", "text2time"] | ||
__all__ = ["DatetimeTextualizer", "DatetimeExtractor", "datetime2text", "text2datetime", "text2date", "text2time", | ||
"parse_duration"] | ||
|
||
__version__ = "0.2.8" | ||
__version__ = "0.2.9" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
from typing import TypedDict, Optional, Sequence | ||
import re | ||
from hun_date_parser.utils import DateTimePartConatiner, remove_accent, word_to_num, Minute | ||
from hun_date_parser.date_parser.patterns import R_HOUR_MIN_D, R_HOUR_HOUR_D, R_HOUR_D | ||
|
||
|
||
class DateParts(TypedDict):
    """Result dictionary returned by ``duration_parser``."""

    # Text the result refers to; duration_parser stores its input string here.
    match: str
    # Parsed parts; duration_parser stores either a single Minute or nothing.
    date_parts: Sequence[DateTimePartConatiner]
|
||
|
||
def convert_hour_to_minutes(hour_str: Optional[str]) -> float:
    """Translate an hour expression into a number of minutes.

    A ",5" fragment in the text marks an extra half hour; the rest of the
    text is handed to ``word_to_num``.

    :param hour_str: Hour text (digits or a number word), or None.
    :return: The minutes represented by the text, or 0 when the text is None
        or ``word_to_num`` reports failure (-1).
    """
    if hour_str is None:
        return 0

    has_half = ",5" in hour_str
    whole_part = hour_str.replace(",5", "") if has_half else hour_str

    hours = word_to_num(whole_part)
    if hours == -1:
        # word_to_num could not interpret the text.
        return 0

    if has_half:
        return (hours + 0.5) * 60
    return hours * 60
|
||
|
||
def convert_quarter_hour(hour_str: Optional[str]) -> int:
    """Convert a quarter/half-hour word to minutes.

    Every keyword is looked up in the accent-stripped form of the input, so
    all three checks treat the text the same way.

    :param hour_str: Text that may contain a fraction-of-an-hour word, or None.
    :return: 45 for "haromnegyed", 15 for "negyed", 30 for "fel"; 0 when the
        text is None or contains none of these keywords.
    """
    if hour_str is None:
        return 0

    normalized = remove_accent(hour_str)

    # Order matters: "haromnegyed" contains "negyed", so it must be tested first.
    keyword_minutes = (
        ("haromnegyed", 45),
        ("negyed", 15),
        ("fel", 30),
    )
    for keyword, minutes in keyword_minutes:
        if keyword in normalized:
            return minutes
    return 0
|
||
|
||
def _extract_minutes(s: str) -> float:
    """Return the minutes described by ``s``, or 0 when no pattern matches."""
    # Pattern with an hour group and a minute group.
    match = re.match(R_HOUR_MIN_D, s)
    if match:
        hour_w, min_w = match.groups()
        minute_num = word_to_num(min_w)
        # word_to_num signals failure with -1; treat that as "no minutes"
        # instead of subtracting one minute from the hour part.
        if minute_num == -1:
            minute_num = 0
        return convert_hour_to_minutes(hour_w) + minute_num

    # Pattern with a single hour group.
    match = re.match(R_HOUR_D, s)
    if match:
        return convert_hour_to_minutes(match.groups()[0])

    # Pattern with an hour group followed by a quarter/half-hour word group.
    match = re.match(R_HOUR_HOUR_D, s)
    if match:
        hour_w, fraction_w = match.groups()
        return convert_hour_to_minutes(hour_w) + convert_quarter_hour(fraction_w)

    return 0


def duration_parser(s: str) -> DateParts:
    """Parse a duration expression into a ``DateParts`` result.

    The input is matched against R_HOUR_MIN_D, R_HOUR_D and R_HOUR_HOUR_D, in
    that order; the first pattern that matches determines the result.

    :param s: Input string containing the duration information.
    :return: A dict whose "match" is the input string and whose "date_parts"
        holds one ``Minute`` with the total minutes, or is empty when the
        total is not positive.
    """
    res_mins = _extract_minutes(s)

    res_date_parts = [Minute(res_mins, "duration_parser")] if res_mins > 0 else []

    return {
        "match": s,
        "date_parts": res_date_parts
    }
|
||
|
||
def parse_duration(s: str) -> Optional[int]:
    """
    Returns the duration in minutes found in the input string.

    :param s: Input string containing the duration information.
    :return: The duration in minutes as an integer, or None if no valid duration is found.
    """
    date_parts = duration_parser(s)["date_parts"]
    if not date_parts:
        return None
    # Half-hour inputs (",5") make the parser produce a float total such as
    # 90.0; coerce it so the documented integer contract holds.
    return int(date_parts[0].value)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import pytest | ||
from datetime import datetime | ||
|
||
from hun_date_parser.duration_parser.duration_parsers import duration_parser | ||
from hun_date_parser.date_parser.date_parsers import Minute | ||
|
||
|
||
# Parametrized cases for duration_parser: each entry pairs an input string with
# the list of date parts the parser is expected to return for it.
tf_durations = [
    # Minutes, whole/fractional hours and quarter/half-hour wordings, including
    # inputs with leading text, missing accents and surrounding punctuation.
    ("45 perc", [Minute(45, "duration_parser")]),
    ("45 percre ", [Minute(45, "duration_parser")]),
    ("50 perces", [Minute(50, "duration_parser")]),
    ("120 perc", [Minute(120, "duration_parser")]),
    ("negyed óráig", [Minute(15, "duration_parser")]),
    (" háromnegyed óra", [Minute(45, "duration_parser")]),
    ("25 perc", [Minute(25, "duration_parser")]),
    ("26 perc", [Minute(26, "duration_parser")]),
    ("1 óra 25 perc", [Minute(85, "duration_parser")]),
    ("1,5 óráig", [Minute(90, "duration_parser")]),
    ("2,5 óráig", [Minute(150, "duration_parser")]),
    ("1 óráig", [Minute(60, "duration_parser")]),
    ("1 és negyed óráig", [Minute(75, "duration_parser")]),
    ("1 és negyedóráig", [Minute(75, "duration_parser")]),
    ("egy és fél óráig", [Minute(90, "duration_parser")]),
    ("félóráig", [Minute(30, "duration_parser")]),
    ("eddig 3 és negyed óráig", [Minute(195, "duration_parser")]),
    ("eddig 3 es negyed oraig", [Minute(195, "duration_parser")]),
    ("eddig: 1,5 óráig", [Minute(90, "duration_parser")]),
    ("eddig: 45 percre", [Minute(45, "duration_parser")]),
    ("egy óra 10 percre", [Minute(70, "duration_parser")]),
    (": egy óra 10 percre :", [Minute(70, "duration_parser")]),

    # Two- and three-digit minute counts, alone and combined with hours.
    ("16 percre", [Minute(16, "duration_parser")]),
    ("99 percre", [Minute(99, "duration_parser")]),
    ("999 percre", [Minute(999, "duration_parser")]),
    ("2 óra 16 percre", [Minute(136, "duration_parser")]),

    # Whole-hour inputs with the "órára" suffix.
    ("3 órára", [Minute(180, "duration_parser")]),
    ("4 órára", [Minute(240, "duration_parser")]),
    ("5 órára", [Minute(300, "duration_parser")]),
    ("6 órára", [Minute(360, "duration_parser")]),
    ("10 órára", [Minute(600, "duration_parser")]),

    # Inputs for which no duration is expected (empty result list).
    ("16", []),
    ("100 órára", []),
    ("", []),
    (" fél", []),
    (" jövő kedd", []),
    ("délig", []),
]
|
||
@pytest.mark.parametrize("inp, exp", tf_durations)
def test_named_month(inp, exp):
    """Check duration_parser's date parts against every tf_durations case.

    NOTE(review): despite its name, this test exercises duration parsing.
    """
    parsed_parts = duration_parser(s=inp)["date_parts"]

    assert parsed_parts == exp