diff --git a/city_scrapers/spiders/il_labor.py b/city_scrapers/spiders/il_labor.py deleted file mode 100644 index 56006277e..000000000 --- a/city_scrapers/spiders/il_labor.py +++ /dev/null @@ -1,114 +0,0 @@ -import re -from collections import defaultdict -from datetime import datetime -from itertools import zip_longest - -from city_scrapers_core.constants import BOARD -from city_scrapers_core.items import Meeting -from city_scrapers_core.spiders import CityScrapersSpider - - -def grouper(n, iterable, fillvalue=None): - """From itertools recipes""" - args = [iter(iterable)] * n - return zip_longest(fillvalue=fillvalue, *args) - - -class IlLaborSpider(CityScrapersSpider): - name = "il_labor" - agency = "Illinois Labor Relations Board" - start_urls = ["https://www2.illinois.gov/ilrb/meetings/Pages/default.aspx"] - event_timezone = "America/Chicago" - location = { - "name": "Room S-401", - "address": "160 N LaSalle St, Chicago, IL 60602", - } - - def parse(self, response): - agenda_map = self._parse_links(response) - content_lines = response.css(".soi-article-content * *::text").extract() - clean_lines = [ - re.sub(r"\s+", " ", l).strip() for l in content_lines if l.strip() - ] - content = "\n".join(clean_lines) - meeting_split = re.split(r"^([A-Z ]+)$", content, flags=re.M) - if not re.match(r"^[A-Z ]+$", meeting_split[0]): - meeting_split = meeting_split[1:] - - for title_str, item in grouper(2, meeting_split, fillvalue=""): - title = self._parse_title(title_str) - start = self._parse_start(item) - if not start: - continue - # Links can be reused, so add query string to URL - links = [] - for link in agenda_map[title]: - link["href"] += f"?dt={start.strftime('%Y-%m-%d')}" - links.append(link) - meeting = Meeting( - title=title, - description="", - classification=BOARD, - start=start, - end=None, - time_notes="", - all_day=False, - location=self._parse_location(item), - links=links, - source=response.url, - ) - meeting["status"] = self._get_status( - meeting, text=" 
".join([title_str, item]) - ) - meeting["id"] = self._get_id(meeting) - yield meeting - - def _parse_title(self, title_str): - return re.sub(r" meeting$", "", title_str.strip(), flags=re.I).strip().title() - - def _parse_start(self, item): - """Parse start date and time from item text""" - date_line = " ".join(item.split("\n")[1:]) - date_match = re.search(r"[A-Z][a-z]{2,8} \d{1,2}", date_line) - if not date_match: - return - year_str = str(datetime.now().year) - year_match = re.search(r"20\d{2}", date_line) - if year_match: - year_str = year_match.group() - - date_str = " ".join([date_match.group().replace(",", ""), year_str]) - time_match = re.search(r"\d{1,2}:\d{2} ?[apm\.]{2,4}", item, flags=re.I) - time_str = "12:00am" - if time_match: - time_str = re.sub(r"[ \.]", "", time_match.group()) - try: - return datetime.strptime(" ".join([date_str, time_str]), "%B %d %Y %I:%M%p") - except ValueError: - return - - def _parse_location(self, item): - addr_matches = re.findall(r"^\d+ .*$", item, flags=re.M | re.DOTALL) - if "dialing" in item: - return {"name": "", "address": ""} - if "160 N" in item or len(addr_matches) == 0: - return self.location - chi_addrs = [a for a in addr_matches if "Chicago" in a] - if len(chi_addrs) == 0: - addr_str = addr_matches[0] - else: - addr_str = chi_addrs[0] - return { - "name": "", - "address": addr_str.replace(" and", "").strip(), - } - - def _parse_links(self, response): - agenda_map = defaultdict(list) - for link in response.css(".soi-article-content a"): - link_title = " ".join(link.css("*::text").extract()).strip() - meeting_title = self._parse_title(link_title) - agenda_map[meeting_title].append( - {"title": "Agenda", "href": response.urljoin(link.attrib["href"])} - ) - return agenda_map diff --git a/tests/files/il_labor.html b/tests/files/il_labor.html deleted file mode 100644 index 2d2f0d65c..000000000 --- a/tests/files/il_labor.html +++ /dev/null @@ -1,548 +0,0 @@ - - - - - - - - Board Meetings - - Meetings - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- - - -
- -
- - - -
-
- -
- - -
-
- - - - -
-
- - - -
- -
- - - - -
-
-

- Board Meetings -

-
-
- - -
- - -
- -
- - -
- - -
-
-
- - - -
-
-
- -
-
-
-

The State and Local Panels of the Illinois Labor Relations Board meet separately on a monthly basis to discuss issues and cases pending before the Panels. Meetings are open to the public and are conducted in accordance with the Illinois Open Meetings Act.

If you would like to receive Board agendas via e-mail, contact - Jodi.Marr@illinois.gov.

 

- LOCAL PANEL MEETING
Tuesday, December 11 at 9:00 a.m.       

160 N. LaSalle Street, Room S-401, Chicago, IL          


- JOINT PANEL MEETING
Tuesday, December 11 at 9:30 a.m.

160 N. LaSalle Street, Room S-401, Chicago, IL


- STATE PANEL MEETING
Tuesday, December 11 at 10:00 a.m.

     160 N. LaSalle Street, Room S-401, Chicago, IL

-     
-
-
- - -
-
- -
-
- -
-
- -
-
- - -
-
-
- -
-
-
- - -
- - - - - -
-
- -
-
- -
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/tests/test_il_labor.py b/tests/test_il_labor.py deleted file mode 100644 index 17371293f..000000000 --- a/tests/test_il_labor.py +++ /dev/null @@ -1,75 +0,0 @@ -from datetime import datetime -from os.path import dirname, join - -import pytest # noqa -from city_scrapers_core.constants import BOARD, PASSED -from city_scrapers_core.utils import file_response -from freezegun import freeze_time - -from city_scrapers.spiders.il_labor import IlLaborSpider - -freezer = freeze_time("2018-12-12") -freezer.start() -test_response = file_response( - join(dirname(__file__), "files", "il_labor.html"), - url="https://www.illinois.gov/ilrb/meetings/Pages/default.aspx", -) -spider = IlLaborSpider() -parsed_items = [item for item in spider.parse(test_response)] -freezer.stop() - - -def test_count(): - assert len(parsed_items) == 3 - - -def test_title(): - assert parsed_items[0]["title"] == "Local Panel" - - -def test_description(): - assert parsed_items[0]["description"] == "" - - -def test_start(): - assert parsed_items[0]["start"] == datetime(2018, 12, 11, 9) - - -def test_end(): - assert parsed_items[0]["end"] is None - - -def test_id(): - assert parsed_items[0]["id"] == "il_labor/201812110900/x/local_panel" - - -def test_links(): - assert parsed_items[0]["links"] == [ - { - "href": "https://www.illinois.gov/ilrb/Documents/LPAgenda.pdf?dt=2018-12-11", # noqa - "title": "Agenda", - } - ] - - -def test_all_day(): - assert parsed_items[0]["all_day"] is False - - -def test_classification(): - assert parsed_items[0]["classification"] == BOARD - - -def test_status(): - assert parsed_items[0]["status"] == PASSED - - -def test_location(): - assert parsed_items[0]["location"] == spider.location - - -def test_source(): - assert ( - parsed_items[0]["source"] - == "https://www.illinois.gov/ilrb/meetings/Pages/default.aspx" - )