diff --git a/city_scrapers/spiders/il_health_facilities.py b/city_scrapers/spiders/il_health_facilities.py deleted file mode 100644 index 7e7f0a699..000000000 --- a/city_scrapers/spiders/il_health_facilities.py +++ /dev/null @@ -1,195 +0,0 @@ -from datetime import datetime - -import scrapy -from city_scrapers_core.constants import BOARD, FORUM, NOT_CLASSIFIED -from city_scrapers_core.items import Meeting -from city_scrapers_core.spiders import CityScrapersSpider - - -class IlHealthFacilitiesSpider(CityScrapersSpider): - name = "il_health_facilities" - agency = "Illinois Health Facilities and Services Review Board" - timezone = "America/Chicago" - start_urls = [ - "https://www2.illinois.gov/sites/hfsrb/events/Pages/Board-Meetings.aspx" - ] - - def parse(self, response): - """ - `parse` should always `yield` Meeting items. - - Change the `_parse_title`, `_parse_start`, etc methods to fit your - scraping needs. - """ - links = response.css("a") - parsed_links = [] - for link_element in links: - inner_link_element = link_element.css("h3") - - if inner_link_element: - href = link_element.attrib["href"] - - parsed_links.append(href) - - for link in parsed_links: - yield scrapy.http.Request(link, callback=self.parse_event_page) - - def parse_event_page(self, response): - # An example demonstrating the structure of the time data on the page: - #
- # Event Type: - # Board Meeting - #
- - event_type_string = item.css("p.soi-eventType").get() - event_type_string = event_type_string.split("")[1].strip().lower() - - if "board" in event_type_string: - return BOARD - - elif "forum" in event_type_string: - return FORUM - else: - return NOT_CLASSIFIED - - def _parse_start(self, item): - """Parse start datetime as a naive datetime object.""" - return None - - def _parse_end(self, item): - """Parse end datetime as a naive datetime object. Added by pipeline if None""" - return None - - def _parse_time_notes(self, item): - """Parse any additional notes on the timing of the meeting""" - return "" - - def _parse_all_day(self, item): - """Parse or generate all-day status. Defaults to False.""" - return False - - def _parse_location(self, item): - """Parse or generate location.""" - - # The address data for this webpage is a little malformed - - location_str1 = item.css("div.soi-event-title::text").get().strip() - location_str2 = item.css("div.soi-event-location-address1::text").get().strip() - location_str3 = item.css("div.soi-event-location-address2::text").get().strip() - - address_string = location_str1 + ", " + location_str2 + location_str3 - - address_string = address_string.replace("`", "") - - return { - "address": address_string, - "name": "", - } - - def _parse_links(self, item): - """Parse or generate links.""" - - links = [ - { - "href": "https://www2.illinois.gov/sites/hfsrb/events/Pages/Board-Meetings.aspx", # noqa - "title": "Board and Subcommittee Meetings", - }, - { - "href": "https://www2.illinois.gov/sites/hfsrb/events/Pages/Previous-Meetings.aspx", # noqa - "title": "Previous Meeting", - }, - { - "href": "https://www2.illinois.gov/sites/hfsrb/events/Pages/Public-Hearing.aspx", # noqa - "title": "Public Hearings", - }, - ] - - return links - - def _parse_source(self, response): - """Parse or generate source.""" - return response.url diff --git a/tests/files/il_health_facilities.html b/tests/files/il_health_facilities.html deleted file mode 100644 index 4bfb2b192..000000000 --- a/tests/files/il_health_facilities.html +++ /dev/null @@ -1,694 +0,0 @@ - - - - -