Skip to content

Commit

Permalink
Handle more issues with column names
Browse files Browse the repository at this point in the history
Column names have extra whitespace in them sometimes.
  • Loading branch information
MatMoore committed Apr 18, 2020
1 parent f545928 commit 90c248d
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
7 changes: 2 additions & 5 deletions covidapi/import_data_jh.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .db.database import SessionLocal, engine, Base
from .jh_cleaning.lookup_table import Matcher
from .jh_cleaning.region_info import RegionNames
from .jh_cleaning.clean_columns import clean_extra_whitespace
from sqlalchemy.orm.exc import NoResultFound
from datetime import datetime, date, timedelta
from requests import Session
Expand Down Expand Up @@ -40,11 +41,7 @@ def fetch_report(self, report_date):

records = []
for record in csv.DictReader((line.decode('utf8') for line in response.iter_lines())):
if '\ufeffProvince/State' in record:
# 2020-03-13 includes this invisible character, which messed up the column names
# see https://github.com/CSSEGISandData/COVID-19/pull/1738
record['Province/State'] = record['\ufeffProvince/State']

record = clean_extra_whitespace(record)
records.append(record)

return records
Expand Down
5 changes: 5 additions & 0 deletions covidapi/jh_cleaning/clean_columns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
def clean_extra_whitespace(record):
    """
    Return a copy of *record* with stray characters trimmed from column names.

    Some upstream CSV files have header cells that carry a leading byte
    order mark (U+FEFF, e.g. a BOM-prefixed 'FIPS' column) and/or
    surrounding whitespace. Both are removed from the start and end of
    every string key so that lookups by the canonical column name work.

    :param record: mapping of column name -> cell value, e.g. a row
        produced by ``csv.DictReader``.
    :return: a new ``dict`` with cleaned keys; values are left untouched
        and the input mapping is not modified.
    """
    bom = '\ufeff'
    cleaned = {}
    for key, value in record.items():
        if isinstance(key, str):
            # str.strip() does not treat the BOM as whitespace, so strip
            # whitespace, then the BOM, then any whitespace the BOM was hiding.
            key = key.strip().strip(bom).strip()
        # Non-string keys (csv.DictReader uses ``None`` as the default
        # ``restkey`` for surplus fields) are passed through unchanged.
        cleaned[key] = value
    return cleaned

0 comments on commit 90c248d

Please sign in to comment.