-
Notifications
You must be signed in to change notification settings - Fork 122
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix!: `to_gbq` loads `uint8` columns to BigQuery INT64 instead of STRING (#814)
* fix!: `to_gbq` loads `uint8` columns to BigQuery INT64 instead of STRING
fix!: `to_gbq` loads naive (no timezone) columns to BigQuery DATETIME instead of TIMESTAMP (#814)
fix!: `to_gbq` loads object column containing bool values to BOOLEAN instead of STRING (#814)
fix!: `to_gbq` loads object column containing dictionary values to STRUCT instead of STRING (#814)
deps: min pyarrow is now 4.0.0 to support compliant nested types (#814)
Release-As: 0.24.0
- Loading branch information
Showing
18 changed files
with
997 additions
and
76 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,6 +51,7 @@ | |
UNIT_TEST_EXTRAS = [ | ||
"bqstorage", | ||
"tqdm", | ||
"geopandas", | ||
] | ||
UNIT_TEST_EXTRAS_BY_PYTHON = { | ||
"3.9": [], | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Copyright (c) 2024 pandas-gbq Authors All rights reserved. | ||
# Use of this source code is governed by a BSD-style | ||
# license that can be found in the LICENSE file. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# Copyright (c) 2019 pandas-gbq Authors All rights reserved. | ||
# Use of this source code is governed by a BSD-style | ||
# license that can be found in the LICENSE file. | ||
|
||
import itertools | ||
|
||
import pandas | ||
|
||
|
||
def list_columns_and_indexes(dataframe, index=True):
    """Collect the names and dtypes of a dataframe's indexes and columns.

    Args:
        dataframe (pandas.DataFrame):
            Frame to inspect.
        index (bool):
            When true, named index levels are listed ahead of the columns.

    Returns:
        List[Tuple[str, dtype]]:
            ``(name, dtype)`` pairs: first the eligible index levels, in
            level order, then every column in frame order. An index level
            is skipped if its name is falsy (e.g. unnamed) or if a column
            already uses the same name.
    """
    taken_names = frozenset(dataframe.columns)
    pairs = []

    if index:
        frame_index = dataframe.index
        if isinstance(frame_index, pandas.MultiIndex):
            pairs.extend(
                (level, frame_index.get_level_values(level).dtype)
                for level in frame_index.names
                if level and level not in taken_names
            )
        elif frame_index.name and frame_index.name not in taken_names:
            pairs.append((frame_index.name, frame_index.dtype))

    pairs.extend(zip(dataframe.columns, dataframe.dtypes))
    return pairs
|
||
|
||
def first_valid(series):
    """Return the first non-missing value of ``series``.

    Args:
        series (pandas.Series): Series to inspect.

    Returns:
        The value stored at the label reported by
        ``series.first_valid_index()``, or ``None`` when the series has no
        valid entry.
    """
    label = series.first_valid_index()
    if label is None:
        return None
    return series.at[label]
|
||
|
||
def first_array_valid(series):
    """Return the first "meaningful" element from the array series.

    Here, "meaningful" means the first non-None element in one of the arrays
    that can be used for type detection.

    Args:
        series (pandas.Series):
            Series whose values are expected to be array-likes; entries
            that cannot be iterated (e.g. ``None``) are skipped.

    Returns:
        The first item, scanning the arrays in series order, for which
        ``pandas.isna`` is false, or ``None`` if no such item exists.
    """
    # Scan by position rather than by index label: labels are not required
    # to be integers, unique, or aligned with positions, so arithmetic on the
    # label returned by ``first_valid_index()`` cannot be used to resume the
    # search after the first valid array.
    for array in series:
        try:
            array_iter = iter(array)
        except TypeError:
            continue  # Not an array, apparently, e.g. None, thus skip.

        for item in array_iter:
            if not pandas.isna(item):
                return item

    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Copyright (c) 2019 pandas-gbq Authors All rights reserved. | ||
# Use of this source code is governed by a BSD-style | ||
# license that can be found in the LICENSE file. | ||
|
||
import collections | ||
|
||
import google.cloud.bigquery | ||
|
||
|
||
def to_schema_fields(schema):
    """Coerce `schema` to a list of schema field instances.

    Args:
        schema(Iterable[Union[ \
            :class:`~google.cloud.bigquery.schema.SchemaField`, \
            Mapping[str, Any] \
        ]]):
            Table schema to convert. If some items are passed as mappings,
            their content must be compatible with
            :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.

    Returns:
        List[:class:`~google.cloud.bigquery.schema.SchemaField`]:
            One field per input item, in input order. Items that are
            already ``SchemaField`` instances are returned as-is.

    Raises:
        ValueError: If any item is neither a
            :class:`~google.cloud.bigquery.schema.SchemaField` instance nor
            a mapping.
        TypeError: If ``schema`` cannot be iterated.
    """
    # Imported explicitly: a bare ``import collections`` does not guarantee
    # that the ``collections.abc`` submodule is loaded.
    from collections.abc import Mapping

    schema_field_cls = google.cloud.bigquery.SchemaField

    # Materialize once so one-shot iterables (e.g. generators) are not
    # exhausted by the validation pass before the conversion pass runs.
    fields = list(schema)

    # Validate every item before converting any, so an invalid item type is
    # reported ahead of any error raised while parsing a mapping.
    for field in fields:
        if not isinstance(field, (schema_field_cls, Mapping)):
            raise ValueError(
                "Schema items must either be fields or compatible "
                "mapping representations."
            )

    return [
        field
        if isinstance(field, schema_field_cls)
        else schema_field_cls.from_api_repr(field)
        for field in fields
    ]
Oops, something went wrong.