Skip to content

Commit

Permalink
fix: ✨ Add rev key to Images in Parquet (#22)
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremyarancio authored Dec 19, 2024
1 parent 4c07ac0 commit 061566c
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions openfoodfacts_exports/exports/parquet/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class Image(BaseModel):
sizes: dict[str, ImageSize] | None = None
uploaded_t: int | None = None
imgid: int | None = None
+ rev: int | None = None
uploader: str | None = None

@model_validator(mode="after")
Expand Down Expand Up @@ -242,13 +243,13 @@ def parse_language_fields(cls, data: dict) -> dict:
@classmethod
def parse_images(cls, data: dict) -> dict:
"""Parse images field into a list of dictionaries with `key`, `imgid`,
- `sizes`, `uploaded_t`, and `uploader` keys.
+ `rev`, `sizes`, `uploaded_t`, and `uploader` keys.
In Open Food Facts, images are stored as a dictionary with the image
key as the key and the image data as the value.
To make the schema compatible with Parquet, we convert these fields
- into a list of dictionaries with `key`, `imgid`, `sizes`, `uploaded_t`,
+ into a list of dictionaries with `key`, `imgid`, `rev`, `sizes`, `uploaded_t`,
and `uploader` keys. We copy the image key (ex: `3`, `nutrition_fr`,...)
from the original dictionary and add it as a field under the `key` key.
"""
Expand Down Expand Up @@ -283,6 +284,7 @@ def parse_owner_fields(cls, data: dict):
[
pa.field("key", pa.string(), nullable=True),
pa.field("imgid", pa.int32(), nullable=True),
+ pa.field("rev", pa.int32(), nullable=True),
pa.field(
"sizes",
pa.struct(
Expand Down

0 comments on commit 061566c

Please sign in to comment.