diff --git a/doc/references/api.yml b/doc/references/api.yml index 96f2748c04..1928057973 100644 --- a/doc/references/api.yml +++ b/doc/references/api.yml @@ -21,7 +21,6 @@ paths: tags: - Questions summary: Get questions for a given product - operationId: getQuestionsBarcode parameters: - name: count in: query @@ -50,13 +49,64 @@ paths: type: array items: type: object + /questions: + get: + tags: + - Questions + summary: Fetch questions + parameters: + - $ref: "#/components/parameters/lang" + - $ref: "#/components/parameters/count" + - $ref: "#/components/parameters/server_domain" + - $ref: "#/components/parameters/insight_types" + - $ref: "#/components/parameters/country" + - $ref: "#/components/parameters/brands" + - $ref: "#/components/parameters/value_tag" + - $ref: "#/components/parameters/page" + - $ref: "#/components/parameters/reserved_barcode" + - $ref: "#/components/parameters/campaign" + - $ref: "#/components/parameters/predictor" + - name: order_by + in: query + description: | + The field to use for ordering results: + - confidence: order by (descending) model confidence, null confidence insights come last + - popularity: order by (descending) popularity (=scan count) + - random: use a random order + schema: + type: string + default: popularity + enum: + - confidence + - random + - popularity + responses: + "200": + description: The questions matching the filters + content: + application/json: + schema: + type: object + properties: + status: + type: string + enum: + - "no_questions" + - "found" + questions: + type: array + items: + type: object + count: + type: integer + description: The total number of results with the provided filters /questions/random: get: tags: - Questions summary: Get random questions - operationId: getQuestionsRandom + deprecated: true parameters: - $ref: "#/components/parameters/lang" - $ref: "#/components/parameters/count" @@ -95,9 +145,9 @@ paths: tags: - Questions summary: Get questions about popular products + deprecated: true description: | Questions are ranked by the product popularity (based on scan count). - operationId: GetQuestionsPopular parameters: - $ref: "#/components/parameters/lang" - $ref: "#/components/parameters/count" @@ -122,7 +172,6 @@ paths: description: | Get number of unanswered questions grouped by `value_tag`. The list is ordered from highest count to lowest. - operationId: GetQuestionsUnanswered parameters: - name: count in: query @@ -173,7 +222,6 @@ paths: tags: - Insights summary: Get a random insight - operationId: GetInsightsRandom parameters: - $ref: "#/components/parameters/insight_type" - $ref: "#/components/parameters/country" @@ -199,7 +247,6 @@ paths: tags: - Insights summary: Get all insights for a specific product - operationId: Getallinsightsforaspecificproduct parameters: - $ref: "#/components/parameters/barcode_path" responses: @@ -211,7 +258,6 @@ paths: tags: - Insights summary: Get a specific insight - operationId: GetInsightsDetail parameters: - name: id in: path @@ -244,7 +290,6 @@ paths: (so it won't be applied), `1` means it is correct (so it will be applied) and `-1` means the insight won't be returned to the user (_skip_). We use the voting mecanism system to remember which insight to skip for a user (authenticated or not). - operationId: Submitanannotation requestBody: required: true content: diff --git a/robotoff/app/api.py b/robotoff/app/api.py index a29e84f3fa..c3af383bf3 100644 --- a/robotoff/app/api.py +++ b/robotoff/app/api.py @@ -5,7 +5,7 @@ import io import tempfile import uuid -from typing import Optional +from typing import Literal, Optional import falcon import orjson @@ -1096,8 +1096,21 @@ def on_get(self, req: falcon.Request, resp: falcon.Response): get_questions_resource_on_get(req, resp, "popularity") +class QuestionsCollectionResource: + def on_get(self, req: falcon.Request, resp: falcon.Response): + order_by = req.get_param("order_by", default="popularity") + + if order_by not in ("random", "popularity", "confidence"): + raise falcon.HTTPBadRequest( + description=f"invalid '{order_by}' value for `order_by` parameter" + ) + get_questions_resource_on_get(req, resp, order_by) + + def get_questions_resource_on_get( - req: falcon.Request, resp: falcon.Response, order_by: str + req: falcon.Request, + resp: falcon.Response, + order_by: Literal["random", "popularity", "confidence"], ): response: JSONType = {} page: int = req.get_param_as_int("page", min_value=1, default=1) @@ -1500,6 +1513,7 @@ def on_get(self, req: falcon.Request, resp: falcon.Response): api.add_route("/api/v1/ann/search/{logo_id:int}", ANNResource()) api.add_route("/api/v1/ann/search", ANNResource()) api.add_route("/api/v1/questions/{barcode}", ProductQuestionsResource()) +api.add_route("/api/v1/questions", QuestionsCollectionResource()) api.add_route("/api/v1/questions/random", RandomQuestionsResource()) api.add_route("/api/v1/questions/popular", PopularQuestionsResource()) api.add_route("/api/v1/questions/unanswered", UnansweredQuestionCollection()) diff --git a/robotoff/app/core.py b/robotoff/app/core.py index d874ad655e..b805b8622b 100644 --- a/robotoff/app/core.py +++ b/robotoff/app/core.py @@ -1,10 +1,10 @@ import datetime import functools from enum import Enum -from typing import Iterable, NamedTuple, Optional, Union +from typing import Iterable, Literal, NamedTuple, Optional, Union import peewee -from peewee import JOIN, fn +from peewee import JOIN, SQL, fn from robotoff import settings from robotoff.app import events @@ -72,7 +72,7 @@ def get_insights( brands: Optional[list[str]] = None, annotated: Optional[bool] = False, annotation: Optional[int] = None, - order_by: Optional[str] = None, + order_by: Optional[Literal["random", "popularity", "n_votes", "confidence"]] = None, value_tag: Optional[str] = None, server_domain: Optional[str] = None, reserved_barcode: Optional[bool] = None, @@ -158,6 +158,9 @@ def get_insights( elif order_by == "popularity": query = query.order_by(ProductInsight.unique_scans_n.desc()) + elif order_by == "confidence": + query = query.order_by(SQL("confidence DESC NULLS LAST")) + elif order_by == "n_votes": query = query.order_by(ProductInsight.n_votes.desc()) diff --git a/tests/integration/models_utils.py b/tests/integration/models_utils.py index afdedd6649..f60b6f80e4 100644 --- a/tests/integration/models_utils.py +++ b/tests/integration/models_utils.py @@ -55,6 +55,7 @@ class Meta: unique_scans_n = 10 annotation = None automatic_processing = False + confidence: Optional[float] = None class PredictionFactory(PeeweeModelFactory): diff --git a/tests/integration/test_api.py b/tests/integration/test_api.py index fb5a087f71..ea72278bb5 100644 --- a/tests/integration/test_api.py +++ b/tests/integration/test_api.py @@ -22,6 +22,7 @@ ) insight_id = "94371643-c2bc-4291-a585-af2cb1a5270a" +DEFAULT_BARCODE = "1" @pytest.fixture(autouse=True) @@ -30,7 +31,7 @@ def _set_up_and_tear_down(peewee_db): # clean db clean_db() # Set up. - ProductInsightFactory(id=insight_id, barcode=1) + ProductInsightFactory(id=insight_id, barcode=DEFAULT_BARCODE) # Run the test case. yield with peewee_db: @@ -56,7 +57,7 @@ def test_random_question(client, mocker): } } mocker.patch("robotoff.insights.question.get_product", return_value=product) - result = client.simulate_get("/api/v1/questions/random") + result = client.simulate_get("/api/v1/questions?order_by=random") assert result.status_code == 200 assert result.json == { @@ -85,7 +86,7 @@ def test_random_question_user_has_already_seen(client, mocker, peewee_db): device_id="device1", ) - result = client.simulate_get("/api/v1/questions/random?device_id=device1") + result = client.simulate_get("/api/v1/questions?order_by=random&device_id=device1") assert result.status_code == 200 assert result.json == {"count": 0, "questions": [], "status": "no_questions"} @@ -93,7 +94,7 @@ def test_random_question_user_has_already_seen(client, mocker, peewee_db): def test_popular_question(client, mocker): mocker.patch("robotoff.insights.question.get_product", return_value={}) - result = client.simulate_get("/api/v1/questions/popular") + result = client.simulate_get("/api/v1/questions?order_by=popularity") assert result.status_code == 200 assert result.json == { @@ -121,25 +122,25 @@ def test_popular_question_pagination(client, mocker, peewee_db): for i in range(0, 12): ProductInsightFactory(barcode=i, unique_scans_n=100 - i) - result = client.simulate_get("/api/v1/questions/popular?count=5&page=1") + result = client.simulate_get("/api/v1/questions?order_by=popularity&count=5&page=1") assert result.status_code == 200 data = result.json assert data["count"] == 12 assert data["status"] == "found" assert [q["barcode"] for q in data["questions"]] == ["0", "1", "2", "3", "4"] - result = client.simulate_get("/api/v1/questions/popular?count=5&page=2") + result = client.simulate_get("/api/v1/questions?order_by=popularity&count=5&page=2") assert result.status_code == 200 data = result.json assert data["count"] == 12 assert data["status"] == "found" assert [q["barcode"] for q in data["questions"]] == ["5", "6", "7", "8", "9"] - result = client.simulate_get("/api/v1/questions/popular?count=5&page=3") + result = client.simulate_get("/api/v1/questions?order_by=popularity&count=5&page=3") assert result.status_code == 200 data = result.json assert data["count"] == 12 assert data["status"] == "found" assert [q["barcode"] for q in data["questions"]] == ["10", "11"] - result = client.simulate_get("/api/v1/questions/popular?count=5&page=4") + result = client.simulate_get("/api/v1/questions?order_by=popularity&count=5&page=4") assert result.status_code == 200 data = result.json assert data["count"] == 12 @@ -147,6 +148,32 @@ def test_popular_question_pagination(client, mocker, peewee_db): assert len(data["questions"]) == 0 +def test_question_rank_by_confidence(client, mocker, peewee_db): + mocker.patch("robotoff.insights.question.get_source_image_url", return_value=None) + + with peewee_db: + ProductInsight.delete().execute() # remove default sample + ProductInsightFactory( + barcode="1", type="category", value_tag="en:salmon", confidence=0.9 + ) + ProductInsightFactory( + barcode="3", type="category", value_tag="en:breads", confidence=0.4 + ) + ProductInsightFactory( + barcode="2", type="label", value_tag="en:eu-organic", confidence=0.7 + ) + ProductInsightFactory( + barcode="4", type="brand", value_tag="carrefour", confidence=None + ) + + result = client.simulate_get("/api/v1/questions?order_by=confidence") + assert result.status_code == 200 + data = result.json + assert data["count"] == 4 + assert data["status"] == "found" + assert [q["barcode"] for q in data["questions"]] == ["1", "2", "3", "4"] + + def test_barcode_question_not_found(client): result = client.simulate_get("/api/v1/questions/2")