diff --git a/.dockerignore b/.dockerignore index 67127e1b4..10b14d499 100644 --- a/.dockerignore +++ b/.dockerignore @@ -17,3 +17,4 @@ ui2/.vscode ui2/.pnp.cjs ui2/.pnp.loader.mjs ui2/.env.development.local +ui2/public/papermerge-runtime-config.js diff --git a/.gitignore b/.gitignore index a26e09bd5..62e679277 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,4 @@ simplest.yml ui/public/runtime/config.js .ruff_cache/ .pytest_cache/ +.enwardrc diff --git a/docker/standard/Dockerfile b/docker/standard/Dockerfile index 89931a98e..8d7e63824 100644 --- a/docker/standard/Dockerfile +++ b/docker/standard/Dockerfile @@ -14,11 +14,14 @@ ENV CORE_APP=/core_app ENV PAPERMERGE__DATABASE__URL=sqlite:////db/db.sqlite3 ENV PAPERMERGE__AUTH__USERNAME=admin ENV PAPERMERGE__AUTH__EMAIL=admin@example.com -ENV PAPERMERGE__OCR__DEFAULT_LANGUAGE=deu ENV PAPERMERGE__MAIN__API_PREFIX="" +ENV PAPERMERGE__OCR__LANG_CODES="deu,eng,ron" +ENV PAPERMERGE__OCR__DEFAULT_LANG_CODE="deu" +ENV PAPERMERGE__OCR__AUTOMATIC="false" RUN apk update && apk add linux-headers python3-dev \ gcc \ + curl \ libc-dev \ supervisor \ imagemagick \ @@ -27,6 +30,7 @@ RUN apk update && apk add linux-headers python3-dev \ poppler-utils RUN pip install --upgrade poetry roco==0.4.2 +RUN curl -L -o /bin/env2js https://github.com/papermerge/env2js/releases/download/0.2/env2js.amd64 COPY poetry.lock pyproject.toml README.md LICENSE ${CORE_APP}/ @@ -37,6 +41,7 @@ COPY docker/standard/entrypoint.sh /entrypoint.sh COPY docker/standard/bundles/supervisor/* /etc/papermerge/ COPY docker/standard/bundles/nginx/* /etc/nginx/ COPY docker/standard/logging.yaml /etc/papermerge/ +COPY docker/standard/core.js.tmpl /${CORE_APP}/core.js.tmpl COPY ./papermerge ${CORE_APP}/papermerge/ COPY alembic.ini ${CORE_APP}/ diff --git a/docker/standard/core.js.tmpl b/docker/standard/core.js.tmpl new file mode 100644 index 000000000..54aff6e42 --- /dev/null +++ b/docker/standard/core.js.tmpl @@ -0,0 +1,5 @@ +window.__PAPERMERGE_RUNTIME_CONFIG__ = 
{ + ocr__lang_codes: "{{ .PAPERMERGE__OCR__LANG_CODES }}", + ocr__default_lang_code: "{{ .PAPERMERGE__OCR__DEFAULT_LANG_CODE }}", + ocr__automatic: {{ .PAPERMERGE__OCR__AUTOMATIC }} +} diff --git a/docker/standard/entrypoint.sh b/docker/standard/entrypoint.sh index 4e84aa15d..b2e199272 100644 --- a/docker/standard/entrypoint.sh +++ b/docker/standard/entrypoint.sh @@ -55,8 +55,15 @@ case $CMD in ;; server) exec_init + # TODO: replace roco with env2js roco > /usr/share/nginx/html/auth_server/papermerge-runtime-config.js - roco > /usr/share/nginx/html/ui/papermerge-runtime-config.js + /bin/env2js -f /core_app/core.js.tmpl > /usr/share/nginx/html/ui/papermerge-runtime-config.js + exec /usr/bin/supervisord -c /etc/papermerge/supervisord.conf + ;; + server_without_init) + # TODO: replace roco with env2js + roco > /usr/share/nginx/html/auth_server/papermerge-runtime-config.js + /bin/env2js -f /core_app/core.js.tmpl > /usr/share/nginx/html/ui/papermerge-runtime-config.js exec /usr/bin/supervisord -c /etc/papermerge/supervisord.conf ;; create_token.sh) diff --git a/papermerge/core/config.py b/papermerge/core/config.py index 1b647aa92..9ff883910 100644 --- a/papermerge/core/config.py +++ b/papermerge/core/config.py @@ -19,7 +19,15 @@ class Settings(BaseSettings): papermerge__main__cf_domain: str | None = None papermerge__database__url: str = "sqlite:////db/db.sqlite3" papermerge__redis__url: str | None = None - papermerge__ocr__default_language: str = 'deu' + papermerge__ocr__default_lang_code: str = 'deu' + # When is OCR triggered? + # `ocr__automatic` = True means that OCR will be performed without + # end user intervention, i.e. via a background scheduler like the celery scheduler. + # `ocr__automatic` = False means that OCR will be performed only + # if requested by the end user. In this case the user can choose to + # schedule OCR on upload; the user can also choose to + # schedule OCR later on any document.
+ papermerge__ocr__automatic: bool = False papermerge__search__url: str | None = None diff --git a/papermerge/core/features/document/cli/cli.py b/papermerge/core/features/document/cli/cli.py new file mode 100644 index 000000000..9aafdee86 --- /dev/null +++ b/papermerge/core/features/document/cli/cli.py @@ -0,0 +1,32 @@ +import uuid +import typer + +from papermerge.core.tasks import send_task +from papermerge.core.db.engine import Session + +from papermerge.core import dbapi, constants, types + + +app = typer.Typer(help="OCR tasks") + + +@app.command() +def schedule_ocr(node_id: uuid.UUID, force: bool = False, lang: str | None = None): + """Schedules OCR for given node ID""" + with Session() as db_session: + node_type: types.CType = dbapi.get_node_type(db_session, node_id) + + if node_type == "document": + if lang is None: + lang = dbapi.get_document_lang(db_session, node_id) + send_task( + constants.WORKER_OCR_DOCUMENT, + kwargs={ + "document_id": str(node_id), + "lang": lang, + }, + route_name="ocr", + ) + else: + # get all descendants of node_id + pass diff --git a/papermerge/core/features/document/db/api.py b/papermerge/core/features/document/db/api.py index 7487bac62..97ab3c72e 100644 --- a/papermerge/core/features/document/db/api.py +++ b/papermerge/core/features/document/db/api.py @@ -27,10 +27,11 @@ ) from papermerge.core.features.document.schema import DocumentCFVRow from papermerge.core.features.document.ordered_document_cfv import OrderedDocumentCFV +from papermerge.core import config from .selectors import select_doc_cfv, select_docs_by_type - +settings = config.get_settings() logger = logging.getLogger(__name__) @@ -634,6 +635,17 @@ def upload( route_name="s3", ) + if not settings.papermerge__ocr__automatic: + if doc.ocr is True: + # user chose "schedule OCR" when uploading document + tasks.send_task( + constants.WORKER_OCR_DOCUMENT, + kwargs={ + "document_id": str(doc.id), + "lang": doc.lang, + }, + route_name="ocr", + ) return validated_model, None 
diff --git a/ui2/index.html b/ui2/index.html index 04c1169a9..1fa75fa9e 100644 --- a/ui2/index.html +++ b/ui2/index.html @@ -9,6 +9,7 @@
+