diff --git a/api/core/workflow/nodes/document_extractor/entities.py b/api/core/workflow/nodes/document_extractor/entities.py
index 7e9ffaa889b988..412c3427947fd4 100644
--- a/api/core/workflow/nodes/document_extractor/entities.py
+++ b/api/core/workflow/nodes/document_extractor/entities.py
@@ -1,7 +1,9 @@
 from collections.abc import Sequence
+from typing import Optional
 
 from core.workflow.nodes.base import BaseNodeData
 
 
 class DocumentExtractorNodeData(BaseNodeData):
     variable_selector: Sequence[str]
+    output_image: Optional[bool] = False
diff --git a/api/core/workflow/nodes/document_extractor/node.py b/api/core/workflow/nodes/document_extractor/node.py
index 0b1dc611c59da2..253e5bcc6e5eee 100644
--- a/api/core/workflow/nodes/document_extractor/node.py
+++ b/api/core/workflow/nodes/document_extractor/node.py
@@ -5,6 +5,7 @@
 import os
 import tempfile
 from typing import cast
+from uuid import uuid4
 
 import docx
 import pandas as pd
@@ -12,14 +13,16 @@
 import yaml  # type: ignore
 
 from configs import dify_config
-from core.file import File, FileTransferMethod, file_manager
+from core.file import File, FileTransferMethod, FileType, file_manager
 from core.helper import ssrf_proxy
 from core.variables import ArrayFileSegment
 from core.variables.segments import FileSegment
 from core.workflow.entities.node_entities import NodeRunResult
 from core.workflow.nodes.base import BaseNode
 from core.workflow.nodes.enums import NodeType
+from libs.login import current_user
 from models.workflow import WorkflowNodeExecutionStatus
+from services.file_service import FileService
 
 from .entities import DocumentExtractorNodeData
 from .exc import DocumentExtractorError, FileDownloadError, TextExtractionError, UnsupportedFileTypeError
@@ -38,6 +41,8 @@ class DocumentExtractorNode(BaseNode[DocumentExtractorNodeData]):
 
     def _run(self):
         variable_selector = self.node_data.variable_selector
+        output_image = self.node_data.output_image
+
         variable = self.graph_runtime_state.variable_pool.get(variable_selector)
 
         if variable is None:
@@ -50,23 +55,27 @@ def _run(self):
         value = variable.value
         inputs = {"variable_selector": variable_selector}
         process_data = {"documents": value if isinstance(value, list) else [value]}
-
+        images = []
         try:
             if isinstance(value, list):
+                if output_image:
+                    images = _extract_images_from_file(files=value)
                 extracted_text_list = list(map(_extract_text_from_file, value))
                 return NodeRunResult(
                     status=WorkflowNodeExecutionStatus.SUCCEEDED,
                     inputs=inputs,
                     process_data=process_data,
-                    outputs={"text": extracted_text_list},
+                    outputs={"text": extracted_text_list, "images": images},
                 )
             elif isinstance(value, File):
+                if output_image:
+                    images = _extract_images_from_file([value])
                 extracted_text = _extract_text_from_file(value)
                 return NodeRunResult(
                     status=WorkflowNodeExecutionStatus.SUCCEEDED,
                     inputs=inputs,
                     process_data=process_data,
-                    outputs={"text": extracted_text},
+                    outputs={"text": extracted_text, "images": images},
                 )
             else:
                 raise DocumentExtractorError(f"Unsupported variable type: {type(value)}")
@@ -180,6 +189,78 @@ def _extract_text_from_pdf(file_content: bytes) -> str:
         raise TextExtractionError(f"Failed to extract text from PDF: {str(e)}") from e
 
 
+def _extract_images_from_pdf(file: File) -> list[File]:
+    """
+    Render every page of a PDF to a PNG image and store each one as an uploaded file.
+
+    :param file: the PDF file to render
+    :return: one image File per page, in page order
+    :raises DocumentExtractorError: if rendering or uploading a page fails
+    """
+    file_content = _download_file_content(file)
+    images: list[File] = []
+    try:
+        pdf_file = pypdfium2.PdfDocument(file_content, autoclose=True)
+        try:
+            for page in pdf_file:
+                # scale is relative to 72 DPI, so 5 renders at roughly 360 DPI
+                page_bitmap = page.render(scale=5)
+                image = page_bitmap.to_pil()
+                byte_io = io.BytesIO()
+                image.save(byte_io, format="PNG")
+                img_bytes = byte_io.getvalue()
+                # NOTE(review): current_user comes from libs.login and is presumably request-scoped;
+                # confirm it is available wherever this node runs (e.g. background workers)
+                image_upload_file = FileService.upload_file(
+                    content=img_bytes, user=current_user, mimetype="image/png", filename=f"{uuid4()}.png"
+                )
+                images.append(
+                    File(
+                        tenant_id=image_upload_file.tenant_id,
+                        type=FileType.IMAGE,
+                        transfer_method=FileTransferMethod.LOCAL_FILE,
+                        remote_url=image_upload_file.source_url,
+                        related_id=image_upload_file.id,
+                        filename=image_upload_file.name,
+                        extension=image_upload_file.extension,
+                        mime_type=image_upload_file.mime_type,
+                        size=image_upload_file.size,
+                        storage_key=image_upload_file.key,
+                    )
+                )
+        finally:
+            # release the native PDF handle even when a page fails
+            pdf_file.close()
+        return images
+    except Exception as e:
+        raise DocumentExtractorError(f"Failed to convert PDF to images: {e}") from e
+
+
+def _extract_images_from_file(files: list[File]) -> list[File]:
+    """
+    Extract page images from every PDF in ``files``; other file types contribute no images.
+
+    :param files: the files to inspect
+    :return: the page images of all PDF files, in input order
+    :raises TextExtractionError: if a file type cannot be determined or image extraction fails
+    """
+    images: list[File] = []
+    try:
+        for file in files:
+            if file.extension:
+                is_pdf = file.extension.lower() == ".pdf"
+            elif file.mime_type:
+                is_pdf = file.mime_type.lower() == "application/pdf"
+            else:
+                raise UnsupportedFileTypeError("Unable to determine file type: MIME type or file extension is missing")
+            if is_pdf:
+                # no early return: every PDF in the list must be rendered, not just the first
+                images.extend(_extract_images_from_pdf(file=file))
+        return images
+    except Exception as e:
+        raise TextExtractionError(f"Failed to extract image from PDF: {str(e)}") from e
+
+
 def _extract_text_from_doc(file_content: bytes) -> str:
     """
     Extract text from a DOC/DOCX file.
diff --git a/web/app/components/base/radio-group/index.tsx b/web/app/components/base/radio-group/index.tsx
new file mode 100644
index 00000000000000..a1cfb06e6afdee
--- /dev/null
+++ b/web/app/components/base/radio-group/index.tsx
@@ -0,0 +1,40 @@
+'use client'
+import type { FC } from 'react'
+import React from 'react'
+import s from './style.module.css'
+import cn from '@/utils/classnames'
+
+type OPTION = {
+  label: string
+  value: any
+}
+
+type Props = {
+  className?: string
+  options: OPTION[]
+  value: any
+  onChange: (value: any) => void
+}
+
+const RadioGroup: FC = ({
+  className = '',
+  options,
+  value,
+  onChange,
+}) => {
+  return (
+
+ {options.map(item => ( +
onChange(item.value)} + > +
+
{item.label}
+
+ ))} +
+ ) +} +export default React.memo(RadioGroup) diff --git a/web/app/components/base/radio-group/style.module.css b/web/app/components/base/radio-group/style.module.css new file mode 100644 index 00000000000000..22c29c6a423ee7 --- /dev/null +++ b/web/app/components/base/radio-group/style.module.css @@ -0,0 +1,24 @@ +.item { + @apply grow flex items-center h-8 px-2.5 rounded-lg bg-gray-25 border border-gray-100 cursor-pointer space-x-2; +} + +.item:hover { + background-color: #ffffff; + border-color: #B2CCFF; + box-shadow: 0px 12px 16px -4px rgba(16, 24, 40, 0.08), 0px 4px 6px -2px rgba(16, 24, 40, 0.03); +} + +.item.checked { + background-color: #ffffff; + border-color: #528BFF; + box-shadow: 0px 1px 2px 0px rgba(16, 24, 40, 0.06), 0px 1px 3px 0px rgba(16, 24, 40, 0.10); +} + +.radio { + @apply w-4 h-4 border-[2px] border-gray-200 rounded-full; +} + +.item.checked .radio { + border-width: 5px; + border-color: #155eef; +} \ No newline at end of file diff --git a/web/app/components/workflow/nodes/_base/components/variable/utils.ts b/web/app/components/workflow/nodes/_base/components/variable/utils.ts index 715ad1c7b175ad..3769a7426d06ff 100644 --- a/web/app/components/workflow/nodes/_base/components/variable/utils.ts +++ b/web/app/components/workflow/nodes/_base/components/variable/utils.ts @@ -268,6 +268,10 @@ const formatItem = ( variable: 'text', type: (data as DocExtractorNodeType).is_array_file ? 
VarType.arrayString : VarType.string, }, + { + variable: 'images', + type: VarType.arrayFile, + }, ] break } diff --git a/web/app/components/workflow/nodes/document-extractor/panel.tsx b/web/app/components/workflow/nodes/document-extractor/panel.tsx index 1e26fe4c337ee2..dab4f8babef397 100644 --- a/web/app/components/workflow/nodes/document-extractor/panel.tsx +++ b/web/app/components/workflow/nodes/document-extractor/panel.tsx @@ -14,6 +14,7 @@ import Field from '@/app/components/workflow/nodes/_base/components/field' import { BlockEnum, type NodePanelProps } from '@/app/components/workflow/types' import I18n from '@/context/i18n' import { LanguagesSupported } from '@/i18n/language' +import RadioGroup from '@/app/components/base/radio-group' const i18nPrefix = 'workflow.nodes.docExtractor' @@ -44,12 +45,13 @@ const Panel: FC> = ({ const { readOnly, inputs, + handleConfigChanges, handleVarChanges, filterVar, } = useConfig(id, data) return ( -
+
> = ({
+ + <> + handleConfigChanges({ + output_image: val === 'image', + })} + /> + +
- +
+ + +
diff --git a/web/app/components/workflow/nodes/document-extractor/types.ts b/web/app/components/workflow/nodes/document-extractor/types.ts index 8ab75921097398..81f1ec8146bf5c 100644 --- a/web/app/components/workflow/nodes/document-extractor/types.ts +++ b/web/app/components/workflow/nodes/document-extractor/types.ts @@ -3,4 +3,5 @@ import type { CommonNodeType, ValueSelector } from '@/app/components/workflow/ty export type DocExtractorNodeType = CommonNodeType & { variable_selector: ValueSelector is_array_file: boolean + output_image: boolean } diff --git a/web/app/components/workflow/nodes/document-extractor/use-config.ts b/web/app/components/workflow/nodes/document-extractor/use-config.ts index 9406c125f046f8..b16a7fbdb77447 100644 --- a/web/app/components/workflow/nodes/document-extractor/use-config.ts +++ b/web/app/components/workflow/nodes/document-extractor/use-config.ts @@ -2,7 +2,7 @@ import { useCallback, useMemo } from 'react' import produce from 'immer' import { useStoreApi } from 'reactflow' -import type { ValueSelector, Var } from '../../types' +import type { DocumentExtractorConfig, ValueSelector, Var } from '../../types' import { VarType } from '../../types' import { type DocExtractorNodeType } from './types' import useNodeCrud from '@/app/components/workflow/nodes/_base/hooks/use-node-crud' @@ -55,11 +55,19 @@ const useConfig = (id: string, payload: DocExtractorNodeType) => { setInputs(newInputs) }, [getType, inputs, setInputs]) + const handleConfigChanges = useCallback((config: DocumentExtractorConfig) => { + const newInputs = produce(inputs, (draft) => { + draft.output_image = config.output_image + }) + setInputs(newInputs) + }, [inputs, setInputs]) + return { readOnly, inputs, filterVar, handleVarChanges, + handleConfigChanges, } } diff --git a/web/app/components/workflow/types.ts b/web/app/components/workflow/types.ts index 6d0fabd90ef8c1..3e8301f3eefb84 100644 --- a/web/app/components/workflow/types.ts +++ 
b/web/app/components/workflow/types.ts
@@ -396,3 +396,6 @@ export type VisionSetting = {
   variable_selector: ValueSelector
   detail: Resolution
 }
+export type DocumentExtractorConfig = {
+  output_image: boolean
+}
diff --git a/web/i18n/en-US/workflow.ts b/web/i18n/en-US/workflow.ts
index fab25fa50958e2..93a4c285f52b8c 100644
--- a/web/i18n/en-US/workflow.ts
+++ b/web/i18n/en-US/workflow.ts
@@ -671,8 +671,12 @@ const translation = {
     },
     docExtractor: {
       inputVar: 'Input Variable',
+      output_image: 'Image output (PDF only)',
+      output_text: 'Text output',
+      output_format: 'Output Format',
       outputVars: {
         text: 'Extracted text',
+        images: 'Extracted images',
       },
       supportFileTypes: 'Support file types: {{types}}.',
       learnMore: 'Learn more',
diff --git a/web/i18n/zh-Hans/workflow.ts b/web/i18n/zh-Hans/workflow.ts
index dfad9208e73f4b..94d5991101accf 100644
--- a/web/i18n/zh-Hans/workflow.ts
+++ b/web/i18n/zh-Hans/workflow.ts
@@ -671,8 +671,12 @@ const translation = {
     },
     docExtractor: {
       inputVar: '输入变量',
+      output_image: '图片输出(只支持PDF)',
+      output_text: '文本输出',
+      output_format: '输出类型',
       outputVars: {
         text: '提取的文本',
+        images: '提取的图片列表',
       },
       supportFileTypes: '支持的文件类型: {{types}}。',
       learnMore: '了解更多',
diff --git a/web/i18n/zh-Hant/workflow.ts b/web/i18n/zh-Hant/workflow.ts
index a78c6a2f04d4ea..8e3d851b6fa846 100644
--- a/web/i18n/zh-Hant/workflow.ts
+++ b/web/i18n/zh-Hant/workflow.ts
@@ -664,8 +664,12 @@ const translation = {
       addNote: '添加註釋',
     },
     docExtractor: {
+      output_image: '圖片輸出(只支持PDF)',
+      output_text: '文字輸出',
+      output_format: '輸出類型',
       outputVars: {
         text: '提取的文字',
+        images: '提取的圖片列表',
       },
       learnMore: '瞭解更多資訊',
       inputVar: '輸入變數',