# Base64ImageJoiner pipeline
# Joins multiple lists of Base64Images into a single list.
# If you need help with the YAML format, have a look at [https://docs.cloud.deepset.ai/v2.0/docs/create-a-pipeline#create-a-pipeline-using-pipeline-editor](https://docs.cloud.deepset.ai/v2.0/docs/create-a-pipeline#create-a-pipeline-using-pipeline-editor).
# This section defines components that you want to use in your pipelines. Each component must have a name and a type. You can also set the component's parameters here.
# The name is up to you, you can give your component a friendly name. You then use components' names when specifying the connections in the pipeline.
# Type is the class path of the component. You can check the type on the component's documentation page.
components:
  # --- Retrieval: keyword (BM25) + dense (embedding) over the same OpenSearch store ---
  BM25Retriever:
    type: haystack_integrations.components.retrievers.opensearch.bm25_retriever.OpenSearchBM25Retriever
    init_parameters:
      document_store:
        type: haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore
        init_parameters:
          # Bare (empty) values intentionally fall back to the platform defaults.
          hosts:
          index: ''
          max_chunk_bytes: 104857600
          embedding_dim: 1024
          return_embedding: false
          method:
          mappings:
          settings:
          create_index: true
          http_auth:
          use_ssl:
          verify_certs:
          timeout:
      top_k: 20
      fuzziness: 0
  Embedder:
    type: deepset_cloud_custom_nodes.embedders.nvidia.text_embedder.DeepsetNvidiaTextEmbedder
    init_parameters:
      normalize_embeddings: true
      model: BAAI/bge-m3
  EmbeddingRetriever:
    type: haystack_integrations.components.retrievers.opensearch.embedding_retriever.OpenSearchEmbeddingRetriever
    init_parameters:
      document_store:
        type: haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore
        init_parameters:
          # Bare (empty) values intentionally fall back to the platform defaults.
          hosts:
          index: ''
          max_chunk_bytes: 104857600
          embedding_dim: 1024
          return_embedding: false
          method:
          mappings:
          settings:
          create_index: true
          http_auth:
          use_ssl:
          verify_certs:
          timeout:
      top_k: 20
  # Merge the two retriever result lists before reranking.
  DocumentJoiner:
    type: haystack.components.joiners.document_joiner.DocumentJoiner
    init_parameters:
      join_mode: concatenate
  Ranker:
    type: deepset_cloud_custom_nodes.rankers.nvidia.ranker.DeepsetNvidiaRanker
    init_parameters:
      model: BAAI/bge-reranker-v2-m3
      top_k: 5
  # Regroup split chunks of the same file and restore their original order.
  MetaFieldGroupingRanker:
    type: haystack.components.rankers.meta_field_grouping_ranker.MetaFieldGroupingRanker
    init_parameters:
      group_by: file_id
      sort_docs_by: split_id
  # --- Image handling: download source files, convert PDFs/images to Base64 ---
  FileDownloader:
    type: deepset_cloud_custom_nodes.augmenters.deepset_file_downloader.DeepsetFileDownloader
    init_parameters:
      file_extensions:
        - .pdf
        - .png
        - .jpeg
        - .jpg
        - .gif
  FileToBase64Image:
    type: deepset_cloud_custom_nodes.converters.file_to_image.DeepsetFileToBase64Image
    init_parameters:
      detail: auto
  PDFToBase64Image:
    type: deepset_cloud_custom_nodes.converters.pdf_to_image.DeepsetPDFDocumentToBase64Image
    init_parameters:
      detail: high
      missing_page_number: all_pages
  PromptBuilder:
    type: haystack.components.builders.prompt_builder.PromptBuilder
    init_parameters:
      required_variables: '*'
      template: |
        Answer the questions briefly and precisely using the images and text passages provided.
        Only use images and text passages that are related to the question to answer it.
        Give reasons for your answer.
        In your answer, only refer to images and text passages that are relevant in answering the query.
        Each image is related to exactly one document. You see the images in exactly the same order as the documents.
        Only use references in the form [NUMBER OF IMAGE] if you are using information from an image.
        Or [NUMBER OF DOCUMENT] if you are using information from a document.
        For example, for Document [1] use the reference [1]. For Image 1 use reference [1] as well.
        These are the documents:
        {%- if documents|length > 0 %}
        {%- for document in documents %}
        Document [{{ loop.index }}] :
        Name of Source File: {{ document.meta.file_name }}
        Relates to image: [{{ loop.index }}]
        {{ document.content }}
        {% endfor -%}
        {%- else %}
        No relevant documents found.
        Respond with "Sorry, no matching documents were found, please adjust the filters or try a different question."
        {% endif %}
        Question: {{ question }}
        Answer:
  LLM:
    type: deepset_cloud_custom_nodes.generators.openai_vision.DeepsetOpenAIVisionGenerator
    init_parameters:
      api_key: {"type": "env_var", "env_vars": ["OPENAI_API_KEY"], "strict": false}
      model: gpt-4o
      generation_kwargs:
        max_tokens: 650
        temperature: 0
        seed: 0
  AnswerBuilder:
    type: deepset_cloud_custom_nodes.augmenters.deepset_answer_builder.DeepsetAnswerBuilder
    init_parameters:
      reference_pattern: acm
  # Route downloaded files by MIME type to the matching converter.
  MetadataRouter:
    type: haystack.components.routers.metadata_router.MetadataRouter
    init_parameters:
      rules:
        pdf:
          operator: OR
          conditions:
            - field: meta.mime_type
              operator: ==
              value: application/pdf
        image:
          operator: OR
          conditions:
            - field: meta.mime_type
              operator: ==
              value: image/png
            # image/jpg is non-standard but kept to catch mis-tagged uploads.
            - field: meta.mime_type
              operator: ==
              value: image/jpg
            - field: meta.mime_type
              operator: ==
              value: image/jpeg
            - field: meta.mime_type
              operator: ==
              value: image/gif
  # Re-sort joined images by the rank stamped onto their source documents,
  # so image order matches document order in the prompt.
  RankSorter:
    type: haystack.components.converters.output_adapter.OutputAdapter
    init_parameters:
      output_type: List[deepset_cloud_custom_nodes.dataclasses.chat_message_with_images.Base64Image]
      unsafe: true
      template: "{{ images|sort(attribute=\"meta._rank\") }}"
  # Stamp each document's rank into its meta so images can be matched to it.
  RankAdder:
    type: haystack.components.converters.output_adapter.OutputAdapter
    init_parameters:
      output_type: List[haystack.Document]
      custom_filters: ''
      unsafe: true
      template: |
        {%- for document in documents -%}
        {%- set _ = document.meta.update({'_rank': loop.index}) -%}
        {%- endfor -%}
        {{ documents }}
  Base64ImageJoiner:
    type: deepset_cloud_custom_nodes.joiners.base64_image_joiner.Base64ImageJoiner
    init_parameters: {}
connections:  # Defines how the components are connected
  - sender: BM25Retriever.documents
    receiver: DocumentJoiner.documents
  - sender: EmbeddingRetriever.documents
    receiver: DocumentJoiner.documents
  - sender: PromptBuilder.prompt
    receiver: LLM.prompt
  - sender: PromptBuilder.prompt
    receiver: AnswerBuilder.prompt
  - sender: Embedder.embedding
    receiver: EmbeddingRetriever.query_embedding
  - sender: DocumentJoiner.documents
    receiver: Ranker.documents
  - sender: Ranker.documents
    receiver: MetaFieldGroupingRanker.documents
  - sender: MetaFieldGroupingRanker.documents
    receiver: FileDownloader.documents
  - sender: MetadataRouter.image
    receiver: FileToBase64Image.documents
  - sender: MetadataRouter.pdf
    receiver: PDFToBase64Image.documents
  - sender: RankSorter.output
    receiver: LLM.images
  - sender: LLM.replies
    receiver: AnswerBuilder.replies
  - sender: PDFToBase64Image.base64_images
    receiver: Base64ImageJoiner.images
  - sender: FileToBase64Image.base64_images
    receiver: Base64ImageJoiner.images
  - sender: Base64ImageJoiner.images
    receiver: RankSorter.images
  - sender: RankAdder.output
    receiver: MetadataRouter.documents
  - sender: FileDownloader.documents
    receiver: RankAdder.documents
  - sender: RankAdder.output
    receiver: AnswerBuilder.documents
  - sender: RankAdder.output
    receiver: PromptBuilder.documents
inputs:  # Define the inputs for your pipeline
  query:  # These components will receive the query as input
    - "BM25Retriever.query"
    - "PromptBuilder.question"
    - "AnswerBuilder.query"
    - "Embedder.text"
    - "Ranker.query"
  filters:  # These components will receive a potential query filter as input
    - "BM25Retriever.filters"
    - "EmbeddingRetriever.filters"
  files:
    - "FileDownloader.sources"
outputs:  # Defines the output of your pipeline
  documents: "RankAdder.output"  # The output of the pipeline is the retrieved documents
  answers: "AnswerBuilder.answers"  # The output of the pipeline is the generated answers

max_runs_per_component: 100

metadata: {}
# Updated 4 days ago