IngestDocument IngestDocument
IngestDocument Certified

yaml
type: "io.kestra.plugin.ai.rag.IngestDocument"
yaml
id: document_ingestion
namespace: company.ai

tasks:
  - id: ingest
    type: io.kestra.plugin.ai.rag.IngestDocument
    provider:
      type: io.kestra.plugin.ai.provider.GoogleGemini
      modelName: gemini-embedding-exp-03-07
      apiKey: "{{ kv('GEMINI_API_KEY') }}"
    embeddings:
      type: io.kestra.plugin.ai.embeddings.KestraKVStore
    drop: true
    fromExternalURLs:
      - https://raw.githubusercontent.com/kestra-io/docs/refs/heads/main/content/blogs/release-0-24.md
Properties
Definitions
baseUrl*Requiredstring
collectionName*Requiredstring
type*Requiredobject
connection*Required
hosts*Requiredarray
SubTypestring
Min items1
basicAuth
passwordstring
usernamestring
headersarray
SubTypestring
pathPrefixstring
strictDeprecationModebooleanstring
trustAllSslbooleanstring
indexName*Requiredstring
type*Requiredobject
type*Requiredobject
kvNamestring
Default{{flow.id}}-embedding-store
createTable*Requiredbooleanstring
databaseUrl*Requiredstring
fieldName*Requiredstring
password*Requiredstring
tableName*Requiredstring
type*Requiredobject
username*Requiredstring
columnDefinitionsarray
SubTypestring
indexesarray
SubTypestring
metadataStorageModestring
DefaultCOLUMN_PER_KEY
token*Requiredstring
type*Requiredobject
autoFlushOnDeletebooleanstring
autoFlushOnInsertbooleanstring
collectionNamestring
consistencyLevelstring
databaseNamestring
hoststring
idFieldNamestring
indexTypestring
metadataFieldNamestring
metricTypestring
passwordstring
portintegerstring
retrieveEmbeddingsOnSearchbooleanstring
textFieldNamestring
uristring
usernamestring
vectorFieldNamestring
collectionName*Requiredstring
host*Requiredstring
indexName*Requiredstring
scheme*Requiredstring
type*Requiredobject
createIndexbooleanstring
databasestring
metadataFieldNamesarray
SubTypestring
optionsobject
passwordstring
usernamestring
database*Requiredstring
host*Requiredstring
password*Requiredstring
port*Requiredintegerstring
table*Requiredstring
type*Requiredobject
user*Requiredstring
useIndexbooleanstring
Defaultfalse
apiKey*Requiredstring
cloud*Requiredstring
index*Requiredstring
region*Requiredstring
type*Requiredobject
namespacestring
apiKey*Requiredstring
collectionName*Requiredstring
host*Requiredstring
port*Requiredintegerstring
type*Requiredobject
host*Requiredstring
port*Requiredintegerstring
type*Requiredobject
indexNamestring
Defaultembedding-index
accessKeyId*Requiredstring
accessKeySecret*Requiredstring
endpoint*Requiredstring
instanceName*Requiredstring
type*Requiredobject
metadataSchemaListarray
analyzerstring
Possible Values
SingleWordMaxWordMinWordSplitFuzzy
analyzerParameter
dateFormatsarray
SubTypestring
enableHighlightingboolean
enableSortAndAggboolean
fieldNamestring
fieldTypestring
Possible Values
LONGDOUBLEBOOLEANKEYWORDTEXTNESTEDGEO_POINTDATEVECTORFUZZY_KEYWORDIPJSONUNKNOWN
indexboolean
indexOptionsstring
Possible Values
DOCSFREQSPOSITIONSOFFSETS
isArrayboolean
jsonTypestring
Possible Values
FLATTENNESTED
sourceFieldNamesarray
SubTypestring
storeboolean
subFieldSchemasarray
analyzerstring
Possible Values
SingleWordMaxWordMinWordSplitFuzzy
analyzerParameter
dateFormatsarray
SubTypestring
enableHighlightingboolean
enableSortAndAggboolean
fieldNamestring
fieldTypestring
Possible Values
LONGDOUBLEBOOLEANKEYWORDTEXTNESTEDGEO_POINTDATEVECTORFUZZY_KEYWORDIPJSONUNKNOWN
indexboolean
indexOptionsstring
Possible Values
DOCSFREQSPOSITIONSOFFSETS
isArrayboolean
jsonTypestring
Possible Values
FLATTENNESTED
sourceFieldNamesarray
SubTypestring
storeboolean
subFieldSchemasarray
vectorOptions
vectorOptions
dataTypestring
dimensioninteger
metricTypestring
Possible Values
EUCLIDEANCOSINEDOT_PRODUCT
apiKey*Requiredstring
host*Requiredstring
type*Requiredobject
avoidDupsbooleanstring
consistencyLevelstring
Possible Values
ONEQUORUMALL
grpcPortintegerstring
metadataFieldNamestring
metadataKeysarray
SubTypestring
objectClassstring
portintegerstring
schemestring
securedGrpcbooleanstring
useGrpcForInsertsbooleanstring
Definitions
accessKeyId*Requiredstring
modelName*Requiredstring
secretAccessKey*Requiredstring
type*Requiredobject
baseUrlstring
caPemstring
clientPemstring
modelTypestring
DefaultCOHERE
Possible Values
COHERETITAN
apiKey*Requiredstring
modelName*Requiredstring
type*Requiredobject
baseUrlstring
caPemstring
clientPemstring
maxTokensintegerstring
endpoint*Requiredstring
modelName*Requiredstring
type*Requiredobject
apiKeystring
baseUrlstring
caPemstring
clientIdstring
clientPemstring
clientSecretstring
serviceVersionstring
tenantIdstring
apiKey*Requiredstring
modelName*Requiredstring
type*Requiredobject
baseUrlstring
Defaulthttps://dashscope-intl.aliyuncs.com/api/v1
caPemstring
clientPemstring
enableSearchbooleanstring
maxTokensintegerstring
repetitionPenaltynumberstring
apiKey*Requiredstring
modelName*Requiredstring
type*Requiredobject
baseUrlstring
Defaulthttps://api.deepseek.com/v1
caPemstring
clientPemstring
gitHubToken*Requiredstring
modelName*Requiredstring
type*Requiredobject
baseUrlstring
caPemstring
clientPemstring
apiKey*Requiredstring
modelName*Requiredstring
type*Requiredobject
baseUrlstring
caPemstring
clientPemstring
embeddingModelConfiguration
maxRetriesintegerstring
outputDimensionalityintegerstring
taskTypestring
Possible Values
RETRIEVAL_QUERYRETRIEVAL_DOCUMENTSEMANTIC_SIMILARITYCLASSIFICATIONCLUSTERINGQUESTION_ANSWERINGFACT_VERIFICATION
timeoutstring
Formatduration
titleMetadataKeystring
endpoint*Requiredstring
location*Requiredstring
modelName*Requiredstring
project*Requiredstring
type*Requiredobject
baseUrlstring
caPemstring
clientPemstring
apiKey*Requiredstring
modelName*Requiredstring
type*Requiredobject
baseUrlstring
Defaulthttps://router.huggingface.co/v1
caPemstring
clientPemstring
baseUrl*Requiredstring
modelName*Requiredstring
type*Requiredobject
caPemstring
clientPemstring
apiKey*Requiredstring
modelName*Requiredstring
type*Requiredobject
baseUrlstring
caPemstring
clientPemstring
compartmentId*Requiredstring
modelName*Requiredstring
region*Requiredstring
type*Requiredobject
authProviderstring
baseUrlstring
caPemstring
clientPemstring
endpoint*Requiredstring
modelName*Requiredstring
type*Requiredobject
baseUrlstring
caPemstring
clientPemstring
apiKey*Requiredstring
modelName*Requiredstring
type*Requiredobject
baseUrlstring
Defaulthttps://api.openai.com/v1
caPemstring
clientPemstring
apiKey*Requiredstring
modelName*Requiredstring
type*Requiredobject
baseUrlstring
caPemstring
clientPemstring
apiKey*Requiredstring
modelName*Requiredstring
projectId*Requiredstring
type*Requiredobject
baseUrlstring
caPemstring
clientPemstring
accountId*Requiredstring
apiKey*Requiredstring
modelName*Requiredstring
type*Requiredobject
baseUrlstring
caPemstring
clientPemstring
apiKey*Requiredstring
modelName*Requiredstring
type*Requiredobject
baseUrlstring
Defaulthttps://open.bigmodel.cn/
caPemstring
clientPemstring
maxRetriesintegerstring
maxTokenintegerstring
stopsarray
SubTypestring
Definitions
maxOverlapSizeInChars*Requiredinteger
maxSegmentSizeInChars*Requiredinteger
splitterstring
DefaultRECURSIVE
Possible Values
RECURSIVEPARAGRAPHLINESENTENCEWORD
Defaultfalse
Definitions
content*Requiredstring
metadataobject
SubTypestring
SubTypestring
SubTypestring
Unitrecords
Unittoken
Unittoken
Unittoken