DeduplicateItems
Deduplicate a line-oriented file by key.
DeduplicateItems
Deduplicate a line-oriented file by key.
yaml
type: io.kestra.plugin.core.storage.DeduplicateItems
Examples
yaml
# Example flow: build a small CSV with repeated customer emails, convert it
# to Ion, then deduplicate the rows using the customer email as the key.
id: deduplicate_items
namespace: company.team

tasks:
  # Write a sample CSV; orders 1 and 3 share an email, as do orders 4 and 5.
  - id: generate_files
    type: io.kestra.plugin.scripts.shell.Script
    # Literal block scalar: the heredoc body must keep its line breaks.
    script: |
      cat <<EOF > my_data.csv
      order_id,customer_name,customer_email,product_id,price
      1,Kelly Olsen,kelly@example.com,20,166.89
      2,Miguel Moore,mccarthylee@example.net,14,171.63
      3,Kelly Olsen,kelly@example.com,20,166.89
      4,Jessica White,jessica@example.com,12,50.62
      5,Jessica White,jessica@example.com,12,50.62
      EOF
    outputFiles:
      - "my_data.csv"

  # Convert the generated CSV into the Ion file consumed by the next task.
  - id: csv_to_ion
    type: io.kestra.plugin.serdes.csv.CsvToIon
    from: "{{ outputs.generate_files.outputFiles['my_data.csv'] }}"

  # Deduplicate the items; `expr` is the expression that yields each item's key.
  # NOTE(review): which of the duplicate rows is retained is not shown here —
  # confirm against the task documentation.
  - id: dedup
    type: io.kestra.plugin.core.storage.DeduplicateItems
    from: "{{ outputs.csv_to_ion.uri }}"
    expr: "{{ customer_email }}"
Properties
expr * (Required, Non-dynamic) string
from * (Required) string
Outputs
droppedItemsTotal integer
numKeys integer
processedItemsTotal integer
uri string
Format: uri