Pseudonymize

Pseudonymize

Certified

Pseudonymize PII fields in a structured file

yaml
# Fully qualified task type that selects the Pseudonymize task.
type: 'io.kestra.plugin.datagen.core.Pseudonymize'
yaml
# Pseudonymize the PII columns of a CSV file supplied when the flow is executed.
id: pseudonymize_customer_csv
namespace: company.team

# Declares the file that `{{ inputs.file }}` refers to below. Without this
# declaration the expression has nothing to resolve against.
inputs:
  - id: file
    type: FILE

tasks:
  - id: pseudonymize
    type: io.kestra.plugin.datagen.core.Pseudonymize
    from: "{{ inputs.file }}"
    contentType: CSV
    locale: ["en", "US"]
    # Column name -> generator expression. The expression output presumably
    # replaces the original column value (confirm against the plugin docs).
    fields:
      first_name: "#{name.first_name}"
      last_name: "#{name.last_name}"
      email: "#{internet.emailAddress}"
      phone: "#{phoneNumber.cellPhone}"
      address: "#{address.fullAddress}"

  # Reports the `count` and `uri` outputs of the pseudonymize task.
  - id: log
    type: io.kestra.plugin.core.log.Log
    message: "Pseudonymized {{ outputs.pseudonymize.count }} records: {{ outputs.pseudonymize.uri }}"

yaml
# Pseudonymize nested fields of a JSON file, addressed by dotted path.
id: pseudonymize_customer_json
namespace: company.team

tasks:
  - id: pseudonymize
    type: io.kestra.plugin.datagen.core.Pseudonymize
    # NOTE(review): `previous_task` is not defined in this snippet; it looks
    # like a placeholder for an upstream task that exposes a `uri` output.
    from: '{{ outputs.previous_task.uri }}'
    contentType: JSON
    locale:
      - fr
      - FR
    # Dotted path -> generator expression for that field.
    fields:
      user.profile.fullName: '#{name.fullName}'
      user.profile.email: '#{internet.emailAddress}'
      user.address.city: '#{address.city}'
      user.address.zipCode: '#{address.zipCode}'

  # Reports the `count` and `uri` outputs of the pseudonymize task.
  - id: log
    type: io.kestra.plugin.core.log.Log
    message: >-
      Pseudonymized {{ outputs.pseudonymize.count }} JSON records —
      result at {{ outputs.pseudonymize.uri }}

yaml
# Scheduled flow that pseudonymizes a daily CSV export.
id: nightly_pseudonymize_export
namespace: company.team

triggers:
  # Cron schedule: minute 0, hour 2, every day.
  - id: nightly
    type: io.kestra.plugin.core.trigger.Schedule
    cron: '0 2 * * *'

tasks:
  - id: pseudonymize
    type: io.kestra.plugin.datagen.core.Pseudonymize
    # NOTE(review): `vars.daily_export_uri` is not declared in this snippet;
    # presumably it must be defined under a top-level `variables:` key.
    from: '{{ vars.daily_export_uri }}'
    contentType: CSV
    locale:
      - en
      - US
    # Column name -> generator expression for that column.
    fields:
      customer_name: '#{name.fullName}'
      customer_email: '#{internet.emailAddress}'
      customer_phone: '#{phoneNumber.cellPhone}'
      national_id: '#{idNumber.ssnValid}'

  # Reports the `count` and `uri` outputs of the pseudonymize task.
  - id: log
    type: io.kestra.plugin.core.log.Log
    message: >-
      Nightly pseudonymization complete —
      {{ outputs.pseudonymize.count }} records written to
      {{ outputs.pseudonymize.uri }}

yaml
# Self-contained demo: generate a small CSV with a Python script, then
# pseudonymize its `name` column.
id: pseudonymize_superhero_csv
namespace: company.team

tasks:
  # The core task type is `WorkingDirectory`; `WorkingDir` is not a
  # registered task type.
  - id: working_directory
    type: io.kestra.plugin.core.flow.WorkingDirectory
    tasks:
      - id: generate_csv_script
        type: io.kestra.plugin.scripts.python.Commands
        taskRunner:
          type: io.kestra.plugin.core.runner.Process
        # Exposes heroes.csv as an output so the next task can read it.
        outputFiles:
          - "heroes.csv"
        commands:
          # Writes generate.py through a quoted heredoc (no shell expansion
          # inside the script body), then runs it to produce heroes.csv.
          - |
            cat << 'PYEOF' > generate.py
            import csv

            data = [
                {"id": 1,  "name": "Clark Kent",      "age": 35},
                {"id": 2,  "name": "Bruce Wayne",      "age": 38},
                {"id": 3,  "name": "Diana Prince",     "age": 30},
                {"id": 4,  "name": "Peter Parker",     "age": 22},
                {"id": 5,  "name": "Tony Stark",       "age": 48},
                {"id": 6,  "name": "Natasha Romanoff", "age": 35},
                {"id": 7,  "name": "Steve Rogers",     "age": 32},
                {"id": 8,  "name": "Bruce Banner",     "age": 40},
                {"id": 9,  "name": "Barry Allen",      "age": 28},
                {"id": 10, "name": "Wanda Maximoff",   "age": 26},
            ]

            with open("heroes.csv", "w", newline="") as f:
                writer = csv.DictWriter(f, fieldnames=["id", "name", "age"])
                writer.writeheader()
                writer.writerows(data)
            PYEOF
            python generate.py

  # Reads the generated CSV and maps only the `name` column to a generator
  # expression; `id` and `age` are not listed under `fields`.
  - id: pseudonymize
    type: io.kestra.plugin.datagen.core.Pseudonymize
    from: "{{ outputs.generate_csv_script.outputFiles['heroes.csv'] }}"
    contentType: CSV
    fields:
      name: "#{name.fullName}"

  # Reports the `count` and `uri` outputs of the pseudonymize task.
  - id: log
    type: io.kestra.plugin.core.log.Log
    message: "Pseudonymized {{ outputs.pseudonymize.count }} superhero records — result at {{ outputs.pseudonymize.uri }}"
Properties
Possible Values
CSV, JSON, ION
SubType: string
Default: 0
Format: uri