Last updated Mar 24, 2024

Flow Descriptor

Skip to examples

The fluid descriptor for provisioning a Flow (aka data pipeline).

Specification

JSON Schema

1
2
# flow
# The fluid descriptor for provisioning a Flow (aka data pipeline).

apiVersion: "v1alpha1"                                             # [required] [enum] The current version of this resource | Choose one of either: v1alpha1
id: "id"                                                           # [required] [flow-ari] A unique identifier for the flow being described
name: "name"                                                       # [required] [non-empty string] The name of the resource being described
summary: "summary"                                                 # [required] [non-empty string] Short description about the resource being described
documentation: "documentation"                                     # [optional] [uri] URL link to documentation about the resource being described
status: "Archived"                                                 # [optional] [enum] Lifecycle status of the resource being described | Choose one of either: Archived, Available, Decommissioned, Deleted, Deprecated, Development, Preview
managed: "full"                                                    # [optional] [enum] Indicates if the resource is fully managed (full, or if omitted) or metadata managed (metadata) | Choose one of either: full, metadata
contacts:                                                          # [optional] Contact objects for the resource being described
  # Any of:
  author:                                                          # [optional] [non-empty array] List of Slack channels and/or emails to contact for questions about the descriptor
    ## One of:
    - email: "email"                                               # [optional] [email contact] A contact email address
    ## OR
    - slack: "slack"                                               # [optional] [non-empty slack contact] Slack channel for contacting or alerting purposes
    ## End one of
  # OR
  support:                                                         # [optional] [non-empty array] List of Slack channels and/or emails to contact for help about the descriptor
    ## One of:
    - email: "email"                                               # [optional] [email contact] A contact email address
    ## OR
    - slack: "slack"                                               # [optional] [non-empty slack contact] Slack channel for contacting or alerting purposes
    ## End one of
  # OR
  operations:                                                      # [optional] [non-empty array] List of Slack channels and/or on-call schedules to send alerts and notifications related to the descriptor
    ## One of:
    - slack: "slack"                                               # [optional] [non-empty slack contact] Slack channel for contacting or alerting purposes
    ## OR
    - opsgenie: "opsgenie"                                         # [optional] [non-empty opsgenie contact] Name of your Opsgenie schedule. Can be found here https://atlassian.app.opsgenie.com/schedule/whoIsOnCall
    ## End one of
  # End any of
product: "product"                                                 # [required] [product-ari] Product this flow belongs to
config:                                                            # [optional] [array] Optional configuration definitions
  - target:                                                        # [optional] Target environment this configuration applies to. Omitting a key matches all values for that key. Omitting this whole block is equivalent to not setting all keys
      environment: "development"                                   # [optional] [enum] environment this applies to | Choose one of either: development, staging, production
    variables:                                                     # [required] key value pairs for in-descriptor configuration
      yourPropertyName:                                            # [additional property] key value pairs for in-descriptor configuration
        secretName: "secretName"                                   # [required] [string] name of the secret to reference
condition:                                                         # [required] Condition on which this flow starts
  # One of:
  alwaysOn:                                                        # [optional] Streaming pipeline condition object
    maxRestarts: -1                                                # [optional] [integer] Maximum number of restarts for alwaysOn pipeline. Use -1 for unlimited restarts, 0 for no restarts and positive integer for limited restarts. (Will be ignored for federal environments) (min: -1) (default: -1)
    processInterval: 10000                                         # [optional] [integer] Interval at which data is processed in microbatch (in milliseconds) (min: 1000) (max: 600000)
  # OR
  backfill:                                                        # [optional] Backfill schedule object
    cron: "cron"                                                   # [required] [non-empty string] Standard unix CRON schedule informs Airflow to use period grouping based on this
    startDate: "startDate"                                         # [required] [non-empty date-time] Start timestamp for backfill schedule
    endDate: "endDate"                                             # [required] [non-empty date-time] End timestamp for backfill schedule
    maxRetries: 0                                                  # [optional] [integer] Number of retries for a scheduled pipeline. Use 0 for no retries and positive integer for limited retries. Ignore for default 3 retries (min: 0)
  # OR
  schedule:                                                        # [optional] CRON schedule object
    cron: "cron"                                                   # [required] [non-empty string] Standard unix CRON schedule
    startDate: "startDate"                                         # [required] [non-empty date-time] Start timestamp for cron schedule
    endDate: "endDate"                                             # [optional] [non-empty date-time] End timestamp for cron schedule
    maxRetries: 0                                                  # [optional] [integer] Number of retries for a scheduled pipeline. Use 0 for no retries and positive integer for limited retries. Ignore for default 3 retries (min: 0)
  # OR
  once:                                                            # [optional]  Run it Once
    maxRetries: 0                                                  # [optional] [integer] Number of retries for a scheduled pipeline. Use 0 for no retries and positive integer for limited retries. Ignore for default 3 retries (min: 0)
  # End one of
dependencies:                                                      # [optional] Details of the upstream flows this flow is dependent on
  flows:                                                           # [required] [non-empty array] List of upstream flow details
    - id: "id"                                                     # [required] [flow-ari] A unique identifier for the upstream flow being described
      lookbackTime: "lookbackTime"                                 # [required] [duration] Lookback time for the upstream flow
      observationWindow: "observationWindow"                       # [required] [duration] The window of time to look for the successful run for the upstream flow in
      status: success                                              # [optional] [enum] Status of the upstream flow on which downstream flow depends | Choose one of either: success, failure
      timeout: PT15M                                               # [optional] [duration] ISO8601 formatted duration representing the time after which the flow will timeout if its waiting for an upstream flow to complete
alert:                                                             # [optional] Flow alerts
  failure:                                                         # [required] Flow failure alerts
    priority: "P1"                                                 # [required] [enum] Flow priority | Choose one of either: P1, P2, P3, P4
    runbook: "runbook"                                             # [optional] [uri] Flow runbook
    notifications:                                                 # [required] Environment(s) for which to trigger a notification.
      # Any of:
      development:                                                 # [optional] [non-empty array] Array of notification targets
        - "slack"                                                  # [optional] [enum] Notification target | Choose one of either: slack, opsgenie
      # OR
      staging:                                                     # [optional] [non-empty array] Array of notification targets
        - "slack"                                                  # [optional] [enum] Notification target | Choose one of either: slack, opsgenie
      # OR
      production:                                                  # [optional] [non-empty array] Array of notification targets
        - "slack"                                                  # [optional] [enum] Notification target | Choose one of either: slack, opsgenie
      # End any of
steps:                                                             # [required] [non-empty array] list of flow steps
  - name: "name"                                                   # [required] [non-empty string] The name of this step (must be unique in this flow)
    summary: "summary"                                             # [optional] [non-empty string] Short description of this step
    timeout: "timeout"                                             # [optional] [duration] ISO8601 formatted duration representing the timeout before this step is terminated; if omitted then step can run for as long as the underlying system allows
    parents:                                                       # [optional] [non-empty array] List of step identifiers this step depends on; if omitted it is assumed to be the root step
      - "parents"                                                  # [optional] [string] Name of parent step
    transformation:                                                # [required] The transformation logic
      # One of:
      fivetranIngest:                                              # [optional] Object for Fivetran
        connector:                                                 # [required] Different ingest connectors supported for fivetran
          ## One of:
          linkedin_ads:                                            # [optional] LinkedIn Ads connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for LinkedIn Ads Connector
              sync_mode: "SpecificAccounts"                        # [required] [enum] Whether to sync all accounts or specific accounts | Choose one of either: SpecificAccounts, AllAccounts
              accounts:                                            # [optional] [array] Specific accounts to sync
                - "accounts"                                       # [optional] [non-empty string] Accounts to be synced
              ad_analytics: "SpecificReports"                      # [required] [enum] Whether to sync all reports or specific reports | Choose one of either: SpecificReports, AllReports
              reports:                                             # [optional] [array] Specific reports to sync
                - "reports"                                        # [optional] [non-empty string] Reports to be synced
              view_through_attribution_window_size: "DAY_1"        # [required] [enum] The time frame of the view-through attribution window | Choose one of either: DAY_1, DAY_7, DAY_28, DAY_30
              post_click_attribution_window_size: "DAY_1"          # [required] [enum] The time frame of the post-click attribution window | Choose one of either: DAY_1, DAY_7, DAY_28, DAY_30
              timeframe_months: "THREE"                            # [required] [enum] Number of months' worth of reporting data you'd like to include in your initial sync | Choose one of either: THREE, SIX, TWELVE, TWENTY_FOUR, ALL_TIME
          ## OR
          workday_strategic_sourcing:                              # [optional] Workday Strategic Sourcing Connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for Workday Strategic Sourcing Connector
              region: "region"                                     # [required] [non-empty string] The region of the Workday instance
              x_api_key: "x_api_key"                               # [required] [non-empty string] The API key for the Workday Strategic Sourcing connector
              x_user_token: "x_user_token"                         # [required] [non-empty string] The user token for the Workday Strategic Sourcing connector
              x_user_email: "x_user_email"                         # [required] [non-empty string] The user email for the Workday Strategic Sourcing connector
          ## OR
          coupa:                                                   # [optional] Coupa Connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for Coupa Connector
              instance: "instance"                                 # [required] [non-empty string] The instance name of your Coupa account in the URL.
              client_id: "client_id"                               # [required] [non-empty string] Your Coupa client_id.
              client_secret: "client_secret"                       # [required] [non-empty string] Your Coupa client_secret.
          ## OR
          concur:                                                  # [optional] Concur Connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for Concur Connector
              auth_mode: "PasswordGrant"                           # [required] [enum] Authentication mode for Concur | Choose one of either: PasswordGrant, CompanyLevel
              client_id: "client_id"                               # [required] [non-empty string] Client ID for the connector
              client_secret: "client_secret"                       # [required] [non-empty string] Client Secret for the connector
              username: "username"                                 # [optional] [non-empty string] Username for the connector
              login_password: "login_password"                     # [optional] [non-empty string] Password for the connector
              company_uuid: "company_uuid"                         # [optional] [non-empty string] Company UUID for the connector
              company_request_token: "company_request_token"       # [optional] [non-empty string] Company Request Token for the connector
              region: "region"                                     # [required] [non-empty string] Region for the connector
          ## OR
          workday:                                                 # [optional] Workday Raas Connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for Workday Raas Connector
              report_url: "report_url"                             # [required] [non-empty string] URL for a live custom report
              primary_keys:                                        # [required] [array] Primary key(s) to be set in the destination table for the custom report
                - "primary_keys"                                   # [required] [non-empty string] Primary key for the table
              table: "table"                                       # [required] [non-empty string] The table name within the schema to which connector will sync the data
              user_name: "user_name"                               # [required] [non-empty string] Workday username for the connector
              password: "password"                                 # [required] [non-empty string] Workday password for the connector
              report_format_type: "json"                           # [optional] [enum] Workday report format type for the connector | Choose one of either: json, csv
              support_nested_columns: true                         # [optional] [boolean] Unpack the nested columns and sync them separately
          ## OR
          workday_hcm:                                             # [optional] Workday HCM Connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for Workday HCM Connector
              domain_host_name: "domain_host_name"                 # [required] [non-empty string] Domain Host name for the connector
              username: "username"                                 # [required] [non-empty string] Username for the connector
              password: "password"                                 # [required] [non-empty string] Password for the connector
          ## OR
          qualtrics:                                               # [optional] Qualtrics Connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for Qualtrics Connector
              data_center: "data_center"                           # [required] [non-empty string] Data center ID of the Qualtrics account
              api_token: "api_token"                               # [required] [non-empty string] API token of the Qualtrics account
              api_requests_per_minute: 1                           # [required] [integer] Allowed number of API requests to Qualtrics per minute (min: 1)
          ## OR
          fivetran_log:                                            # [optional] Fivetran Log connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for Fivetran Log Connector
              group_name: "group_name"                             # [optional] [non-empty string] The group name of the target_group_id
              is_account_level_connector: true                     # [optional] [boolean] Retrieve account-level logs
          ## OR
          intercom:                                                # [optional] Intercom connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            auth:                                                  # [optional] Auth for Intercom Connector
              access_token: "access_token"                         # [required] [non-empty string] The long-lived Access Token carries the information necessary for API resources to fetch data
          ## OR
          marketo:                                                 # [optional] Marketo connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for Marketo Connector
              endpoint: "endpoint"                                 # [required] [non-empty string] Marketo REST API endpoint
              identity: "identity"                                 # [required] [non-empty string] Marketo REST API identity URL
              user_id: "user_id"                                   # [required] [non-empty string] Marketo SOAP API User ID
              encryption_key: "encryption_key"                     # [required] [non-empty string] Marketo SOAP API Encryption Key
              soap_uri: "soap_uri"                                 # [required] [non-empty string] Marketo SOAP API Endpoint
              client_id: "client_id"                               # [required] [non-empty string] Marketo REST API Client ID
              client_secret: "client_secret"                       # [required] [non-empty string] Marketo REST API Client Secret
              api_quota: 1                                         # [optional] [integer] Allowed number of API requests to Marketo instance per day (min: 1)
          ## OR
          oracle_fusion_cloud_apps_crm:                            # [optional] Oracle Fusion Cloud apps CRM connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for Oracle Fusion Cloud apps CRM Connector
              schema_prefix: "schema_prefix"                       # [required] [non-empty string] The connector schema prefix has to be unique within the group (destination)
              server_url: "server_url"                             # [required] [non-empty string] The Oracle Fusion Cloud Instance URL
              username: "username"                                 # [required] [non-empty string] The Oracle Fusion Cloud username
              password: "password"                                 # [required] [non-empty string] The Oracle Fusion Cloud user password
          ## OR
          oracle_fusion_cloud_apps_fscm:                           # [optional] Oracle Fusion Cloud apps FSCM connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for Oracle Fusion Cloud apps FSCM Connector
              schema_prefix: "schema_prefix"                       # [required] [non-empty string] The connector schema prefix has to be unique within the group (destination)
              server_url: "server_url"                             # [required] [non-empty string] The Oracle Fusion Cloud Instance URL
              username: "username"                                 # [required] [non-empty string] The Oracle Fusion Cloud username
              password: "password"                                 # [required] [non-empty string] The Oracle Fusion Cloud user password
          ## OR
          salesforce:                                              # [optional] Salesforce connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for Salesforce Connector
              base_url: "base_url"                                 # [optional] [non-empty string] The custom Salesforce domain
              sync_formula_fields: true                            # [optional] [boolean] Sync formula fields
            auth:                                                  # [optional] Auth for Salesforce Connector
              refresh_token: "refresh_token"                       # [required] [non-empty string] The long-lived Refresh token along with the client_id and client_secret parameters carry the information necessary to get a new access token for API resources
              client_access:                                       # [optional] Client access for Salesforce Connector
                client_id: "client_id"                             # [required] [non-empty string] Client ID of your Salesforce client application
                client_secret: "client_secret"                     # [required] [non-empty string] Client Secret of your Salesforce client application
          ## OR
          youtube_analytics:                                       # [optional] YouTube Analytics connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for YouTube Analytics Connector
              content_owner_id: "content_owner_id"                 # [required] [non-empty string] Used only for Content Owner reports. The ID of the content owner for whom the API request is being made
            auth:                                                  # [optional] Auth for YouTube Analytics Connector
              refresh_token: "refresh_token"                       # [required] [non-empty string] The long-lived Refresh token along with the client_id and client_secret parameters carry the information necessary to get a new access token for API resources
              client_access:                                       # [optional] Client access for YouTube Analytics Connector
                client_id: "client_id"                             # [required] [non-empty string] Client ID of your YouTube Analytics client application
                client_secret: "client_secret"                     # [required] [non-empty string] Client Secret of your YouTube Analytics client application
          ## OR
          netsuite_suiteanalytics:                                 # [optional] Netsuite Suite Analytics connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for Netsuite Suite Analytics Connector
              host: "host"                                         # [required] [non-empty string] The NetSuite service host address
              port: "port"                                         # [required] [non-empty string] The NetSuite service host port
              datasource: "datasource"                             # [required] [non-empty string] The NetSuite data source value: NetSuite.com
              account: "account"                                   # [required] [non-empty string] The NetSuite Account ID
              role: "role"                                         # [required] [non-empty string] The NetSuite Role ID for connection
              email: "email"                                       # [optional] [non-empty string] The NetSuite user's email address
              password: "password"                                 # [optional] [non-empty string] The NetSuite user's password
              consumer_key: "consumer_key"                         # [optional] [non-empty string] The NetSuite consumer key obtained when creating an Integration Record
              consumer_secret: "consumer_secret"                   # [optional] [non-empty string] The NetSuite consumer secret obtained when creating an Integration Record
              token_key: "token_key"                               # [optional] [non-empty string] The NetSuite token ID obtained when creating an Access Token
              token_secret: "token_secret"                         # [optional] [non-empty string] The NetSuite token password obtained when creating an Access Token
          ## OR
          bingads:                                                 # [optional] Microsoft Advertising connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for Microsoft Advertising Connector
              sync_mode: "SpecificAccounts"                        # [optional] [enum] Whether to sync all accounts or specific accounts | Choose one of either: SpecificAccounts, AllAccounts
              accounts:                                            # [optional] [array] Specific accounts to sync
                - "accounts"                                       # [optional] [non-empty string] Accounts to be synced
              timeframe_months: "THREE"                            # [optional] [enum] Number of months' worth of reporting data you'd like to include in your initial sync | Choose one of either: THREE, SIX, TWELVE, TWENTY_FOUR, ALL_TIME
              rollback_window_size: 35                             # [optional] [enum] The time frame of the rollback sync to capture conversions | Choose one of either: 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95
            auth:                                                  # [optional] Auth for Microsoft Advertising Connector
              refresh_token: "refresh_token"                       # [required] [non-empty string] The long-lived Refresh token along with the client_id and client_secret parameters carry the information necessary to get a new access token for API resources
              client_access:                                       # [optional] Client access for Microsoft Advertising Connector
                client_id: "client_id"                             # [required] [non-empty string] Client ID of your Microsoft Advertising client application
                client_secret: "client_secret"                     # [required] [non-empty string] Client Secret of your Microsoft Advertising client application
          ## OR
          facebook_ads:                                            # [optional] Facebook Ads connector
            update_schema: true                                    # [optional] [boolean] Should the schema be updated for Fivetran connector
            paused: true                                           # [optional] [boolean] Should the connector be paused when created
            config:                                                # [optional] Config for Facebook Ads Connector
              sync_mode: "AllAccounts"                             # [required] [enum] Option to select connector should sync all accounts or specific accounts | Choose one of either: AllAccounts, SpecificAccounts
              accounts:                                            # [required] [array] List of accounts of which connector will sync the data
                "accounts"                                         # [required] [non-empty string] Accounts to sync
              custom_tables:                                       # [optional] [array] List of custom tables
                - table_name: "table_name"                         # [optional] [non-empty string] The table name within the schema to which the connector will sync the data
                  fields:                                          # [optional] [array] List of fields which connector will sync
                    "account_currency"                             # [optional] [enum] Field to sync | Choose one of either: account_currency, account_id, account_name, action_values, actions, ad_id, ad_name, adset_id, adset_name, attribution_setting, buying_type, campaign_id, campaign_name, canvas_avg_view_percent, canvas_avg_view_time, catalog_segment_value, clicks, conversion_rate_ranking, conversion_values, conversions, cost_per_10_sec_video_view, cost_per_action_type, cost_per_conversion, cost_per_estimated_ad_recallers, cost_per_inline_link_click, cost_per_inline_post_engagement, cost_per_outbound_click, cost_per_thruplay, cost_per_unique_action_type, cost_per_unique_click, cost_per_unique_inline_link_click, cost_per_unique_outbound_click, cpc, cpm, cpp, ctr, engagement_rate_ranking, estimated_ad_recall_rate, estimated_ad_recallers, frequency, full_view_impressions, full_view_reach, gender_targeting, impressions, inline_link_click_ctr, inline_link_clicks, inline_post_engagement, instant_experience_clicks_to_open, instant_experience_clicks_to_start, instant_experience_outbound_clicks, labels, location, mobile_app_purchase_roas, objective, outbound_clicks, outbound_clicks_ctr, purchase_roas, quality_ranking, reach, relevance_score, social_spend, spend, unique_actions, unique_clicks, unique_ctr, unique_inline_link_click_ctr, unique_inline_link_clicks, unique_link_clicks_ctr, unique_outbound_clicks, unique_outbound_clicks_ctr, video_10_sec_watched_actions, video_30_sec_watched_actions, video_avg_percent_watched_actions, video_avg_time_watched_actions, video_p100_watched_actions, video_p25_watched_actions, video_p50_watched_actions, video_p75_watched_actions, video_p95_watched_actions, video_play_actions, video_play_curve_actions, video_thruplay_watched_actions, website_ctr, website_purchase_roas
                  breakdowns:                                      # [optional] [array] List of breakdowns which connector will sync
                    "ad_format_asset"                              # [optional] [enum] Breakdown to sync | Choose one of either: ad_format_asset, age, app_id, body_asset, call_to_action_asset, country, description_asset, dma, gender, frequency_value, hourly_stats_aggregated_by_advertiser_time_zone, hourly_stats_aggregated_by_audience_time_zone, image_asset, impression_device, link_url_asset, place_page_id, device_platform, product_id, publisher_platform, platform_position, region, skan_conversion_id, title_asset, video_asset
                  action_breakdowns:                               # [optional] [array] List of action_breakdowns which connector will sync
                    "action_carousel_card_id"                      # [optional] [enum] Action breakdown to sync | Choose one of either: action_carousel_card_id, action_carousel_card_name, action_canvas_component_name, action_destination, action_device, action_reaction, action_target_id, action_type, action_video_sound, action_video_type
                  aggregation: "Day"                               # [optional] [enum] Options to select aggregation duration | Choose one of either: Day, Week, Month
                  config_type: "Prebuilt"                          # [optional] [enum] Option to select Prebuilt Reports or Custom Reports | Choose one of either: Prebuilt, Custom
                  prebuilt_report_name: "ACTION_CANVAS_COMPONENT"  # [optional] [enum] The report name to which connector will sync the data | Choose one of either: ACTION_CANVAS_COMPONENT, ACTION_CAROUSEL_CARD, ACTION_CONVERSION_DEVICE, ACTION_PRODUCT_ID, ACTION_REACTIONS, ACTION_VIDEO_SOUND, ACTION_VIDEO_VIEW_TYPE, BASIC_AD, BASIC_AD_SET, BASIC_ALL_LEVELS, BASIC_CAMPAIGN, DELIVERY_PLATFORM, DELIVERY_PLATFORM_AND_DEVICE, DELIVERY_PURCHASE_ROAS, DEMOGRAPHICS_AGE, DEMOGRAPHICS_AGE_AND_GENDER, DEMOGRAPHICS_COUNTRY, DEMOGRAPHICS_DMA_REGION, DEMOGRAPHICS_GENDER, DEMOGRAPHICS_REGION
                  action_report_time: "impression"                 # [optional] [enum] The report time of action stats | Choose one of either: impression, conversion, mixed
                  click_attribution_window: "NONE"                 # [optional] [enum] Time period to attribute conversions based on clicks | Choose one of either: NONE, DAY_1, DAY_7
                  view_attribution_window: "NONE"                  # [optional] [enum] Time period to attribute conversions based on views | Choose one of either: NONE, DAY_1, DAY_7
              timeframe_months: "THREE"                            # [optional] [enum] Number of months' worth of reporting data you'd like to include in your initial sync | Choose one of either: THREE, SIX, TWELVE, TWENTY_FOUR, ALL_TIME
              sync_metadata: true                                  # [optional] [boolean] Parameter defining whether to enable or disable metadata synchronisation
            auth:                                                  # [optional] Auth for Facebook Ads Connector
              refresh_token: "refresh_token"                       # [required] [non-empty string] The long-lived Refresh token along with the client_id and client_secret parameters carry the information necessary to get a new access token for API resources
              client_access:                                       # [optional] Client access for Facebook Ads Connector
                client_id: "client_id"                             # [required] [non-empty string] Client ID of your Facebook Ads client application
                client_secret: "client_secret"                     # [required] [non-empty string] Client Secret of your Facebook Ads client application
          ## End one of
        output:                                                    # [required] Object for Fivetran Destination
          database: "database"                                     # [required] [database-ari] Database to store Fivetran tables
      # OR
      dbt:                                                         # [optional] Object for dbt transformations
        projectName: "projectName"                                 # [required] [non-empty string] dbt Project Name
        imageTag: "imageTag"                                       # [required] [non-empty string] dbt Docker Image Tag
        filter:                                                    # [optional] Filter to include/exclude the dbt models
          ## Any of:
          include:                                                 # [optional] [non-empty array] List of model/tag/path to include in dbt run
            "include"                                              # [optional] [non-empty string] 
          ## OR
          exclude:                                                 # [optional] [non-empty array] List of model/tag/path to exclude in dbt run
            "exclude"                                              # [optional] [non-empty string] 
          ## End any of
      # OR
      tableQuality:                                                # [optional] [non-empty array] Table Quality Array
        - table: "table"                                           # [required] [table-ari] Table ID
          rules:                                                   # [required] [array] List of rules to be applied to the table
            - ruleId: "ruleId"                                     # [required] [non-empty string] Rule ID
              summary: "summary"                                   # [optional] [non-empty string] Summary of the rule
              check: "check"                                       # [required] [check-ari] Check to be applied to the table
              fatal: false                                         # [optional] [boolean] Whether the rule is fatal
              parameters:                                          # [optional] [non-empty array] undefined
                yourPropertyName:                                  # [additional property] undefined
      # OR
      fileIngest:                                                  # [optional] Object for File Ingestion
        source: "source"                                           # [optional] [non-empty string] External S3 bucket path
        iamRole: "iamRole"                                         # [optional] [non-empty string] IAM Role ARN
        mode: "append"                                             # [required] [enum] Ingestion Mode | Choose one of either: append, overwrite
        fileType:                                                  # [required] File Type
          format: "csv"                                            # [required] [enum] Format of the input file. eg csv, jsonlines, parquet | Choose one of either: csv, jsonlines, parquet
          delimiter: "delimiter"                                   # [optional] [non-empty string] Delimiter to parse the input file
        nodeType: "nodeType"                                       # [optional] [non-empty string] Node type to be used for driver and worker nodes in cluster
        autoscale:                                                 # [optional] Minimum and Maximum number of nodes to be used during autoscaling
          minimum: 1                                               # [required] [integer] Minimum nodes (min: 1) (max: 20)
          maximum: 1                                               # [required] [integer] Maximum nodes (min: 1) (max: 20)
        output: "output"                                           # [required] [table-ari] Destination table
        derivedColumns:                                            # [optional] [array] List of derived columns
          - column: "column"                                       # [required] [attribute-ari] ARI of the derived column
            derivedFrom: "derivedFrom"                             # [required] [attribute-ari] ARI of the column to derive data from
            expression:                                            # [required] Object which describes how data is derived/transformed from the source column to the target column
              ## One of:
              predefined: "toDate"                                 # [optional] [enum] Platform-defined transformation rules - these are hardcoded and only work on certain column types | Choose one of either: toDate, toTimestamp, vpcAccountFromFilename, vpcRegionFromFilename, vpcYearFromFilename, vpcMonthFromFilename, vpcDayFromFilename
              ## End one of
      # OR
      socratesStreamIngest:                                        # [optional] Object for Socrates Streams Ingestion
        stream: "stream"                                           # [required] [stream-ari] Source stream
        table: "table"                                             # [required] [table-ari] Destination table
        avi: "avi"                                                 # [required] [string] AVI of each stream
        schemaAri: "schemaAri"                                     # [required] [string] Schema ARI for the stream
        clusterSize: SMALL                                         # [optional] [enum] T-Shirt size to be used for the cluster. Allowed Values : SMALL, MEDIUM, LARGE and XLARGE. Default Value : SMALL | Choose one of either: SMALL, MEDIUM, LARGE, XLARGE
      # End one of
# Template for the optional `outputs` section. Either or both of `tables` and
# `views` may be given; each is an array, so its placeholder item is written as
# a sequence entry ("- ...") rather than a bare scalar, which would parse as a
# plain string.
outputs:                                                           # [optional] Flow outputs
  # Any of:
  tables:                                                          # [optional] [non-empty array] List of output tables
    - "tables"                                                     # [optional] [table-ari] Table identifier
  # OR
  views:                                                           # [optional] [non-empty array] List of output views
    - "views"                                                      # [optional] [view-ari] View identifier
  # End any of

Examples

socrates-streams-flow

1
2
# Example flow descriptor: ingest a Socrates stream into a destination table.
# All string values are single-quoted so none of them depend on implicit typing.
apiVersion: 'v1alpha1'
id: 'ari:cloud:data-lake::descriptor/flow/d317629b-2348-480f-af01-90ccc050802a'
name: 'socrates_streams_ingest_flow'
summary: 'A fully defined flow descriptor for socrates streams ingestion.'
documentation: 'https://developer.atlassian.com/'
product: 'ari:cloud:data-lake::descriptor/product/719d7e21-63ba-4bc5-a9b0-8ce1d2c4a288'
# Contact points grouped by role (author, support, operations).
contacts:
  author:
    - email: 'pperson16@atlassian.com'
    - slack: 'socrates-vnext'
  support:
    - email: 'pperson17@atlassian.com'
    - slack: 'socrates-vnext'
  operations:
    - opsgenie: 'Vaporeon - Example Roster UNPAID'
    - slack: 'socrates-vnext'
# This example uses the alwaysOn condition, given as an empty mapping.
condition:
  alwaysOn: {}
steps:
  # Single step that uses the socratesStreamIngest transformation.
  - name: 'ingest'
    summary: 'Run the Socrates Stream ingestion'
    transformation:
      socratesStreamIngest:
        stream: 'ari:cloud:data-lake::descriptor/stream/2d7b7657-cf6d-4a50-9b1c-67934914c593'
        table: 'ari:cloud:data-lake::descriptor/table/ec919101-8240-4d35-8eeb-fbcda29a3272'
        avi: 'avi:browser-metrics.page-visible:performance:measured'
        schemaAri: 'ari:cloud:platform-services::streamhub-schema/analytics-service/analytics_enriched_operational_event_schema_v2.0.json'
        clusterSize: 'SMALL'

continuous-file-ingest-flow

1
2
# Example flow descriptor: file ingestion from an S3 path into a destination
# table, using the alwaysOn condition.
apiVersion: v1alpha1
id: ari:cloud:data-lake::descriptor/flow/03b69f58-8843-45b4-85ec-5fedfd7bce99
name: continuous_file_ingest_flow
summary: A fully defined flow descriptor for continuous file ingestion
product: ari:cloud:data-lake::descriptor/product/1776ef05-8d17-4b3a-ba07-44fedca58cfd
# Contact points grouped by role (author, support, operations).
contacts:
  author:
    - email: "pperson16@atlassian.com"
    - slack: "socrates-vnext"
  support:
    - email: "pperson17@atlassian.com"
    - slack: "socrates-vnext"
  operations:
    - opsgenie: "Vaporeon - Example Roster UNPAID"
    - slack: "socrates-vnext"
condition:
  alwaysOn: {}
steps:
  - name: file_ingestion
    summary: "Continuous File Ingestion"
    transformation:
      fileIngest:
        source: "s3://bucket/underscore_path/to/files"
        # The fileIngest specification declares iamRole as a non-empty string
        # holding the IAM Role ARN, so the account id and role name are given
        # as one ARN instead of a nested awsAccountId/roleName mapping.
        iamRole: "arn:aws:iam::123456789012:role/my-role-abc"
        mode: "append"
        fileType:
          format: "csv"
          delimiter: ","
        # Node-count bounds; the specification allows 1 to 20 for each.
        autoscale:
          minimum: 1
          maximum: 5
        output: "ari:cloud:data-lake::descriptor/table/26098c54-99e0-477a-bef0-3d4f6bdc8d60"

batch-file-ingest-flow

1
2
# Example flow descriptor: file ingestion from an S3 path into a destination
# table, triggered by a cron schedule.
apiVersion: v1alpha1
id: ari:cloud:data-lake::descriptor/flow/03b69f58-8843-45b4-85ec-5fedfd7bce99
name: batch_file_ingest_flow
summary: A fully defined flow descriptor for batch file ingestion
product: ari:cloud:data-lake::descriptor/product/1776ef05-8d17-4b3a-ba07-44fedca58cfd
# Contact points grouped by role (author, support, operations).
contacts:
  author:
    - email: "pperson16@atlassian.com"
    - slack: "socrates-vnext"
  support:
    - email: "pperson17@atlassian.com"
    - slack: "socrates-vnext"
  operations:
    - opsgenie: "Vaporeon - Example Roster UNPAID"
    - slack: "socrates-vnext"
condition:
  schedule:
    # Quoted so the leading "*" is not read as a YAML alias indicator.
    cron: "* * * * *"
    # Quoted so the timestamp stays a string.
    startDate: "2023-08-30T00:00:00Z"
steps:
  - name: file_ingestion
    summary: "Batch File Ingestion"
    transformation:
      fileIngest:
        source: "s3://bucket/underscore_path/to/files"
        # The fileIngest specification declares iamRole as a non-empty string
        # holding the IAM Role ARN, so the account id and role name are given
        # as one ARN instead of a nested awsAccountId/roleName mapping.
        iamRole: "arn:aws:iam::123456789012:role/my-role-abc"
        mode: "append"
        fileType:
          format: "csv"
          delimiter: ","
        # Node-count bounds; the specification allows 1 to 20 for each.
        autoscale:
          minimum: 1
          maximum: 5
        output: "ari:cloud:data-lake::descriptor/table/26098c54-99e0-477a-bef0-3d4f6bdc8d60"

fivetran-ingest-flow

1
2
# Example flow descriptor: scheduled Fivetran ingestion using the fivetran_log
# connector, writing into a destination database.
# All string values are single-quoted so none of them depend on implicit typing.
apiVersion: 'v1alpha1'
id: 'ari:cloud:data-lake::descriptor/flow/03b69f58-8843-45b4-85ec-5fedfd7bce99'
name: 'fivetran_ingest_flow'
summary: 'A fully defined flow descriptor for fivetran log connector'
product: 'ari:cloud:data-lake::descriptor/product/1776ef05-8d17-4b3a-ba07-44fedca58cfd'
# Contact points grouped by role (author, support, operations).
contacts:
  author:
    - email: 'pperson16@atlassian.com'
    - slack: 'socrates-vnext'
  support:
    - email: 'pperson17@atlassian.com'
    - slack: 'socrates-vnext'
  operations:
    - opsgenie: 'Vaporeon - Example Roster UNPAID'
    - slack: 'socrates-vnext'
condition:
  schedule:
    # Quoted so the leading "*" is not read as a YAML alias indicator.
    cron: '* * * * *'
    # Quoted so the timestamp stays a string.
    startDate: '2023-08-30T00:00:00Z'
steps:
  # Single step that uses the fivetranIngest transformation.
  - name: 'ingest'
    summary: 'Run the Fivetran ingestion'
    transformation:
      fivetranIngest:
        connector:
          fivetran_log:
            paused: true
            update_schema: false
            config:
              group_name: 'group_name'
              is_account_level_connector: true
        output:
          database: 'ari:cloud:data-lake::descriptor/database/26098c54-99e0-477a-bef0-3d4f6bdc8d60'

Rate this page: