The fluid descriptor for provisioning a Flow (aka data pipeline).
# flow # The fluid descriptor for provisioning a Flow (aka data pipeline). apiVersion: "v1alpha1" # [required] [enum] The current version of this resource | Choose one of either: v1alpha1 id: "id" # [required] [flow-ari] A unique identifier for the flow being described name: "name" # [required] [non-empty string] The name of the resource being described summary: "summary" # [required] [non-empty string] Short description about the resource being described documentation: "documentation" # [optional] [uri] URL link to documentation about the resource being described status: "Archived" # [optional] [enum] Lifecycle status of the resource being described | Choose one of either: Archived, Available, Decommissioned, Deleted, Deprecated, Development, Preview managed: "full" # [optional] [enum] Indicates if the resource is fully managed (full, or if omitted) or metadata managed (metadata) | Choose one of either: full, metadata contacts: # [optional] Contact objects for the resource being described # Any of: author: # [optional] [non-empty array] List of Slack channels and/or emails to contact for questions about the descriptor ## One of: - email: "email" # [optional] [email contact] A contact email address ## OR - slack: "slack" # [optional] [non-empty slack contact] Slack channel for contacting or alerting purposes ## End one of # OR support: # [optional] [non-empty array] List of Slack channels and/or emails to contact for help about the descriptor ## One of: - email: "email" # [optional] [email contact] A contact email address ## OR - slack: "slack" # [optional] [non-empty slack contact] Slack channel for contacting or alerting purposes ## End one of # OR operations: # [optional] [non-empty array] List of Slack channels and/or on-call schedules to send alerts and notifications related to the descriptor ## One of: - slack: "slack" # [optional] [non-empty slack contact] Slack channel for contacting or alerting purposes ## OR - opsgenie: "opsgenie" # [optional] 
[non-empty opsgenie contact] Name of your Opsgenie schedule. Can be found here https://atlassian.app.opsgenie.com/schedule/whoIsOnCall ## End one of # End any of product: "product" # [required] [product-ari] Product this flow belongs to config: # [optional] [array] Optional configuration definitions - target: # [optional] Target environment this configuration applies to. Omitting a key matches all values for that key. Omitting this whole block is equivalent to not setting all keys environment: "development" # [optional] [enum] environment this applies to | Choose one of either: development, staging, production variables: # [required] key value pairs for in-descriptor configuration yourPropertyName: # [additional property] key value pairs for in-descriptor configuration secretName: "secretName" # [required] [string] name of the secret to reference condition: # [required] Condition on which this flow starts # One of: alwaysOn: # [optional] Streaming pipeline condition object maxRestarts: -1 # [optional] [integer] Maximum number of restarts for alwaysOn pipeline. Use -1 for unlimited restarts, 0 for no restarts and positive integer for limited restarts. (Will be ignored for federal environments) (min: -1) (default: -1) processInterval: 10000 # [optional] [integer] Interval at which data is processed in microbatch (in milliseconds) (min: 1000) (max: 600000) # OR backfill: # [optional] Backfill schedule object cron: "cron" # [required] [non-empty string] Standard unix CRON schedule informs Airflow to use period grouping based on this startDate: "startDate" # [required] [non-empty date-time] Start timestamp for backfill schedule endDate: "endDate" # [required] [non-empty date-time] End timestamp for backfill schedule maxRetries: 0 # [optional] [integer] Number of retries for a scheduled pipeline. Use 0 for no retries and positive integer for limited retries. 
Ignore for default 3 retries (min: 0) # OR schedule: # [optional] CRON schedule object cron: "cron" # [required] [non-empty string] Standard unix CRON schedule startDate: "startDate" # [required] [non-empty date-time] Start timestamp for cron schedule endDate: "endDate" # [optional] [non-empty date-time] End timestamp for cron schedule maxRetries: 0 # [optional] [integer] Number of retries for a scheduled pipeline. Use 0 for no retries and positive integer for limited retries. Ignore for default 3 retries (min: 0) # OR once: # [optional] Run it Once maxRetries: 0 # [optional] [integer] Number of retries for a scheduled pipeline. Use 0 for no retries and positive integer for limited retries. Ignore for default 3 retries (min: 0) # End one of dependencies: # [optional] Details of the upstream flows this flow is dependent on flows: # [required] [non-empty array] List of details of upstream flows - id: "id" # [required] [flow-ari] A unique identifier for the upstream flow being described lookbackTime: "lookbackTime" # [required] [duration] Lookback time for the upstream flow observationWindow: "observationWindow" # [required] [duration] The window of time to look for the successful run for the upstream flow in status: success # [optional] [enum] Status of the upstream flow on which downstream flow depends | Choose one of either: success, failure timeout: PT15M # [optional] [duration] ISO8601 formatted duration representing the time after which the flow will time out if it is waiting for an upstream flow to complete alert: # [optional] Flow alerts failure: # [required] Flow failure alerts priority: "P1" # [required] [enum] Flow priority | Choose one of either: P1, P2, P3, P4 runbook: "runbook" # [optional] [uri] Flow runbook notifications: # [required] Environment(s) for which to trigger a notification. 
# Any of: development: # [optional] [non-empty array] Array of notification targets "slack" # [optional] [enum] undefined | Choose one of either: slack, opsgenie # OR staging: # [optional] [non-empty array] Array of notification targets "slack" # [optional] [enum] undefined | Choose one of either: slack, opsgenie # OR production: # [optional] [non-empty array] Array of notification targets "slack" # [optional] [enum] undefined | Choose one of either: slack, opsgenie # End any of steps: # [required] [non-empty array] list of flow steps - name: "name" # [required] [non-empty string] The name of this step (must be unique in this flow) summary: "summary" # [optional] [non-empty string] Short description of this step timeout: "timeout" # [optional] [duration] ISO8601 formatted duration representing the timeout before this step is terminated; if omitted then step can run for as long as the underlying system allows parents: # [optional] [non-empty array] List of step identifiers this step depends on; if omitted it is assumed to be the root step "parents" # [optional] [string] Name of parent step transformation: # [required] The transformation logic # One of: fivetranIngest: # [optional] Object for Fivetran connector: # [required] Different ingest connectors supported for fivetran ## One of: linkedin_ads: # [optional] LinkedIn Ads connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for LinkedIn Ads Connector sync_mode: "SpecificAccounts" # [required] [enum] Whether to sync all accounts or specific accounts | Choose one of either: SpecificAccounts, AllAccounts accounts: # [optional] [array] Specific accounts to sync "accounts" # [optional] [non-empty string] Accounts to be synced ad_analytics: "SpecificReports" # [required] [enum] Whether to sync all reports or specific reports | Choose one of either: 
SpecificReports, AllReports reports: # [optional] [array] Specific reports to sync "reports" # [optional] [non-empty string] Reports to be synced view_through_attribution_window_size: "DAY_1" # [required] [enum] The time frame of the view-through attribution window | Choose one of either: DAY_1, DAY_7, DAY_28, DAY_30 post_click_attribution_window_size: "DAY_1" # [required] [enum] The time frame of the post-click attribution window | Choose one of either: DAY_1, DAY_7, DAY_28, DAY_30 timeframe_months: "THREE" # [required] [enum] Number of months' worth of reporting data you'd like to include in your initial sync | Choose one of either: THREE, SIX, TWELVE, TWENTY_FOUR, ALL_TIME ## OR workday_strategic_sourcing: # [optional] Workday Strategic Sourcing Connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for Workday Strategic Sourcing Connector region: "region" # [required] [non-empty string] The region of the Workday instance x_api_key: "x_api_key" # [required] [non-empty string] The API key for the Workday Strategic Sourcing connector x_user_token: "x_user_token" # [required] [non-empty string] The user token for the Workday Strategic Sourcing connector x_user_email: "x_user_email" # [required] [non-empty string] The user email for the Workday Strategic Sourcing connector ## OR coupa: # [optional] Coupa Connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for Coupa Connector instance: "instance" # [required] [non-empty string] The instance name of your Coupa account in the URL. client_id: "client_id" # [required] [non-empty string] Your Coupa client_id. client_secret: "client_secret" # [required] [non-empty string] Your Coupa client_secret. 
## OR concur: # [optional] Concur Connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for Concur Connector auth_mode: "PasswordGrant" # [required] [enum] Authentication mode for Concur | Choose one of either: PasswordGrant, CompanyLevel client_id: "client_id" # [required] [non-empty string] Client ID for the connector client_secret: "client_secret" # [required] [non-empty string] Client Secret for the connector username: "username" # [optional] [non-empty string] Username for the connector login_password: "login_password" # [optional] [non-empty string] Password for the connector company_uuid: "company_uuid" # [optional] [non-empty string] Company UUID for the connector company_request_token: "company_request_token" # [optional] [non-empty string] Company Request Token for the connector region: "region" # [required] [non-empty string] Region for the connector ## OR workday: # [optional] Workday Raas Connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for Workday Raas Connector report_url: "report_url" # [required] [non-empty string] URL for a live custom report primary_keys: # [required] [array] Primary key(s) to be set in the destination table for the custom report "primary_keys" # [required] [non-empty string] Primary key for the table table: "table" # [required] [non-empty string] The table name within the schema to which connector will sync the data user_name: "user_name" # [required] [non-empty string] Workday username for the connector password: "password" # [required] [non-empty string] Workday password for the connector report_format_type: "json" # [optional] [enum] Workday report format type for the connector | Choose one of either: json, 
csv support_nested_columns: true # [optional] [boolean] Unpack the nested columns and sync them separately ## OR workday_hcm: # [optional] Workday HCM Connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for Workday HCM Connector domain_host_name: "domain_host_name" # [required] [non-empty string] Domain Host name for the connector username: "username" # [required] [non-empty string] Username for the connector password: "password" # [required] [non-empty string] Password for the connector ## OR qualtrics: # [optional] Qualtrics Connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for Qualtrics Connector data_center: "data_center" # [required] [non-empty string] Data center ID of the Qualtrics account api_token: "api_token" # [required] [non-empty string] API token of the Qualtrics account api_requests_per_minute: 1 # [required] [integer] Allowed number of API requests to Qualtrics per minute (min: 1) ## OR fivetran_log: # [optional] Fivetran Log connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for Fivetran Log Connector group_name: "group_name" # [optional] [non-empty string] The group name of the target_group_id is_account_level_connector: true # [optional] [boolean] Retrieve account-level logs ## OR intercom: # [optional] Intercom connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created auth: # [optional] Auth for Intercom Connector access_token: "access_token" # 
[required] [non-empty string] The long-lived Access Token carries the information necessary for API resources to fetch data ## OR marketo: # [optional] Marketo connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for Marketo Connector endpoint: "endpoint" # [required] [non-empty string] Marketo REST API endpoint identity: "identity" # [required] [non-empty string] Marketo REST API identity URL user_id: "user_id" # [required] [non-empty string] Marketo SOAP API User ID encryption_key: "encryption_key" # [required] [non-empty string] Marketo SOAP API Encryption Key soap_uri: "soap_uri" # [required] [non-empty string] Marketo SOAP API Endpoint client_id: "client_id" # [required] [non-empty string] Marketo REST API Client ID client_secret: "client_secret" # [required] [non-empty string] Marketo REST API Client Secret api_quota: 1 # [optional] [integer] Allowed number of API requests to Marketo instance per day (min: 1) ## OR oracle_fusion_cloud_apps_crm: # [optional] Oracle Fusion Cloud apps CRM connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for Oracle Fusion Cloud apps CRM Connector schema_prefix: "schema_prefix" # [required] [non-empty string] The connector schema prefix has to be unique within the group (destination) server_url: "server_url" # [required] [non-empty string] The Oracle Fusion Cloud Instance URL username: "username" # [required] [non-empty string] The Oracle Fusion Cloud username password: "password" # [required] [non-empty string] The Oracle Fusion Cloud user password ## OR oracle_fusion_cloud_apps_fscm: # [optional] Oracle Fusion Cloud apps FSCM connector update_schema: true # [optional] [boolean] Should the schema be updated for 
Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for Oracle Fusion Cloud apps FSCM Connector schema_prefix: "schema_prefix" # [required] [non-empty string] The connector schema prefix has to be unique within the group (destination) server_url: "server_url" # [required] [non-empty string] The Oracle Fusion Cloud Instance URL username: "username" # [required] [non-empty string] The Oracle Fusion Cloud username password: "password" # [required] [non-empty string] The Oracle Fusion Cloud user password ## OR salesforce: # [optional] Salesforce connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for Salesforce Connector base_url: "base_url" # [optional] [non-empty string] The custom Salesforce domain sync_formula_fields: true # [optional] [boolean] Sync formula fields auth: # [optional] Config for Salesforce Connector refresh_token: "refresh_token" # [required] [non-empty string] The long-lived Refresh token along with the client_id and client_secret parameters carry the information necessary to get a new access token for API resources client_access: # [optional] Client access for Salesforce Connector client_id: "client_id" # [required] [non-empty string] Client ID of your Salesforce client application client_secret: "client_secret" # [required] [non-empty string] Client Secret of your Salesforce client application ## OR youtube_analytics: # [optional] Youtube Analytics connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for Youtube Analytics Connector content_owner_id: "content_owner_id" # [required] [non-empty string] Used only for Content Owner reports. 
The ID of the content owner for whom the API request is being made auth: # [optional] Config for Youtube Analytics Connector refresh_token: "refresh_token" # [required] [non-empty string] The long-lived Refresh token along with the client_id and client_secret parameters carry the information necessary to get a new access token for API resources client_access: # [optional] Client access for YouTube Analytics Connector client_id: "client_id" # [required] [non-empty string] Client ID of your YouTube Analytics client application client_secret: "client_secret" # [required] [non-empty string] Client Secret of your YouTube Analytics client application ## OR netsuite_suiteanalytics: # [optional] Netsuite Suite Analytics connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for Netsuite Suite Analytics Connector host: "host" # [required] [non-empty string] The NetSuite service host address port: "port" # [required] [non-empty string] The NetSuite service host port datasource: "datasource" # [required] [non-empty string] The NetSuite data source value: NetSuite.com account: "account" # [required] [non-empty string] The NetSuite Account ID role: "role" # [required] [non-empty string] The NetSuite Role ID for connection email: "email" # [optional] [non-empty string] The NetSuite user's email address password: "password" # [optional] [non-empty string] The NetSuite user's password consumer_key: "consumer_key" # [optional] [non-empty string] The NetSuite consumer key obtained when creating an Integration Record consumer_secret: "consumer_secret" # [optional] [non-empty string] The NetSuite consumer secret obtained when creating an Integration Record token_key: "token_key" # [optional] [non-empty string] The NetSuite token ID obtained when creating an Access Token token_secret: "token_secret" # [optional] [non-empty string] 
The NetSuite token password obtained when creating an Access Token ## OR bingads: # [optional] Microsoft Advertising connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for Microsoft Advertising Connector sync_mode: "SpecificAccounts" # [optional] [enum] Whether to sync all accounts or specific accounts | Choose one of either: SpecificAccounts, AllAccounts accounts: # [optional] [array] Specific accounts to sync "accounts" # [optional] [non-empty string] Accounts to be synced timeframe_months: "THREE" # [optional] [enum] Number of months' worth of reporting data you'd like to include in your initial sync | Choose one of either: THREE, SIX, TWELVE, TWENTY_FOUR, ALL_TIME rollback_window_size: 35 # [optional] [enum] The time frame of the rollback sync to capture conversions | Choose one of either: 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95 auth: # [optional] Config for Microsoft Advertising Connector refresh_token: "refresh_token" # [required] [non-empty string] The long-lived Refresh token along with the client_id and client_secret parameters carry the information necessary to get a new access token for API resources client_access: # [optional] Client access for Microsoft Advertising Connector client_id: "client_id" # [required] [non-empty string] Client ID of your Microsoft Advertising client application client_secret: "client_secret" # [required] [non-empty string] Client Secret of your Microsoft Advertising client application ## OR facebook_ads: # [optional] Facebook Ads connector update_schema: true # [optional] [boolean] Should the schema be updated for Fivetran connector paused: true # [optional] [boolean] Should the connector be paused when created config: # [optional] Config for Facebook Ads Connector sync_mode: "AllAccounts" # [required] [enum] Option to select connector should sync all 
accounts or specific accounts | Choose one of either: AllAccounts, SpecificAccounts accounts: # [required] [array] List of accounts of which connector will sync the data "accounts" # [required] [non-empty string] Accounts to sync custom_tables: # [optional] [array] List of custom tables - table_name: "table_name" # [optional] [non-empty string] The table name within the schema to which the connector will sync the data fields: # [optional] [array] List of fields which connector will sync "account_currency" # [optional] [enum] Accounts to sync | Choose one of either: account_currency, account_id, account_name, action_values, actions, ad_id, ad_name, adset_id, adset_name, attribution_setting, buying_type, campaign_id, campaign_name, canvas_avg_view_percent, canvas_avg_view_time, catalog_segment_value, clicks, conversion_rate_ranking, conversion_values, conversions, cost_per_10_sec_video_view, cost_per_action_type, cost_per_conversion, cost_per_estimated_ad_recallers, cost_per_inline_link_click, cost_per_inline_post_engagement, cost_per_outbound_click, cost_per_thruplay, cost_per_unique_action_type, cost_per_unique_click, cost_per_unique_inline_link_click, cost_per_unique_outbound_click, cpc, cpm, cpp, ctr, engagement_rate_ranking, estimated_ad_recall_rate, estimated_ad_recallers, frequency, full_view_impressions, full_view_reach, gender_targeting, impressions, inline_link_click_ctr, inline_link_clicks, inline_post_engagement, instant_experience_clicks_to_open, instant_experience_clicks_to_start, instant_experience_outbound_clicks, labels, location, mobile_app_purchase_roas, objective, outbound_clicks, outbound_clicks_ctr, purchase_roas, quality_ranking, reach, relevance_score, social_spend, spend, unique_actions, unique_clicks, unique_ctr, unique_inline_link_click_ctr, unique_inline_link_clicks, unique_link_clicks_ctr, unique_outbound_clicks, unique_outbound_clicks_ctr, video_10_sec_watched_actions, video_30_sec_watched_actions, video_avg_percent_watched_actions, 
video_avg_time_watched_actions, video_p100_watched_actions, video_p25_watched_actions, video_p50_watched_actions, video_p75_watched_actions, video_p95_watched_actions, video_play_actions, video_play_curve_actions, video_thruplay_watched_actions, website_ctr, website_purchase_roas breakdowns: # [optional] [array] List of breakdowns which connector will sync "ad_format_asset" # [optional] [enum] Accounts to sync | Choose one of either: ad_format_asset, age, app_id, body_asset, call_to_action_asset, country, description_asset, dma, gender, frequency_value, hourly_stats_aggregated_by_advertiser_time_zone, hourly_stats_aggregated_by_audience_time_zone, image_asset, impression_device, link_url_asset, place_page_id, device_platform, product_id, publisher_platform, platform_position, region, skan_conversion_id, title_asset, video_asset action_breakdowns: # [optional] [array] List of action_breakdowns which connector will sync "action_carousel_card_id" # [optional] [enum] Accounts to sync | Choose one of either: action_carousel_card_id, action_carousel_card_name, action_canvas_component_name, action_destination, action_device, action_reaction, action_target_id, action_type, action_video_sound, action_video_type aggregation: "Day" # [optional] [enum] Options to select aggregation duration | Choose one of either: Day, Week, Month config_type: "Prebuilt" # [optional] [enum] Option to select Prebuilt Reports or Custom Reports | Choose one of either: Prebuilt, Custom prebuilt_report_name: "ACTION_CANVAS_COMPONENT" # [optional] [enum] The report name to which connector will sync the data | Choose one of either: ACTION_CANVAS_COMPONENT, ACTION_CAROUSEL_CARD, ACTION_CONVERSION_DEVICE, ACTION_PRODUCT_ID, ACTION_REACTIONS, ACTION_VIDEO_SOUND, ACTION_VIDEO_VIEW_TYPE, BASIC_AD, BASIC_AD_SET, BASIC_ALL_LEVELS, BASIC_CAMPAIGN, DELIVERY_PLATFORM, DELIVERY_PLATFORM_AND_DEVICE, DELIVERY_PURCHASE_ROAS, DEMOGRAPHICS_AGE, DEMOGRAPHICS_AGE_AND_GENDER, DEMOGRAPHICS_COUNTRY, 
DEMOGRAPHICS_DMA_REGION, DEMOGRAPHICS_GENDER, DEMOGRAPHICS_REGION action_report_time: "impression" # [optional] [enum] The report time of action stats | Choose one of either: impression, conversion, mixed click_attribution_window: "NONE" # [optional] [enum] The report time of action stats | Choose one of either: NONE, DAY_1, DAY_7 view_attribution_window: "NONE" # [optional] [enum] The report time of action stats | Choose one of either: NONE, DAY_1, DAY_7 timeframe_months: "THREE" # [optional] [enum] Number of months' worth of reporting data you'd like to include in your initial sync | Choose one of either: THREE, SIX, TWELVE, TWENTY_FOUR, ALL_TIME sync_metadata: true # [optional] [boolean] Parameter defining whether to enable or disable metadata synchronisation auth: # [optional] Config for Facebook Ads Connector refresh_token: "refresh_token" # [required] [non-empty string] The long-lived Refresh token along with the client_id and client_secret parameters carry the information necessary to get a new access token for API resources client_access: # [optional] Client access for Facebook Ads Connector client_id: "client_id" # [required] [non-empty string] Client ID of your Facebook Ads client application client_secret: "client_secret" # [required] [non-empty string] Client Secret of your Facebook Ads client application ## End one of output: # [required] Object for Fivetran Destination database: "database" # [required] [database-ari] Database to store Fivetran tables # OR dbt: # [optional] Object for dbt transformations projectName: "projectName" # [required] [non-empty string] dbt Project Name imageTag: "imageTag" # [required] [non-empty string] dbt Docker Image Tag filter: # [optional] Filter to include/exclude the dbt models ## Any of: include: # [optional] [non-empty array] List of model/tag/path to include in dbt run "include" # [optional] [non-empty string] ## OR exclude: # [optional] [non-empty array] List of model/tag/path to exclude in dbt 
run "exclude" # [optional] [non-empty string] ## End any of # OR tableQuality: # [optional] [non-empty array] Table Quality Array - table: "table" # [required] [table-ari] Table ID rules: # [required] [array] List of rules to be applied to the table - ruleId: "ruleId" # [required] [non-empty string] Rule ID summary: "summary" # [optional] [non-empty string] Summary of the rule check: "check" # [required] [check-ari] Check to be applied to the table fatal: false # [optional] [boolean] Whether the rule is fatal parameters: # [optional] [non-empty array] undefined yourPropertyName: # [additional property] undefined # OR fileIngest: # [optional] Object for File Ingestion source: "source" # [optional] [non-empty string] External S3 bucket path iamRole: "iamRole" # [optional] [non-empty string] IAM Role ARN mode: "append" # [required] [enum] Ingestion Mode | Choose one of either: append, overwrite fileType: # [required] File Type format: "csv" # [required] [enum] Format of the input file. eg csv, jsonlines, parquet | Choose one of either: csv, jsonlines, parquet delimiter: "delimiter" # [optional] [non-empty string] Delimiter to parse the input file nodeType: "nodeType" # [optional] [non-empty string] Node type to be used for driver and worker nodes in cluster autoscale: # [optional] Minimum and Maximum number of nodes to be used during autoscaling minimum: 1 # [required] [integer] Minimum nodes (min: 1) (max: 20) maximum: 1 # [required] [integer] Maximum nodes (min: 1) (max: 20) output: "output" # [required] [table-ari] Destination table derivedColumns: # [optional] [array] undefined - column: "column" # [required] [attribute-ari] ARI of the derived column derivedFrom: "derivedFrom" # [required] [attribute-ari] ARI of the column to derive data from expression: # [required] Object which describes how data is derived/transformed from the source column to the target column ## One of: predefined: "toDate" # [optional] [enum] Platform-defined transformation rules - these are 
hardcoded and only work on certain column types | Choose one of either: toDate, toTimestamp, vpcAccountFromFilename, vpcRegionFromFilename, vpcYearFromFilename, vpcMonthFromFilename, vpcDayFromFilename ## End one of # OR socratesStreamIngest: # [optional] Object for Socrates Streams Ingestion stream: "stream" # [required] [stream-ari] Source stream table: "table" # [required] [table-ari] Destination table avi: "avi" # [required] [string] AVI of each stream schemaAri: "schemaAri" # [required] [string] Schema ARI for the stream clusterSize: SMALL # [optional] [enum] T-Shirt size to be used for the cluster. Allowed Values : SMALL, MEDIUM, LARGE and XLARGE. Default Value : SMALL | Choose one of either: SMALL, MEDIUM, LARGE, XLARGE # End one of outputs: # [optional] Flow outputs # Any of: tables: # [optional] [non-empty array] List of output tables "tables" # [optional] [table-ari] Table identifier # OR views: # [optional] [non-empty array] List of output views "views" # [optional] [view-ari] View identifier # End any of
# Example flow descriptor: Socrates stream ingestion.
# Uses the alwaysOn condition with an empty object, and a single step whose
# transformation is socratesStreamIngest (stream -> table).
apiVersion: v1alpha1
id: ari:cloud:data-lake::descriptor/flow/d317629b-2348-480f-af01-90ccc050802a
name: socrates_streams_ingest_flow
summary: A fully defined flow descriptor for socrates streams ingestion.
documentation: https://developer.atlassian.com/
product: ari:cloud:data-lake::descriptor/product/719d7e21-63ba-4bc5-a9b0-8ce1d2c4a288

# Each contact list mixes entry kinds (email / slack / opsgenie).
contacts:
  author:
    - email: "pperson16@atlassian.com"
    - slack: "socrates-vnext"
  support:
    - email: "pperson17@atlassian.com"
    - slack: "socrates-vnext"
  operations:
    - opsgenie: "Vaporeon - Example Roster UNPAID"
    - slack: "socrates-vnext"

condition:
  alwaysOn: {}

steps:
  - name: ingest
    summary: "Run the Socrates Stream ingestion"
    transformation:
      socratesStreamIngest:
        stream: ari:cloud:data-lake::descriptor/stream/2d7b7657-cf6d-4a50-9b1c-67934914c593
        table: ari:cloud:data-lake::descriptor/table/ec919101-8240-4d35-8eeb-fbcda29a3272
        avi: avi:browser-metrics.page-visible:performance:measured
        schemaAri: ari:cloud:platform-services::streamhub-schema/analytics-service/analytics_enriched_operational_event_schema_v2.0.json
        clusterSize: SMALL
# Example flow descriptor: continuous file ingestion.
# Uses the alwaysOn condition with an empty object, and a single fileIngest
# step that appends CSV files from an S3 path into the output table.
apiVersion: v1alpha1
id: ari:cloud:data-lake::descriptor/flow/03b69f58-8843-45b4-85ec-5fedfd7bce99
name: continuous_file_ingest_flow
summary: A fully defined flow descriptor for continuous file ingestion
product: ari:cloud:data-lake::descriptor/product/1776ef05-8d17-4b3a-ba07-44fedca58cfd

# Each contact list mixes entry kinds (email / slack / opsgenie).
contacts:
  author:
    - email: "pperson16@atlassian.com"
    - slack: "socrates-vnext"
  support:
    - email: "pperson17@atlassian.com"
    - slack: "socrates-vnext"
  operations:
    - opsgenie: "Vaporeon - Example Roster UNPAID"
    - slack: "socrates-vnext"

condition:
  alwaysOn: {}

steps:
  - name: file_ingestion
    summary: "Continuous File Ingestion"
    transformation:
      fileIngest:
        source: "s3://bucket/underscore_path/to/files"
        # NOTE(review): the reference template documents iamRole as a
        # non-empty string (IAM Role ARN), but this example supplies a
        # mapping - confirm which form the schema accepts.
        iamRole:
          awsAccountId: "123456789012"
          roleName: "my-role-abc"
        mode: "append"
        # NOTE(review): nesting of delimiter under fileType is reconstructed
        # from the flattened page text - confirm against the schema.
        fileType:
          format: "csv"
          delimiter: ','
        autoscale:
          minimum: 1
          maximum: 5
        output: "ari:cloud:data-lake::descriptor/table/26098c54-99e0-477a-bef0-3d4f6bdc8d60"
# Example flow descriptor: batch file ingestion.
# Runs on a CRON schedule (no endDate given) with a single fileIngest step
# that appends CSV files from an S3 path into the output table.
apiVersion: v1alpha1
id: ari:cloud:data-lake::descriptor/flow/03b69f58-8843-45b4-85ec-5fedfd7bce99
name: batch_file_ingest_flow
summary: A fully defined flow descriptor for batch file ingestion
product: ari:cloud:data-lake::descriptor/product/1776ef05-8d17-4b3a-ba07-44fedca58cfd

# Each contact list mixes entry kinds (email / slack / opsgenie).
contacts:
  author:
    - email: "pperson16@atlassian.com"
    - slack: "socrates-vnext"
  support:
    - email: "pperson17@atlassian.com"
    - slack: "socrates-vnext"
  operations:
    - opsgenie: "Vaporeon - Example Roster UNPAID"
    - slack: "socrates-vnext"

condition:
  schedule:
    # Quoted: a plain scalar may not start with '*' (alias indicator).
    cron: "* * * * *"
    startDate: "2023-08-30T00:00:00Z"

steps:
  - name: file_ingestion
    summary: "Batch File Ingestion"
    transformation:
      fileIngest:
        source: "s3://bucket/underscore_path/to/files"
        # NOTE(review): the reference template documents iamRole as a
        # non-empty string (IAM Role ARN), but this example supplies a
        # mapping - confirm which form the schema accepts.
        iamRole:
          awsAccountId: "123456789012"
          roleName: "my-role-abc"
        mode: "append"
        # NOTE(review): nesting of delimiter under fileType is reconstructed
        # from the flattened page text - confirm against the schema.
        fileType:
          format: "csv"
          delimiter: ','
        autoscale:
          minimum: 1
          maximum: 5
        output: "ari:cloud:data-lake::descriptor/table/26098c54-99e0-477a-bef0-3d4f6bdc8d60"
# Example flow descriptor: Fivetran ingestion using the fivetran_log connector.
# Runs on a CRON schedule (no endDate given); the connector is created paused
# and its tables are written to the output database.
apiVersion: v1alpha1
id: ari:cloud:data-lake::descriptor/flow/03b69f58-8843-45b4-85ec-5fedfd7bce99
name: fivetran_ingest_flow
summary: A fully defined flow descriptor for fivetran log connector
product: ari:cloud:data-lake::descriptor/product/1776ef05-8d17-4b3a-ba07-44fedca58cfd

# Each contact list mixes entry kinds (email / slack / opsgenie).
contacts:
  author:
    - email: "pperson16@atlassian.com"
    - slack: "socrates-vnext"
  support:
    - email: "pperson17@atlassian.com"
    - slack: "socrates-vnext"
  operations:
    - opsgenie: "Vaporeon - Example Roster UNPAID"
    - slack: "socrates-vnext"

condition:
  schedule:
    # Quoted: a plain scalar may not start with '*' (alias indicator).
    cron: "* * * * *"
    startDate: "2023-08-30T00:00:00Z"

steps:
  - name: ingest
    summary: "Run the Fivetran ingestion"
    transformation:
      fivetranIngest:
        connector:
          fivetran_log:
            paused: true
            update_schema: false
            config:
              group_name: "group_name"
              is_account_level_connector: true
        # output is a sibling of connector, not part of the connector config.
        output:
          database: "ari:cloud:data-lake::descriptor/database/26098c54-99e0-477a-bef0-3d4f6bdc8d60"
Rate this page: