Skip to content

Instantly share code, notes, and snippets.

@MatthewDavidCampbell
Last active January 24, 2023 13:12
Show Gist options
  • Select an option

  • Save MatthewDavidCampbell/5f843c1e6896f98bb321d2b17706fce8 to your computer and use it in GitHub Desktop.

Select an option

Save MatthewDavidCampbell/5f843c1e6896f98bb321d2b17706fce8 to your computer and use it in GitHub Desktop.
Azure Data Factory Copy Activity
{
"name": "AzureSearch",
"properties": {
"parameters": {
"SearchUrl": {
"type": "string"
}
},
"annotations": [],
"type": "RestService",
"typeProperties": {
"url": "@{linkedService().SearchUrl}",
"enableServerCertificateValidation": true,
"authenticationType": "Anonymous"
}
},
"type": "Microsoft.DataFactory/factories/linkedservices"
}
{
"name": "AzureSearch",
"properties": {
"linkedServiceName": {
"referenceName": "AzureSearch",
"type": "LinkedServiceReference",
"parameters": {
"SearchUrl": {
"value": "@dataset().SearchUrl",
"type": "Expression"
}
}
},
"parameters": {
"SearchUrl": {
"type": "string"
},
"ResourcePath": {
"type": "string"
}
},
"annotations": [],
"type": "RestResource",
"typeProperties": {
"relativeUrl": {
"value": "@dataset().ResourcePath",
"type": "Expression"
}
},
"schema": [
{
"type": "object",
"properties": {
"value": {
"type": "array",
"items": {
"type": "object"
}
}
}
}
]
}
}
{
"name": "Search",
"properties": {
"activities": [
{
"name": "Copy",
"type": "Copy",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "RestSource",
"httpRequestTimeout": "00:01:40",
"requestInterval": "00.00:00:00.010",
"requestMethod": "GET",
"additionalHeaders": {
"api-key": {
"value": "@pipeline().parameters.SourceSearchApiKey",
"type": "Expression"
}
},
"paginationRules": {
"QueryParameters.{offset}": "RANGE:0:1:10"
}
},
"sink": {
"type": "RestSink",
"httpRequestTimeout": "00:01:40",
"requestInterval": 10,
"requestMethod": "POST",
"writeBatchSize": 10000,
"additionalHeaders": {
"api-key": {
"value": "@pipeline().parameters.SinkSearchApiKey",
"type": "Expression"
}
},
"httpCompressionType": "none"
},
"enableStaging": false
},
"inputs": [
{
"referenceName": "AzureSearch",
"type": "DatasetReference",
"parameters": {
"SearchUrl": {
"value": "@pipeline().parameters.SourceSearchUrl",
"type": "Expression"
},
"ResourcePath": "docs?api-version=2021-04-30-Preview&$top=10&$skip={offset}&$orderBy=Skapad desc"
}
}
],
"outputs": [
{
"referenceName": "AzureSearch",
"type": "DatasetReference",
"parameters": {
"SearchUrl": {
"value": "@pipeline().parameters.SinkSearchUrl",
"type": "Expression"
},
"ResourcePath": "docs/index?api-version=2021-04-30-Preview"
}
}
]
}
],
"parameters": {
"SourceSearchUrl": {
"type": "string"
},
"SourceSearchApiKey": {
"type": "string"
},
"SinkSearchUrl": {
"type": "string"
},
"SinkSearchApiKey": {
"type": "string"
}
},
"annotations": []
}
}
{
"name": "Search",
"properties": {
"activities": [
{
"name": "Copy",
"type": "Copy",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "RestSource",
"httpRequestTimeout": "00:01:40",
"requestInterval": "00.00:00:00.010",
"requestMethod": "GET",
"additionalHeaders": {
"api-key": {
"value": "@pipeline().parameters.SourceSearchApiKey",
"type": "Expression"
}
},
"paginationRules": {
"QueryParameters.{offset}": "RANGE:0:1:1"
}
},
"sink": {
"type": "RestSink",
"httpRequestTimeout": "00:01:40",
"requestInterval": 10,
"requestMethod": "POST",
"writeBatchSize": 1000,
"httpCompressionType": "none",
"writeBehavior": "upload"
},
"enableStaging": false,
"translator": {
"type": "TabularTranslator",
"mappings": [
{
"source": {
"path": "['Key']"
},
"sink": {
"name": "Key",
"type": "String"
}
},
{
"source": {
"path": "['BlobNamn']"
},
"sink": {
"name": "BlobNamn",
"type": "String"
}
},
{
"source": {
"path": "['FilNamn']"
},
"sink": {
"name": "FilNamn",
"type": "String"
}
},
{
"source": {
"path": "['StanfordDokumentSchema']"
},
"sink": {
"name": "StanfordDokumentSchema",
"type": "String"
}
},
{
"source": {
"path": "['StanfordDokumentTyp']"
},
"sink": {
"name": "StanfordDokumentTyp",
"type": "String"
}
},
{
"source": {
"path": "['Skapad']"
},
"sink": {
"name": "Skapad",
"type": "DateTimeOffset"
}
},
{
"source": {
"path": "['UtforandeAvverkningsforetag']"
},
"sink": {
"name": "UtforandeAvverkningsforetag",
"type": "String"
}
},
{
"source": {
"path": "['EkonomiskdimensionUtforandeAvverkningsforetag']"
},
"sink": {
"name": "EkonomiskdimensionUtforandeAvverkningsforetag",
"type": "String"
}
},
{
"source": {
"path": "['Gpx']"
},
"sink": {
"name": "Gpx",
"type": "String"
}
},
{
"source": {
"path": "['SdcId']"
},
"sink": {
"name": "SdcId",
"type": "String"
}
},
{
"source": {
"path": "['SaknarMetadata']"
},
"sink": {
"name": "SaknarMetadata",
"type": "Boolean"
}
},
{
"source": {
"path": "['Initialiserad']"
},
"sink": {
"name": "Initialiserad",
"type": "DateTimeOffset"
}
},
{
"source": {
"path": "['FilTyp']"
},
"sink": {
"name": "FilTyp",
"type": "String"
}
},
{
"source": {
"path": "['Kompression']"
},
"sink": {
"name": "Kompression",
"type": "String"
}
},
{
"source": {
"path": "['BereknatFilNamn']"
},
"sink": {
"name": "BereknatFilNamn",
"type": "String"
}
},
{
"source": {
"path": "['SdcFileId']"
},
"sink": {
"name": "SdcFileId",
"type": "String"
}
},
{
"source": {
"path": "['Kella']"
},
"sink": {
"name": "Kella",
"type": "String"
}
},
{
"source": {
"path": "['DocumentId']"
},
"sink": {
"name": "DocumentId",
"type": "String"
}
},
{
"source": {
"path": "['MetadataVersion']"
},
"sink": {
"name": "MetadataVersion",
"type": "Int32"
}
}
],
"collectionReference": "$.value",
"mapComplexValuesToString": false
}
},
"inputs": [
{
"referenceName": "AzureSearch",
"type": "DatasetReference",
"parameters": {
"SearchUrl": {
"value": "@pipeline().parameters.SourceSearchUrl",
"type": "Expression"
},
"ResourcePath": "docs?api-version=2021-04-30-Preview&$top=1&$skip={offset}&$orderBy=Skapad desc"
}
}
],
"outputs": [
{
"referenceName": "AzureSearchEndpoint",
"type": "DatasetReference"
}
]
}
],
"parameters": {
"SourceSearchUrl": {
"type": "string"
},
"SourceSearchApiKey": {
"type": "string"
},
"SinkSearchUrl": {
"type": "string"
},
"SinkSearchApiKey": {
"type": "string"
}
},
"annotations": []
}
}
{
"source": {
"type": "RestSource",
"httpRequestTimeout": "00:01:40",
"requestInterval": "00.00:00:00.010",
"requestMethod": "GET",
"additionalHeaders": {
"api-key": "<secret>"
},
"paginationRules": {
"QueryParameters.{offset}": "RANGE:0:1:10"
}
},
"sink": {
"type": "RestSink",
"httpRequestTimeout": "00:01:40",
"requestInterval": 10,
"requestMethod": "POST",
"writeBatchSize": 10000,
"additionalHeaders": {
"api-key": "<secret>"
},
"httpCompressionType": "none"
},
"enableStaging": false
}
{
"dataRead": 15892,
"dataWritten": 0,
"sourcePeakConnections": 1,
"sinkPeakConnections": 1,
"rowsRead": 1,
"rowsCopied": 0,
"copyDuration": 7,
"throughput": 15.892,
"errors": [
{
"Code": 23353,
"Message": "Failure happened on 'Sink' side. ErrorCode=RestCallFailedWithClientError,'Type=Microsoft.DataTransfer.Common.Shared.HybridDeliveryException,Message=Rest call failed with client error, status code 400 BadRequest, please check your activity settings.\nRequest URL: <secret>/indexes/stanfordfiles/docs/index?api-version=2021-04-30-Preview.\nResponse: {\"error\":{\"code\":\"\",\"message\":\"The request is invalid. Details: parameters : An unexpected 'StartArray' node was found when reading from the JSON reader. A 'StartObject' node was expected.\"}},Source=Microsoft.DataTransfer.ClientLibrary,'",
"EventType": 0,
"Category": 5,
"Data": {
"FailureInitiator": "Sink"
},
"MsgId": null,
"ExceptionType": null,
"Source": null,
"StackTrace": null,
"InnerEventInfos": []
}
],
"effectiveIntegrationRuntime": "AutoResolveIntegrationRuntime (North Europe)",
"usedDataIntegrationUnits": 4,
"billingReference": {
"activityType": "DataMovement",
"billableDuration": [
{
"meterType": "AzureIR",
"duration": 0.016666666666666666,
"unit": "DIUHours"
}
]
},
"usedParallelCopies": 1,
"executionDetails": [
{
"source": {
"type": "RestService"
},
"sink": {
"type": "RestService"
},
"status": "Failed",
"start": "1/23/2023, 2:50:44 PM",
"duration": 7,
"usedDataIntegrationUnits": 4,
"usedParallelCopies": 1,
"profile": {
"queue": {
"status": "Completed",
"duration": 5
},
"transfer": {
"status": "Completed",
"duration": 1,
"details": {
"readingFromSource": {
"type": "RestService",
"workingDuration": 0,
"timeToFirstByte": 0
},
"writingToSink": {
"type": "RestService",
"workingDuration": 0
}
}
}
},
"detailedDurations": {
"queuingDuration": 5,
"timeToFirstByte": 0,
"transferDuration": 1
}
}
],
"dataConsistencyVerification": {
"VerificationResult": "NotVerified"
},
"durationInQueue": {
"integrationRuntimeQueue": 0
}
}
@MatthewDavidCampbell
Copy link
Author

Uploaded Pipeline 3.b with a Search sink, since it worked; however, it does not work for string array data types.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment