Related Template(s)
MongoDB-to-BigQuery
Template Version
v2
What happened?
I have a function that checks whether a field is true. If it's true, the function returns null so that document is skipped and not saved into BigQuery.
I have tried doing `return undefined` and `return ""`, and I keep getting the same issue, which is:
com.google.cloud.teleport.v2.common.UncaughtExceptionLogger - The template launch failed.
java.lang.IllegalArgumentException: schema can not be null
Below is a code snippet
/**
 * UDF for the MongoDB-to-BigQuery Dataflow template.
 * Drops documents that belong to a parent; passes all others through.
 * @param {string} input_doc - JSON-encoded MongoDB document.
 * @returns {?string} Re-serialized document, or null to skip it.
 */
function deliveries_transform(input_doc) {
  const doc = JSON.parse(input_doc);
  // Filter: a truthy has_parent means this is a child document — skip it.
  return doc.has_parent ? null : JSON.stringify(doc);
}
I referred to the example stated in this link
https://cloud.google.com/dataflow/docs/guides/templates/create-template-udf#filter_events
The job was created using the google console and not via api or sdk.
Relevant log output
[
{
"insertId": "",
"jsonPayload": {
"line": "exec.go:66",
"message": "com.google.cloud.teleport.v2.common.UncaughtExceptionLogger - The template launch failed.njava.lang.IllegalArgumentException: schema can not be nullntat org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument(Preconditions.java:143)ntat org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$Write.withSchema(BigQueryIO.java:2679)ntat com.google.cloud.teleport.v2.mongodb.templates.MongoDbToBigQuery.run(MongoDbToBigQuery.java:154)ntat com.google.cloud.teleport.v2.mongodb.templates.MongoDbToBigQuery.main(MongoDbToBigQuery.java:96)n"
},
"resource": {
"type": "dataflow_step",
"labels": {
"region": "",
"project_id": "",
"step_id": "",
"job_name": "mongodb-to-bigquery-batch",
"job_id": ""
}
},
"timestamp": "2024-02-12T21:45:00.037010Z",
"severity": "ERROR",
"labels": {
"compute.googleapis.com/resource_name": "",
"dataflow.googleapis.com/region": "us-east4",
"dataflow.googleapis.com/job_id": "",
"compute.googleapis.com/resource_id": "",
"compute.googleapis.com/resource_type": "",
"dataflow.googleapis.com/job_name": "mongodb-to-bigquery-batch"
},
"logName": "",
"receiveTimestamp": "2024-02-12T21:45:02.855403339Z",
"errorGroups": [
{
"id": "CPXppsbT8JP4nQE"
}
]
},
{
"insertId": "",
"jsonPayload": {
"message": "Error: Template launch failed: exit status 1",
"line": "launch.go:80"
},
"resource": {
"type": "dataflow_step",
"labels": {
"job_name": "mongodb-to-bigquery-batch",
"job_id": "",
"step_id": "",
"project_id": "",
"region": ""
}
},
"timestamp": "",
"severity": "ERROR",
"labels": {
"dataflow.googleapis.com/region": "",
"dataflow.googleapis.com/job_id": "",
"compute.googleapis.com/resource_id": "",
"compute.googleapis.com/resource_type": "",
"compute.googleapis.com/resource_name": "",
"dataflow.googleapis.com/job_name": "mongodb-to-bigquery-batch"
},
"logName": "",
"receiveTimestamp": "2024-02-12T21:45:02.855403339Z"
},
{
"textPayload": "Error occurred in the launcher container: Template launch failed. See console logs.",
"insertId": "xl5y9bd22ed",
"resource": {
"type": "dataflow_step",
"labels": {
"project_id": "",
"job_id": "2024-02-12_13_43_46-15601135711795228441",
"job_name": "mongodb-to-bigquery-batch",
"step_id": "",
"region": ""
}
},
"timestamp": "2024-02-12T21:47:43.432514787Z",
"severity": "ERROR",
"labels": {
"dataflow.googleapis.com/job_id": "2024-02-12_13_43_46-15601135711795228441",
"dataflow.googleapis.com/region": ",
"dataflow.googleapis.com/log_type": "",
"dataflow.googleapis.com/job_name": "mongodb-to-bigquery-batch"
},
"logName": "",
"receiveTimestamp": "2024-02-12T21:47:43.962727013Z"
}
]
I tried these variants:
// Attempt 1: return null for filtered documents.
function deliveries_transform(input_doc) {
  const parsed = JSON.parse(input_doc);
  // Guard clause: skip child documents.
  if (parsed.has_parent) {
    return null;
  }
  // Re-serialize the document for BigQuery.
  return JSON.stringify(parsed);
}
// Attempt 2: return undefined for filtered documents.
function deliveries_transform(input_doc) {
  const parsed = JSON.parse(input_doc);
  // Child documents yield undefined; everything else is re-serialized.
  return parsed.has_parent ? undefined : JSON.stringify(parsed);
}
// Attempt 3: return an empty string for filtered documents.
function deliveries_transform(input_doc) {
  const parsed = JSON.parse(input_doc);
  if (!parsed.has_parent) {
    // Non-child documents pass through re-serialized.
    return JSON.stringify(parsed);
  }
  // Child documents map to an empty string.
  return "";
}
I was expecting that during the Dataflow job it would skip inserting the current document and move on to the next one.