StreamingFile connector example
StreamingFile connector example
Parent document: StreamingFile connector
{
"job": {
"writer": {
"hdfs": {
"replication": 1,
"dump_type": "hdfs.dump_type.json",
"compression_codec": "none"
},
"dump": {
"output_dir": "file:///tmp/streaming_file_hdfs/",
"format": {
"type": "hdfs"
}
},
"class": "com.bytedance.bitsail.connector.legacy.streamingfile.sink.FileSystemSinkFunctionDAGBuilder",
"partition_infos": "[{\"name\":\"date\",\"type\":\"TIME\"},{\"name\":\"hour\",\"type\":\"TIME\"}]",
"enable_event_time": true,
"event_time_fields": "timestamp"
}
}
}
HIVE' Example (Static Partition)
job writer's configuration
{
"job": {
"writer": {
"db_name": "default",
"table_name": "bitsail_test_static",
"hdfs": {
"dump_type": "hdfs.dump_type.json"
},
"dump": {
"format": {
"type": "hive"
}
},
"metastore_properties": "{\"hive.metastore.uris\":\"thrift://localhost:9083\"}",
"partition_infos": "[{\"name\":\"date\",\"type\":\"TIME\"},{\"name\":\"hour\",\"type\":\"TIME\"}]",
"enable_event_time": "true",
"event_time_fields": "timestamp",
"class": "com.bytedance.bitsail.connector.legacy.streamingfile.sink.FileSystemSinkFunctionDAGBuilder",
"source_schema": "[{\"name\":\"id\",\"type\":\"bigint\"},{\"name\":\"text\",\"type\":\"string\"},{\"name\":\"timestamp\",\"type\":\"bigint\"}]",
"sink_schema": "[{\"name\":\"id\",\"type\":\"bigint\"},{\"name\":\"text\",\"type\":\"string\"},{\"name\":\"timestamp\",\"type\":\"bigint\"}]"
}
}
}
job's example table ddl.
CREATE TABLE IF NOT EXISTS `default`.`bitsail_test_static`
(
`id` bigint,
`text` string,
`timestamp` bigint
) PARTITIONED BY (`date` string, `hour` string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
HIVE' Example (Dynamic Partition)
job writer's configuration(dynamic)
{
"job": {
"writer": {
"db_name": "default",
"table_name": "bitsail_test_dynamic",
"hdfs": {
"dump_type": "hdfs.dump_type.json"
},
"dump": {
"format": {
"type": "hive"
}
},
"metastore_properties": "{\"hive.metastore.uris\":\"thrift://localhost:9083\"}",
"partition_infos": "[{\"name\":\"date\",\"type\":\"TIME\"},{\"name\":\"hour\",\"type\":\"TIME\"},{\"name\":\"app_name\",\"type\":\"DYNAMIC\"}]",
"enable_event_time": "true",
"event_time_fields": "timestamp",
"class": "com.bytedance.bitsail.connector.legacy.streamingfile.sink.FileSystemSinkFunctionDAGBuilder",
"source_schema": "[{\"name\":\"id\",\"type\":\"bigint\"},{\"name\":\"text\",\"type\":\"string\"},{\"name\":\"timestamp\",\"type\":\"bigint\"},{\"name\":\"app_name\",\"type\":\"string\"}]",
"sink_schema": "[{\"name\":\"id\",\"type\":\"bigint\"},{\"name\":\"text\",\"type\":\"string\"},{\"name\":\"timestamp\",\"type\":\"bigint\"},{\"name\":\"app_name\",\"type\":\"string\"}]"
}
}
}
job's example table ddl (dynamic).
CREATE TABLE IF NOT EXISTS `default`.`bitsail_test_dynamic`
(
`id` bigint,
`text` string,
`timestamp` bigint
) PARTITIONED BY (`date` string, `hour` string, `app_name` string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'