0%

Trino-Trial

阅读更多

1 Development

Trino in a Docker container

1
2
3
# Start the trinodb/trino:449 image in the background as container "trino", publishing port 8080 on the host.
docker run --name trino -d -p 8080:8080 trinodb/trino:449

# Run the trino command inside that container to get an interactive CLI session.
docker exec -it trino trino
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
trino> show catalogs;
Catalog
---------
jmx
memory
system
tpcds
tpch
(5 rows)

trino> show schemas from tpch;
Schema
--------------------
information_schema
sf1
sf100
sf1000
sf10000
sf100000
sf300
sf3000
sf30000
tiny
(10 rows)

trino> select * from tpch.tiny.customer limit 1;

2 Generate TPCH/TPCDS into Hive

Hive connector

Start a Hive cluster with Docker Compose, following the docker-hive project's instructions. The values needed from that cluster are:

  • Hive Metastore:
    • Port (docker ps): 9083
    • ServiceName (docker-compose [ -f xxx.yml ] config --services): hive-metastore
  • Network Name (docker network ls): docker-hive_default

Then, add a hive catalog:

1
2
3
4
5
6
7
8
9
# Start Trino attached to the docker-hive network so the metastore is reachable by its service name.
# Port 5005 is published for the optional debugger configured below.
# NOTE(review): a container named "trino" left over from the earlier quick start would make this
# fail with a name conflict; remove it first (docker rm -f trino) if so.
docker run --name trino --network docker-hive_default -d -p 5005:5005 trinodb/trino:449
# Write the Hive catalog file: connector name, metastore thrift URI and LZ4 compression codec.
docker exec -it trino bash -c 'echo -e "connector.name=hive\nhive.metastore.uri=thrift://hive-metastore:9083\nhive.compression-codec=LZ4" > /etc/trino/catalog/hive.properties'

# Optional, for debugging Trino: append a JDWP agent option listening on port 5005 to jvm.config.
docker exec -it trino bash -c 'echo "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005" >> /etc/trino/jvm.config'
# Alternative spelling of the same JDWP settings using -Xdebug/-Xrunjdwp; apply only one of the two.
docker exec -it trino bash -c 'echo -e "-Xdebug\n-Xrunjdwp:server=y,transport=dt_socket,address=*:5005,suspend=n" >> /etc/trino/jvm.config'

# Restart the container so the files written above are read on startup.
docker restart trino
1
2
3
docker exec -it trino trino --catalog hive

trino> show schemas;

2.1 TPC-DS

2.1.1 Create Table Statement for Hive

2.1.1.1 Hive Syntax

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
-- TPC-DS call_center table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `call_center` (
    `cc_call_center_sk` INT,
    `cc_call_center_id` VARCHAR(16),
    `cc_rec_start_date` DATE,
    `cc_rec_end_date` DATE,
    `cc_closed_date_sk` INT,
    `cc_open_date_sk` INT,
    `cc_name` VARCHAR(50),
    `cc_class` VARCHAR(50),
    `cc_employees` INT,
    `cc_sq_ft` INT,
    `cc_hours` VARCHAR(20),
    `cc_manager` VARCHAR(40),
    `cc_mkt_id` INT,
    `cc_mkt_class` VARCHAR(50),
    `cc_mkt_desc` VARCHAR(100),
    `cc_market_manager` VARCHAR(40),
    `cc_division` INT,
    `cc_division_name` VARCHAR(50),
    `cc_company` INT,
    `cc_company_name` VARCHAR(50),
    `cc_street_number` VARCHAR(10),
    `cc_street_name` VARCHAR(60),
    `cc_street_type` VARCHAR(15),
    `cc_suite_number` VARCHAR(10),
    `cc_city` VARCHAR(60),
    `cc_county` VARCHAR(30),
    `cc_state` VARCHAR(2),
    `cc_zip` VARCHAR(10),
    `cc_country` VARCHAR(20),
    `cc_gmt_offset` DECIMAL(5,2),
    `cc_tax_percentage` DECIMAL(5,2)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS catalog_page table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `catalog_page` (
    `cp_catalog_page_sk` INT,
    `cp_catalog_page_id` VARCHAR(16),
    `cp_start_date_sk` INT,
    `cp_end_date_sk` INT,
    `cp_department` VARCHAR(50),
    `cp_catalog_number` INT,
    `cp_catalog_page_number` INT,
    `cp_description` VARCHAR(100),
    `cp_type` VARCHAR(100)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS catalog_returns table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `catalog_returns` (
    `cr_returned_date_sk` INT,
    `cr_returned_time_sk` INT,
    `cr_item_sk` INT,
    `cr_refunded_customer_sk` INT,
    `cr_refunded_cdemo_sk` INT,
    `cr_refunded_hdemo_sk` INT,
    `cr_refunded_addr_sk` INT,
    `cr_returning_customer_sk` INT,
    `cr_returning_cdemo_sk` INT,
    `cr_returning_hdemo_sk` INT,
    `cr_returning_addr_sk` INT,
    `cr_call_center_sk` INT,
    `cr_catalog_page_sk` INT,
    `cr_ship_mode_sk` INT,
    `cr_warehouse_sk` INT,
    `cr_reason_sk` INT,
    `cr_order_number` INT,
    `cr_return_quantity` INT,
    `cr_return_amount` DECIMAL(7,2),
    `cr_return_tax` DECIMAL(7,2),
    `cr_return_amt_inc_tax` DECIMAL(7,2),
    `cr_fee` DECIMAL(7,2),
    `cr_return_ship_cost` DECIMAL(7,2),
    `cr_refunded_cash` DECIMAL(7,2),
    `cr_reversed_charge` DECIMAL(7,2),
    `cr_store_credit` DECIMAL(7,2),
    `cr_net_loss` DECIMAL(7,2)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS catalog_sales table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `catalog_sales` (
    `cs_sold_date_sk` INT,
    `cs_sold_time_sk` INT,
    `cs_ship_date_sk` INT,
    `cs_bill_customer_sk` INT,
    `cs_bill_cdemo_sk` INT,
    `cs_bill_hdemo_sk` INT,
    `cs_bill_addr_sk` INT,
    `cs_ship_customer_sk` INT,
    `cs_ship_cdemo_sk` INT,
    `cs_ship_hdemo_sk` INT,
    `cs_ship_addr_sk` INT,
    `cs_call_center_sk` INT,
    `cs_catalog_page_sk` INT,
    `cs_ship_mode_sk` INT,
    `cs_warehouse_sk` INT,
    `cs_item_sk` INT,
    `cs_promo_sk` INT,
    `cs_order_number` INT,
    `cs_quantity` INT,
    `cs_wholesale_cost` DECIMAL(7,2),
    `cs_list_price` DECIMAL(7,2),
    `cs_sales_price` DECIMAL(7,2),
    `cs_ext_discount_amt` DECIMAL(7,2),
    `cs_ext_sales_price` DECIMAL(7,2),
    `cs_ext_wholesale_cost` DECIMAL(7,2),
    `cs_ext_list_price` DECIMAL(7,2),
    `cs_ext_tax` DECIMAL(7,2),
    `cs_coupon_amt` DECIMAL(7,2),
    `cs_ext_ship_cost` DECIMAL(7,2),
    `cs_net_paid` DECIMAL(7,2),
    `cs_net_paid_inc_tax` DECIMAL(7,2),
    `cs_net_paid_inc_ship` DECIMAL(7,2),
    `cs_net_paid_inc_ship_tax` DECIMAL(7,2),
    `cs_net_profit` DECIMAL(7,2)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS customer table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `customer` (
    `c_customer_sk` INT,
    `c_customer_id` VARCHAR(16),
    `c_current_cdemo_sk` INT,
    `c_current_hdemo_sk` INT,
    `c_current_addr_sk` INT,
    `c_first_shipto_date_sk` INT,
    `c_first_sales_date_sk` INT,
    `c_salutation` VARCHAR(10),
    `c_first_name` VARCHAR(20),
    `c_last_name` VARCHAR(30),
    `c_preferred_cust_flag` VARCHAR(1),
    `c_birth_day` INT,
    `c_birth_month` INT,
    `c_birth_year` INT,
    `c_birth_country` VARCHAR(20),
    `c_login` VARCHAR(13),
    `c_email_address` VARCHAR(50),
    `c_last_review_date` VARCHAR(10)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS customer_address table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `customer_address` (
    `ca_address_sk` INT,
    `ca_address_id` VARCHAR(16),
    `ca_street_number` VARCHAR(10),
    `ca_street_name` VARCHAR(60),
    `ca_street_type` VARCHAR(15),
    `ca_suite_number` VARCHAR(10),
    `ca_city` VARCHAR(60),
    `ca_county` VARCHAR(30),
    `ca_state` VARCHAR(2),
    `ca_zip` VARCHAR(10),
    `ca_country` VARCHAR(20),
    `ca_gmt_offset` DECIMAL(5,2),
    `ca_location_type` VARCHAR(20)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS customer_demographics table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `customer_demographics` (
    `cd_demo_sk` INT,
    `cd_gender` VARCHAR(1),
    `cd_marital_status` VARCHAR(1),
    `cd_education_status` VARCHAR(20),
    `cd_purchase_estimate` INT,
    `cd_credit_rating` VARCHAR(10),
    `cd_dep_count` INT,
    `cd_dep_employed_count` INT,
    `cd_dep_college_count` INT
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS date_dim table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `date_dim` (
    `d_date_sk` INT,
    `d_date_id` VARCHAR(16),
    `d_date` DATE,
    `d_month_seq` INT,
    `d_week_seq` INT,
    `d_quarter_seq` INT,
    `d_year` INT,
    `d_dow` INT,
    `d_moy` INT,
    `d_dom` INT,
    `d_qoy` INT,
    `d_fy_year` INT,
    `d_fy_quarter_seq` INT,
    `d_fy_week_seq` INT,
    `d_day_name` VARCHAR(9),
    `d_quarter_name` VARCHAR(6),
    `d_holiday` VARCHAR(1),
    `d_weekend` VARCHAR(1),
    `d_following_holiday` VARCHAR(1),
    `d_first_dom` INT,
    `d_last_dom` INT,
    `d_same_day_ly` INT,
    `d_same_day_lq` INT,
    `d_current_day` VARCHAR(1),
    `d_current_week` VARCHAR(1),
    `d_current_month` VARCHAR(1),
    `d_current_quarter` VARCHAR(1),
    `d_current_year` VARCHAR(1)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS household_demographics table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `household_demographics` (
    `hd_demo_sk` INT,
    `hd_income_band_sk` INT,
    `hd_buy_potential` VARCHAR(15),
    `hd_dep_count` INT,
    `hd_vehicle_count` INT
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS income_band table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `income_band` (
    `ib_income_band_sk` INT,
    `ib_lower_bound` INT,
    `ib_upper_bound` INT
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS inventory table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `inventory` (
    `inv_date_sk` INT,
    `inv_item_sk` INT,
    `inv_warehouse_sk` INT,
    `inv_quantity_on_hand` INT
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS item table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `item` (
    `i_item_sk` INT,
    `i_item_id` VARCHAR(16),
    `i_rec_start_date` DATE,
    `i_rec_end_date` DATE,
    `i_item_desc` VARCHAR(200),
    `i_current_price` DECIMAL(7,2),
    `i_wholesale_cost` DECIMAL(7,2),
    `i_brand_id` INT,
    `i_brand` VARCHAR(50),
    `i_class_id` INT,
    `i_class` VARCHAR(50),
    `i_category_id` INT,
    `i_category` VARCHAR(50),
    `i_manufact_id` INT,
    `i_manufact` VARCHAR(50),
    `i_size` VARCHAR(20),
    `i_formulation` VARCHAR(20),
    `i_color` VARCHAR(20),
    `i_units` VARCHAR(10),
    `i_container` VARCHAR(10),
    `i_manager_id` INT,
    `i_product_name` VARCHAR(50)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS promotion table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `promotion` (
    `p_promo_sk` INT,
    `p_promo_id` VARCHAR(16),
    `p_start_date_sk` INT,
    `p_end_date_sk` INT,
    `p_item_sk` INT,
    `p_cost` DECIMAL(15,2),
    `p_response_target` INT,
    `p_promo_name` VARCHAR(50),
    `p_channel_dmail` VARCHAR(1),
    `p_channel_email` VARCHAR(1),
    `p_channel_catalog` VARCHAR(1),
    `p_channel_tv` VARCHAR(1),
    `p_channel_radio` VARCHAR(1),
    `p_channel_press` VARCHAR(1),
    `p_channel_event` VARCHAR(1),
    `p_channel_demo` VARCHAR(1),
    `p_channel_details` VARCHAR(100),
    `p_purpose` VARCHAR(15),
    `p_discount_active` VARCHAR(1)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS reason table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `reason` (
    `r_reason_sk` INT,
    `r_reason_id` VARCHAR(16),
    `r_reason_desc` VARCHAR(100)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS ship_mode table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `ship_mode` (
    `sm_ship_mode_sk` INT,
    `sm_ship_mode_id` VARCHAR(16),
    `sm_type` VARCHAR(30),
    `sm_code` VARCHAR(10),
    `sm_carrier` VARCHAR(20),
    `sm_contract` VARCHAR(20)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS store table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `store` (
    `s_store_sk` INT,
    `s_store_id` VARCHAR(16),
    `s_rec_start_date` DATE,
    `s_rec_end_date` DATE,
    `s_closed_date_sk` INT,
    `s_store_name` VARCHAR(50),
    `s_number_employees` INT,
    `s_floor_space` INT,
    `s_hours` VARCHAR(20),
    `s_manager` VARCHAR(40),
    `s_market_id` INT,
    `s_geography_class` VARCHAR(100),
    `s_market_desc` VARCHAR(100),
    `s_market_manager` VARCHAR(40),
    `s_division_id` INT,
    `s_division_name` VARCHAR(50),
    `s_company_id` INT,
    `s_company_name` VARCHAR(50),
    `s_street_number` VARCHAR(10),
    `s_street_name` VARCHAR(60),
    `s_street_type` VARCHAR(15),
    `s_suite_number` VARCHAR(10),
    `s_city` VARCHAR(60),
    `s_county` VARCHAR(30),
    `s_state` VARCHAR(2),
    `s_zip` VARCHAR(10),
    `s_country` VARCHAR(20),
    `s_gmt_offset` DECIMAL(5,2),
    -- NOTE(review): spelling (precentage) looks intentional, likely mirrors the TPC-DS column name, confirm before renaming
    `s_tax_precentage` DECIMAL(5,2)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS store_returns table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `store_returns` (
    `sr_returned_date_sk` INT,
    `sr_return_time_sk` INT,
    `sr_item_sk` INT,
    `sr_customer_sk` INT,
    `sr_cdemo_sk` INT,
    `sr_hdemo_sk` INT,
    `sr_addr_sk` INT,
    `sr_store_sk` INT,
    `sr_reason_sk` INT,
    `sr_ticket_number` INT,
    `sr_return_quantity` INT,
    `sr_return_amt` DECIMAL(7,2),
    `sr_return_tax` DECIMAL(7,2),
    `sr_return_amt_inc_tax` DECIMAL(7,2),
    `sr_fee` DECIMAL(7,2),
    `sr_return_ship_cost` DECIMAL(7,2),
    `sr_refunded_cash` DECIMAL(7,2),
    `sr_reversed_charge` DECIMAL(7,2),
    `sr_store_credit` DECIMAL(7,2),
    `sr_net_loss` DECIMAL(7,2)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS store_sales table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `store_sales` (
    `ss_sold_date_sk` INT,
    `ss_sold_time_sk` INT,
    `ss_item_sk` INT,
    `ss_customer_sk` INT,
    `ss_cdemo_sk` INT,
    `ss_hdemo_sk` INT,
    `ss_addr_sk` INT,
    `ss_store_sk` INT,
    `ss_promo_sk` INT,
    `ss_ticket_number` INT,
    `ss_quantity` INT,
    `ss_wholesale_cost` DECIMAL(7,2),
    `ss_list_price` DECIMAL(7,2),
    `ss_sales_price` DECIMAL(7,2),
    `ss_ext_discount_amt` DECIMAL(7,2),
    `ss_ext_sales_price` DECIMAL(7,2),
    `ss_ext_wholesale_cost` DECIMAL(7,2),
    `ss_ext_list_price` DECIMAL(7,2),
    `ss_ext_tax` DECIMAL(7,2),
    `ss_coupon_amt` DECIMAL(7,2),
    `ss_net_paid` DECIMAL(7,2),
    `ss_net_paid_inc_tax` DECIMAL(7,2),
    `ss_net_profit` DECIMAL(7,2)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS time_dim table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `time_dim` (
    `t_time_sk` INT,
    `t_time_id` VARCHAR(16),
    `t_time` INT,
    `t_hour` INT,
    `t_minute` INT,
    `t_second` INT,
    `t_am_pm` VARCHAR(2),
    `t_shift` VARCHAR(20),
    `t_sub_shift` VARCHAR(20),
    `t_meal_time` VARCHAR(20)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS warehouse table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `warehouse` (
    `w_warehouse_sk` INT,
    `w_warehouse_id` VARCHAR(16),
    `w_warehouse_name` VARCHAR(20),
    `w_warehouse_sq_ft` INT,
    `w_street_number` VARCHAR(10),
    `w_street_name` VARCHAR(60),
    `w_street_type` VARCHAR(15),
    `w_suite_number` VARCHAR(10),
    `w_city` VARCHAR(60),
    `w_county` VARCHAR(30),
    `w_state` VARCHAR(2),
    `w_zip` VARCHAR(10),
    `w_country` VARCHAR(20),
    `w_gmt_offset` DECIMAL(5,2)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS web_page table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `web_page` (
    `wp_web_page_sk` INT,
    `wp_web_page_id` VARCHAR(16),
    `wp_rec_start_date` DATE,
    `wp_rec_end_date` DATE,
    `wp_creation_date_sk` INT,
    `wp_access_date_sk` INT,
    `wp_autogen_flag` VARCHAR(1),
    `wp_customer_sk` INT,
    `wp_url` VARCHAR(100),
    `wp_type` VARCHAR(50),
    `wp_char_count` INT,
    `wp_link_count` INT,
    `wp_image_count` INT,
    `wp_max_ad_count` INT
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS web_returns table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `web_returns` (
    `wr_returned_date_sk` INT,
    `wr_returned_time_sk` INT,
    `wr_item_sk` INT,
    `wr_refunded_customer_sk` INT,
    `wr_refunded_cdemo_sk` INT,
    `wr_refunded_hdemo_sk` INT,
    `wr_refunded_addr_sk` INT,
    `wr_returning_customer_sk` INT,
    `wr_returning_cdemo_sk` INT,
    `wr_returning_hdemo_sk` INT,
    `wr_returning_addr_sk` INT,
    `wr_web_page_sk` INT,
    `wr_reason_sk` INT,
    `wr_order_number` INT,
    `wr_return_quantity` INT,
    `wr_return_amt` DECIMAL(7,2),
    `wr_return_tax` DECIMAL(7,2),
    `wr_return_amt_inc_tax` DECIMAL(7,2),
    `wr_fee` DECIMAL(7,2),
    `wr_return_ship_cost` DECIMAL(7,2),
    `wr_refunded_cash` DECIMAL(7,2),
    `wr_reversed_charge` DECIMAL(7,2),
    `wr_account_credit` DECIMAL(7,2),
    `wr_net_loss` DECIMAL(7,2)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS web_sales table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `web_sales` (
    `ws_sold_date_sk` INT,
    `ws_sold_time_sk` INT,
    `ws_ship_date_sk` INT,
    `ws_item_sk` INT,
    `ws_bill_customer_sk` INT,
    `ws_bill_cdemo_sk` INT,
    `ws_bill_hdemo_sk` INT,
    `ws_bill_addr_sk` INT,
    `ws_ship_customer_sk` INT,
    `ws_ship_cdemo_sk` INT,
    `ws_ship_hdemo_sk` INT,
    `ws_ship_addr_sk` INT,
    `ws_web_page_sk` INT,
    `ws_web_site_sk` INT,
    `ws_ship_mode_sk` INT,
    `ws_warehouse_sk` INT,
    `ws_promo_sk` INT,
    `ws_order_number` INT,
    `ws_quantity` INT,
    `ws_wholesale_cost` DECIMAL(7,2),
    `ws_list_price` DECIMAL(7,2),
    `ws_sales_price` DECIMAL(7,2),
    `ws_ext_discount_amt` DECIMAL(7,2),
    `ws_ext_sales_price` DECIMAL(7,2),
    `ws_ext_wholesale_cost` DECIMAL(7,2),
    `ws_ext_list_price` DECIMAL(7,2),
    `ws_ext_tax` DECIMAL(7,2),
    `ws_coupon_amt` DECIMAL(7,2),
    `ws_ext_ship_cost` DECIMAL(7,2),
    `ws_net_paid` DECIMAL(7,2),
    `ws_net_paid_inc_tax` DECIMAL(7,2),
    `ws_net_paid_inc_ship` DECIMAL(7,2),
    `ws_net_paid_inc_ship_tax` DECIMAL(7,2),
    `ws_net_profit` DECIMAL(7,2)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

-- TPC-DS web_site table (Hive syntax).
-- Stored as Parquet with the parquet.compression table property set to LZ4.
CREATE TABLE `web_site` (
    `web_site_sk` INT,
    `web_site_id` VARCHAR(16),
    `web_rec_start_date` DATE,
    `web_rec_end_date` DATE,
    `web_name` VARCHAR(50),
    `web_open_date_sk` INT,
    `web_close_date_sk` INT,
    `web_class` VARCHAR(50),
    `web_manager` VARCHAR(40),
    `web_mkt_id` INT,
    `web_mkt_class` VARCHAR(50),
    `web_mkt_desc` VARCHAR(100),
    `web_market_manager` VARCHAR(40),
    `web_company_id` INT,
    `web_company_name` VARCHAR(50),
    `web_street_number` VARCHAR(10),
    `web_street_name` VARCHAR(60),
    `web_street_type` VARCHAR(15),
    `web_suite_number` VARCHAR(10),
    `web_city` VARCHAR(60),
    `web_county` VARCHAR(30),
    `web_state` VARCHAR(2),
    `web_zip` VARCHAR(10),
    `web_country` VARCHAR(20),
    `web_gmt_offset` DECIMAL(5,2),
    `web_tax_percentage` DECIMAL(5,2)
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'LZ4');

2.1.1.2 Trino Syntax

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
-- TPC-DS call_center table (Trino syntax).
-- Parquet format, 100 buckets on cc_call_center_sk, sorted_by set to the same column.
CREATE TABLE call_center (
    cc_call_center_sk INT,
    cc_call_center_id VARCHAR(16),
    cc_rec_start_date DATE,
    cc_rec_end_date DATE,
    cc_closed_date_sk INT,
    cc_open_date_sk INT,
    cc_name VARCHAR(50),
    cc_class VARCHAR(50),
    cc_employees INT,
    cc_sq_ft INT,
    cc_hours VARCHAR(20),
    cc_manager VARCHAR(40),
    cc_mkt_id INT,
    cc_mkt_class VARCHAR(50),
    cc_mkt_desc VARCHAR(100),
    cc_market_manager VARCHAR(40),
    cc_division INT,
    cc_division_name VARCHAR(50),
    cc_company INT,
    cc_company_name VARCHAR(50),
    cc_street_number VARCHAR(10),
    cc_street_name VARCHAR(60),
    cc_street_type VARCHAR(15),
    cc_suite_number VARCHAR(10),
    cc_city VARCHAR(60),
    cc_county VARCHAR(30),
    cc_state VARCHAR(2),
    cc_zip VARCHAR(10),
    cc_country VARCHAR(20),
    cc_gmt_offset DECIMAL(5,2),
    cc_tax_percentage DECIMAL(5,2)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['cc_call_center_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['cc_call_center_sk']
);

-- TPC-DS catalog_page table (Trino syntax).
-- Parquet format, 100 buckets on cp_catalog_page_sk, sorted_by set to the same column.
CREATE TABLE catalog_page (
    cp_catalog_page_sk INT,
    cp_catalog_page_id VARCHAR(16),
    cp_start_date_sk INT,
    cp_end_date_sk INT,
    cp_department VARCHAR(50),
    cp_catalog_number INT,
    cp_catalog_page_number INT,
    cp_description VARCHAR(100),
    cp_type VARCHAR(100)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['cp_catalog_page_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['cp_catalog_page_sk']
);

-- TPC-DS catalog_returns table (Trino syntax).
-- Parquet format, 100 buckets on (cr_returned_date_sk, cr_returned_time_sk), sorted_by set to the same columns.
CREATE TABLE catalog_returns (
    cr_returned_date_sk INT,
    cr_returned_time_sk INT,
    cr_item_sk INT,
    cr_refunded_customer_sk INT,
    cr_refunded_cdemo_sk INT,
    cr_refunded_hdemo_sk INT,
    cr_refunded_addr_sk INT,
    cr_returning_customer_sk INT,
    cr_returning_cdemo_sk INT,
    cr_returning_hdemo_sk INT,
    cr_returning_addr_sk INT,
    cr_call_center_sk INT,
    cr_catalog_page_sk INT,
    cr_ship_mode_sk INT,
    cr_warehouse_sk INT,
    cr_reason_sk INT,
    cr_order_number INT,
    cr_return_quantity INT,
    cr_return_amount DECIMAL(7,2),
    cr_return_tax DECIMAL(7,2),
    cr_return_amt_inc_tax DECIMAL(7,2),
    cr_fee DECIMAL(7,2),
    cr_return_ship_cost DECIMAL(7,2),
    cr_refunded_cash DECIMAL(7,2),
    cr_reversed_charge DECIMAL(7,2),
    cr_store_credit DECIMAL(7,2),
    cr_net_loss DECIMAL(7,2)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['cr_returned_date_sk', 'cr_returned_time_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['cr_returned_date_sk', 'cr_returned_time_sk']
);

-- TPC-DS catalog_sales table (Trino syntax).
-- Parquet format, 100 buckets on (cs_sold_date_sk, cs_sold_time_sk), sorted_by set to the same columns.
CREATE TABLE catalog_sales (
    cs_sold_date_sk INT,
    cs_sold_time_sk INT,
    cs_ship_date_sk INT,
    cs_bill_customer_sk INT,
    cs_bill_cdemo_sk INT,
    cs_bill_hdemo_sk INT,
    cs_bill_addr_sk INT,
    cs_ship_customer_sk INT,
    cs_ship_cdemo_sk INT,
    cs_ship_hdemo_sk INT,
    cs_ship_addr_sk INT,
    cs_call_center_sk INT,
    cs_catalog_page_sk INT,
    cs_ship_mode_sk INT,
    cs_warehouse_sk INT,
    cs_item_sk INT,
    cs_promo_sk INT,
    cs_order_number INT,
    cs_quantity INT,
    cs_wholesale_cost DECIMAL(7,2),
    cs_list_price DECIMAL(7,2),
    cs_sales_price DECIMAL(7,2),
    cs_ext_discount_amt DECIMAL(7,2),
    cs_ext_sales_price DECIMAL(7,2),
    cs_ext_wholesale_cost DECIMAL(7,2),
    cs_ext_list_price DECIMAL(7,2),
    cs_ext_tax DECIMAL(7,2),
    cs_coupon_amt DECIMAL(7,2),
    cs_ext_ship_cost DECIMAL(7,2),
    cs_net_paid DECIMAL(7,2),
    cs_net_paid_inc_tax DECIMAL(7,2),
    cs_net_paid_inc_ship DECIMAL(7,2),
    cs_net_paid_inc_ship_tax DECIMAL(7,2),
    cs_net_profit DECIMAL(7,2)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['cs_sold_date_sk', 'cs_sold_time_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['cs_sold_date_sk', 'cs_sold_time_sk']
);

-- TPC-DS customer table (Trino syntax).
-- Parquet format, 100 buckets on c_customer_sk, sorted_by set to the same column.
CREATE TABLE customer (
    c_customer_sk INT,
    c_customer_id VARCHAR(16),
    c_current_cdemo_sk INT,
    c_current_hdemo_sk INT,
    c_current_addr_sk INT,
    c_first_shipto_date_sk INT,
    c_first_sales_date_sk INT,
    c_salutation VARCHAR(10),
    c_first_name VARCHAR(20),
    c_last_name VARCHAR(30),
    c_preferred_cust_flag VARCHAR(1),
    c_birth_day INT,
    c_birth_month INT,
    c_birth_year INT,
    c_birth_country VARCHAR(20),
    c_login VARCHAR(13),
    c_email_address VARCHAR(50),
    c_last_review_date VARCHAR(10)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['c_customer_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['c_customer_sk']
);

-- TPC-DS customer_address table (Trino syntax).
-- Parquet format, 100 buckets on ca_address_sk, sorted_by set to the same column.
CREATE TABLE customer_address (
    ca_address_sk INT,
    ca_address_id VARCHAR(16),
    ca_street_number VARCHAR(10),
    ca_street_name VARCHAR(60),
    ca_street_type VARCHAR(15),
    ca_suite_number VARCHAR(10),
    ca_city VARCHAR(60),
    ca_county VARCHAR(30),
    ca_state VARCHAR(2),
    ca_zip VARCHAR(10),
    ca_country VARCHAR(20),
    ca_gmt_offset DECIMAL(5,2),
    ca_location_type VARCHAR(20)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['ca_address_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['ca_address_sk']
);

-- TPC-DS customer_demographics table (Trino syntax).
-- Parquet format, 100 buckets on cd_demo_sk, sorted_by set to the same column.
CREATE TABLE customer_demographics (
    cd_demo_sk INT,
    cd_gender VARCHAR(1),
    cd_marital_status VARCHAR(1),
    cd_education_status VARCHAR(20),
    cd_purchase_estimate INT,
    cd_credit_rating VARCHAR(10),
    cd_dep_count INT,
    cd_dep_employed_count INT,
    cd_dep_college_count INT
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['cd_demo_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['cd_demo_sk']
);

-- TPC-DS date_dim table (Trino syntax).
-- Parquet format, 100 buckets on d_date_sk, sorted_by set to the same column.
CREATE TABLE date_dim (
    d_date_sk INT,
    d_date_id VARCHAR(16),
    d_date DATE,
    d_month_seq INT,
    d_week_seq INT,
    d_quarter_seq INT,
    d_year INT,
    d_dow INT,
    d_moy INT,
    d_dom INT,
    d_qoy INT,
    d_fy_year INT,
    d_fy_quarter_seq INT,
    d_fy_week_seq INT,
    d_day_name VARCHAR(9),
    d_quarter_name VARCHAR(6),
    d_holiday VARCHAR(1),
    d_weekend VARCHAR(1),
    d_following_holiday VARCHAR(1),
    d_first_dom INT,
    d_last_dom INT,
    d_same_day_ly INT,
    d_same_day_lq INT,
    d_current_day VARCHAR(1),
    d_current_week VARCHAR(1),
    d_current_month VARCHAR(1),
    d_current_quarter VARCHAR(1),
    d_current_year VARCHAR(1)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['d_date_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['d_date_sk']
);

-- TPC-DS household_demographics table (Trino syntax).
-- Parquet format, 100 buckets on hd_demo_sk, sorted_by set to the same column.
CREATE TABLE household_demographics (
    hd_demo_sk INT,
    hd_income_band_sk INT,
    hd_buy_potential VARCHAR(15),
    hd_dep_count INT,
    hd_vehicle_count INT
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['hd_demo_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['hd_demo_sk']
);

-- TPC-DS income_band table (Trino syntax).
-- Parquet format, 100 buckets on ib_income_band_sk, sorted_by set to the same column.
CREATE TABLE income_band (
    ib_income_band_sk INT,
    ib_lower_bound INT,
    ib_upper_bound INT
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['ib_income_band_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['ib_income_band_sk']
);

-- TPC-DS inventory table (Trino syntax).
-- Parquet format, 100 buckets on (inv_date_sk, inv_item_sk), sorted_by set to the same columns.
CREATE TABLE inventory (
    inv_date_sk INT,
    inv_item_sk INT,
    inv_warehouse_sk INT,
    inv_quantity_on_hand INT
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['inv_date_sk', 'inv_item_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['inv_date_sk', 'inv_item_sk']
);

-- TPC-DS item table (Trino syntax).
-- Parquet format, 100 buckets on i_item_sk, sorted_by set to the same column.
CREATE TABLE item (
    i_item_sk INT,
    i_item_id VARCHAR(16),
    i_rec_start_date DATE,
    i_rec_end_date DATE,
    i_item_desc VARCHAR(200),
    i_current_price DECIMAL(7,2),
    i_wholesale_cost DECIMAL(7,2),
    i_brand_id INT,
    i_brand VARCHAR(50),
    i_class_id INT,
    i_class VARCHAR(50),
    i_category_id INT,
    i_category VARCHAR(50),
    i_manufact_id INT,
    i_manufact VARCHAR(50),
    i_size VARCHAR(20),
    i_formulation VARCHAR(20),
    i_color VARCHAR(20),
    i_units VARCHAR(10),
    i_container VARCHAR(10),
    i_manager_id INT,
    i_product_name VARCHAR(50)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['i_item_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['i_item_sk']
);

-- TPC-DS promotion table (Trino syntax).
-- Parquet format, 100 buckets on p_promo_sk, sorted_by set to the same column.
CREATE TABLE promotion (
    p_promo_sk INT,
    p_promo_id VARCHAR(16),
    p_start_date_sk INT,
    p_end_date_sk INT,
    p_item_sk INT,
    p_cost DECIMAL(15,2),
    p_response_target INT,
    p_promo_name VARCHAR(50),
    p_channel_dmail VARCHAR(1),
    p_channel_email VARCHAR(1),
    p_channel_catalog VARCHAR(1),
    p_channel_tv VARCHAR(1),
    p_channel_radio VARCHAR(1),
    p_channel_press VARCHAR(1),
    p_channel_event VARCHAR(1),
    p_channel_demo VARCHAR(1),
    p_channel_details VARCHAR(100),
    p_purpose VARCHAR(15),
    p_discount_active VARCHAR(1)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['p_promo_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['p_promo_sk']
);

-- TPC-DS reason table (Trino syntax).
-- Parquet format, 100 buckets on r_reason_sk, sorted_by set to the same column.
CREATE TABLE reason (
    r_reason_sk INT,
    r_reason_id VARCHAR(16),
    r_reason_desc VARCHAR(100)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['r_reason_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['r_reason_sk']
);

-- TPC-DS ship_mode table (Trino syntax).
-- Parquet format, 100 buckets on sm_ship_mode_sk, sorted_by set to the same column.
CREATE TABLE ship_mode (
    sm_ship_mode_sk INT,
    sm_ship_mode_id VARCHAR(16),
    sm_type VARCHAR(30),
    sm_code VARCHAR(10),
    sm_carrier VARCHAR(20),
    sm_contract VARCHAR(20)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['sm_ship_mode_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['sm_ship_mode_sk']
);

-- TPC-DS store table (Trino syntax).
-- Parquet format, 100 buckets on s_store_sk, sorted_by set to the same column.
CREATE TABLE store (
    s_store_sk INT,
    s_store_id VARCHAR(16),
    s_rec_start_date DATE,
    s_rec_end_date DATE,
    s_closed_date_sk INT,
    s_store_name VARCHAR(50),
    s_number_employees INT,
    s_floor_space INT,
    s_hours VARCHAR(20),
    s_manager VARCHAR(40),
    s_market_id INT,
    s_geography_class VARCHAR(100),
    s_market_desc VARCHAR(100),
    s_market_manager VARCHAR(40),
    s_division_id INT,
    s_division_name VARCHAR(50),
    s_company_id INT,
    s_company_name VARCHAR(50),
    s_street_number VARCHAR(10),
    s_street_name VARCHAR(60),
    s_street_type VARCHAR(15),
    s_suite_number VARCHAR(10),
    s_city VARCHAR(60),
    s_county VARCHAR(30),
    s_state VARCHAR(2),
    s_zip VARCHAR(10),
    s_country VARCHAR(20),
    s_gmt_offset DECIMAL(5,2),
    -- NOTE(review): spelling (precentage) looks intentional, likely mirrors the TPC-DS column name, confirm before renaming
    s_tax_precentage DECIMAL(5,2)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['s_store_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['s_store_sk']
);

-- TPC-DS store_returns table (Trino syntax).
-- Parquet format, 100 buckets on (sr_returned_date_sk, sr_return_time_sk), sorted_by set to the same columns.
CREATE TABLE store_returns (
    sr_returned_date_sk INT,
    sr_return_time_sk INT,
    sr_item_sk INT,
    sr_customer_sk INT,
    sr_cdemo_sk INT,
    sr_hdemo_sk INT,
    sr_addr_sk INT,
    sr_store_sk INT,
    sr_reason_sk INT,
    sr_ticket_number INT,
    sr_return_quantity INT,
    sr_return_amt DECIMAL(7,2),
    sr_return_tax DECIMAL(7,2),
    sr_return_amt_inc_tax DECIMAL(7,2),
    sr_fee DECIMAL(7,2),
    sr_return_ship_cost DECIMAL(7,2),
    sr_refunded_cash DECIMAL(7,2),
    sr_reversed_charge DECIMAL(7,2),
    sr_store_credit DECIMAL(7,2),
    sr_net_loss DECIMAL(7,2)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['sr_returned_date_sk', 'sr_return_time_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['sr_returned_date_sk', 'sr_return_time_sk']
);

-- TPC-DS store_sales table (Trino syntax).
-- Parquet format, 100 buckets on (ss_sold_date_sk, ss_sold_time_sk), sorted_by set to the same columns.
CREATE TABLE store_sales (
    ss_sold_date_sk INT,
    ss_sold_time_sk INT,
    ss_item_sk INT,
    ss_customer_sk INT,
    ss_cdemo_sk INT,
    ss_hdemo_sk INT,
    ss_addr_sk INT,
    ss_store_sk INT,
    ss_promo_sk INT,
    ss_ticket_number INT,
    ss_quantity INT,
    ss_wholesale_cost DECIMAL(7,2),
    ss_list_price DECIMAL(7,2),
    ss_sales_price DECIMAL(7,2),
    ss_ext_discount_amt DECIMAL(7,2),
    ss_ext_sales_price DECIMAL(7,2),
    ss_ext_wholesale_cost DECIMAL(7,2),
    ss_ext_list_price DECIMAL(7,2),
    ss_ext_tax DECIMAL(7,2),
    ss_coupon_amt DECIMAL(7,2),
    ss_net_paid DECIMAL(7,2),
    ss_net_paid_inc_tax DECIMAL(7,2),
    ss_net_profit DECIMAL(7,2)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['ss_sold_date_sk', 'ss_sold_time_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['ss_sold_date_sk', 'ss_sold_time_sk']
);

-- TPC-DS time_dim table (Trino syntax).
-- Parquet format, 100 buckets on t_time_sk, sorted_by set to the same column.
CREATE TABLE time_dim (
    t_time_sk INT,
    t_time_id VARCHAR(16),
    t_time INT,
    t_hour INT,
    t_minute INT,
    t_second INT,
    t_am_pm VARCHAR(2),
    t_shift VARCHAR(20),
    t_sub_shift VARCHAR(20),
    t_meal_time VARCHAR(20)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['t_time_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['t_time_sk']
);

-- TPC-DS warehouse table (Trino syntax).
-- Parquet format, 100 buckets on w_warehouse_sk, sorted_by set to the same column.
CREATE TABLE warehouse (
    w_warehouse_sk INT,
    w_warehouse_id VARCHAR(16),
    w_warehouse_name VARCHAR(20),
    w_warehouse_sq_ft INT,
    w_street_number VARCHAR(10),
    w_street_name VARCHAR(60),
    w_street_type VARCHAR(15),
    w_suite_number VARCHAR(10),
    w_city VARCHAR(60),
    w_county VARCHAR(30),
    w_state VARCHAR(2),
    w_zip VARCHAR(10),
    w_country VARCHAR(20),
    w_gmt_offset DECIMAL(5,2)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['w_warehouse_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['w_warehouse_sk']
);

-- TPC-DS web_page table (Trino syntax).
-- Parquet format, 100 buckets on wp_web_page_sk, sorted_by set to the same column.
CREATE TABLE web_page (
    wp_web_page_sk INT,
    wp_web_page_id VARCHAR(16),
    wp_rec_start_date DATE,
    wp_rec_end_date DATE,
    wp_creation_date_sk INT,
    wp_access_date_sk INT,
    wp_autogen_flag VARCHAR(1),
    wp_customer_sk INT,
    wp_url VARCHAR(100),
    wp_type VARCHAR(50),
    wp_char_count INT,
    wp_link_count INT,
    wp_image_count INT,
    wp_max_ad_count INT
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['wp_web_page_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['wp_web_page_sk']
);

-- TPC-DS web_returns table (Trino syntax).
-- Parquet format, 100 buckets on (wr_returned_date_sk, wr_returned_time_sk), sorted_by set to the same columns.
CREATE TABLE web_returns (
    wr_returned_date_sk INT,
    wr_returned_time_sk INT,
    wr_item_sk INT,
    wr_refunded_customer_sk INT,
    wr_refunded_cdemo_sk INT,
    wr_refunded_hdemo_sk INT,
    wr_refunded_addr_sk INT,
    wr_returning_customer_sk INT,
    wr_returning_cdemo_sk INT,
    wr_returning_hdemo_sk INT,
    wr_returning_addr_sk INT,
    wr_web_page_sk INT,
    wr_reason_sk INT,
    wr_order_number INT,
    wr_return_quantity INT,
    wr_return_amt DECIMAL(7,2),
    wr_return_tax DECIMAL(7,2),
    wr_return_amt_inc_tax DECIMAL(7,2),
    wr_fee DECIMAL(7,2),
    wr_return_ship_cost DECIMAL(7,2),
    wr_refunded_cash DECIMAL(7,2),
    wr_reversed_charge DECIMAL(7,2),
    wr_account_credit DECIMAL(7,2),
    wr_net_loss DECIMAL(7,2)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['wr_returned_date_sk', 'wr_returned_time_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['wr_returned_date_sk', 'wr_returned_time_sk']
);

-- TPC-DS web_sales table (Trino syntax).
-- Parquet format, 100 buckets on (ws_sold_date_sk, ws_sold_time_sk), sorted_by set to the same columns.
CREATE TABLE web_sales (
    ws_sold_date_sk INT,
    ws_sold_time_sk INT,
    ws_ship_date_sk INT,
    ws_item_sk INT,
    ws_bill_customer_sk INT,
    ws_bill_cdemo_sk INT,
    ws_bill_hdemo_sk INT,
    ws_bill_addr_sk INT,
    ws_ship_customer_sk INT,
    ws_ship_cdemo_sk INT,
    ws_ship_hdemo_sk INT,
    ws_ship_addr_sk INT,
    ws_web_page_sk INT,
    ws_web_site_sk INT,
    ws_ship_mode_sk INT,
    ws_warehouse_sk INT,
    ws_promo_sk INT,
    ws_order_number INT,
    ws_quantity INT,
    ws_wholesale_cost DECIMAL(7,2),
    ws_list_price DECIMAL(7,2),
    ws_sales_price DECIMAL(7,2),
    ws_ext_discount_amt DECIMAL(7,2),
    ws_ext_sales_price DECIMAL(7,2),
    ws_ext_wholesale_cost DECIMAL(7,2),
    ws_ext_list_price DECIMAL(7,2),
    ws_ext_tax DECIMAL(7,2),
    ws_coupon_amt DECIMAL(7,2),
    ws_ext_ship_cost DECIMAL(7,2),
    ws_net_paid DECIMAL(7,2),
    ws_net_paid_inc_tax DECIMAL(7,2),
    ws_net_paid_inc_ship DECIMAL(7,2),
    ws_net_paid_inc_ship_tax DECIMAL(7,2),
    ws_net_profit DECIMAL(7,2)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['ws_sold_date_sk', 'ws_sold_time_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['ws_sold_date_sk', 'ws_sold_time_sk']
);

-- TPC-DS `web_site` table: Parquet files, bucketed into 100 buckets on
-- web_site_sk and sorted by the same key.
CREATE TABLE web_site (
    web_site_sk        INT,
    web_site_id        VARCHAR(16),
    web_rec_start_date DATE,
    web_rec_end_date   DATE,
    web_name           VARCHAR(50),
    web_open_date_sk   INT,
    web_close_date_sk  INT,
    web_class          VARCHAR(50),
    web_manager        VARCHAR(40),
    web_mkt_id         INT,
    web_mkt_class      VARCHAR(50),
    web_mkt_desc       VARCHAR(100),
    web_market_manager VARCHAR(40),
    web_company_id     INT,
    web_company_name   VARCHAR(50),
    web_street_number  VARCHAR(10),
    web_street_name    VARCHAR(60),
    web_street_type    VARCHAR(15),
    web_suite_number   VARCHAR(10),
    web_city           VARCHAR(60),
    web_county         VARCHAR(30),
    web_state          VARCHAR(2),
    web_zip            VARCHAR(10),
    web_country        VARCHAR(20),
    web_gmt_offset     DECIMAL(5,2),
    web_tax_percentage DECIMAL(5,2)
)
WITH (
    format       = 'PARQUET',
    bucketed_by  = ARRAY['web_site_sk'],
    bucket_count = 100,
    sorted_by    = ARRAY['web_site_sk']
);

2.1.2 Insert Statement

Differences between the Hive schema (left) and the Trino tpcds schema (right):

  • customer.c_last_review_date vs. customer.c_last_review_date_sk
  • promotion.p_response_target vs. promotion.p_response_targe (sic: the Trino column name is missing the final "t")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
-- Copy tpcds.sf1.call_center into call_center, reading rows in
-- cc_call_center_sk order.
INSERT INTO call_center (
    cc_call_center_sk,
    cc_call_center_id,
    cc_rec_start_date,
    cc_rec_end_date,
    cc_closed_date_sk,
    cc_open_date_sk,
    cc_name,
    cc_class,
    cc_employees,
    cc_sq_ft,
    cc_hours,
    cc_manager,
    cc_mkt_id,
    cc_mkt_class,
    cc_mkt_desc,
    cc_market_manager,
    cc_division,
    cc_division_name,
    cc_company,
    cc_company_name,
    cc_street_number,
    cc_street_name,
    cc_street_type,
    cc_suite_number,
    cc_city,
    cc_county,
    cc_state,
    cc_zip,
    cc_country,
    cc_gmt_offset,
    cc_tax_percentage
)
SELECT
    cc_call_center_sk,
    cc_call_center_id,
    cc_rec_start_date,
    cc_rec_end_date,
    cc_closed_date_sk,
    cc_open_date_sk,
    cc_name,
    cc_class,
    cc_employees,
    cc_sq_ft,
    cc_hours,
    cc_manager,
    cc_mkt_id,
    cc_mkt_class,
    cc_mkt_desc,
    cc_market_manager,
    cc_division,
    cc_division_name,
    cc_company,
    cc_company_name,
    cc_street_number,
    cc_street_name,
    cc_street_type,
    cc_suite_number,
    cc_city,
    cc_county,
    cc_state,
    cc_zip,
    cc_country,
    cc_gmt_offset,
    cc_tax_percentage
FROM tpcds.sf1.call_center
ORDER BY
    cc_call_center_sk;

-- Copy tpcds.sf1.catalog_page into catalog_page, reading rows in
-- cp_catalog_page_sk order.
INSERT INTO catalog_page (
    cp_catalog_page_sk,
    cp_catalog_page_id,
    cp_start_date_sk,
    cp_end_date_sk,
    cp_department,
    cp_catalog_number,
    cp_catalog_page_number,
    cp_description,
    cp_type
)
SELECT
    cp_catalog_page_sk,
    cp_catalog_page_id,
    cp_start_date_sk,
    cp_end_date_sk,
    cp_department,
    cp_catalog_number,
    cp_catalog_page_number,
    cp_description,
    cp_type
FROM tpcds.sf1.catalog_page
ORDER BY
    cp_catalog_page_sk;

-- Copy tpcds.sf1.catalog_returns into catalog_returns, reading rows in
-- (cr_returned_date_sk, cr_returned_time_sk) order.
INSERT INTO catalog_returns (
    cr_returned_date_sk,
    cr_returned_time_sk,
    cr_item_sk,
    cr_refunded_customer_sk,
    cr_refunded_cdemo_sk,
    cr_refunded_hdemo_sk,
    cr_refunded_addr_sk,
    cr_returning_customer_sk,
    cr_returning_cdemo_sk,
    cr_returning_hdemo_sk,
    cr_returning_addr_sk,
    cr_call_center_sk,
    cr_catalog_page_sk,
    cr_ship_mode_sk,
    cr_warehouse_sk,
    cr_reason_sk,
    cr_order_number,
    cr_return_quantity,
    cr_return_amount,
    cr_return_tax,
    cr_return_amt_inc_tax,
    cr_fee,
    cr_return_ship_cost,
    cr_refunded_cash,
    cr_reversed_charge,
    cr_store_credit,
    cr_net_loss
)
SELECT
    cr_returned_date_sk,
    cr_returned_time_sk,
    cr_item_sk,
    cr_refunded_customer_sk,
    cr_refunded_cdemo_sk,
    cr_refunded_hdemo_sk,
    cr_refunded_addr_sk,
    cr_returning_customer_sk,
    cr_returning_cdemo_sk,
    cr_returning_hdemo_sk,
    cr_returning_addr_sk,
    cr_call_center_sk,
    cr_catalog_page_sk,
    cr_ship_mode_sk,
    cr_warehouse_sk,
    cr_reason_sk,
    cr_order_number,
    cr_return_quantity,
    cr_return_amount,
    cr_return_tax,
    cr_return_amt_inc_tax,
    cr_fee,
    cr_return_ship_cost,
    cr_refunded_cash,
    cr_reversed_charge,
    cr_store_credit,
    cr_net_loss
FROM tpcds.sf1.catalog_returns
ORDER BY
    cr_returned_date_sk,
    cr_returned_time_sk;

-- Copy tpcds.sf1.catalog_sales into catalog_sales, reading rows in
-- (cs_sold_date_sk, cs_sold_time_sk) order.
INSERT INTO catalog_sales (
    cs_sold_date_sk,
    cs_sold_time_sk,
    cs_ship_date_sk,
    cs_bill_customer_sk,
    cs_bill_cdemo_sk,
    cs_bill_hdemo_sk,
    cs_bill_addr_sk,
    cs_ship_customer_sk,
    cs_ship_cdemo_sk,
    cs_ship_hdemo_sk,
    cs_ship_addr_sk,
    cs_call_center_sk,
    cs_catalog_page_sk,
    cs_ship_mode_sk,
    cs_warehouse_sk,
    cs_item_sk,
    cs_promo_sk,
    cs_order_number,
    cs_quantity,
    cs_wholesale_cost,
    cs_list_price,
    cs_sales_price,
    cs_ext_discount_amt,
    cs_ext_sales_price,
    cs_ext_wholesale_cost,
    cs_ext_list_price,
    cs_ext_tax,
    cs_coupon_amt,
    cs_ext_ship_cost,
    cs_net_paid,
    cs_net_paid_inc_tax,
    cs_net_paid_inc_ship,
    cs_net_paid_inc_ship_tax,
    cs_net_profit
)
SELECT
    cs_sold_date_sk,
    cs_sold_time_sk,
    cs_ship_date_sk,
    cs_bill_customer_sk,
    cs_bill_cdemo_sk,
    cs_bill_hdemo_sk,
    cs_bill_addr_sk,
    cs_ship_customer_sk,
    cs_ship_cdemo_sk,
    cs_ship_hdemo_sk,
    cs_ship_addr_sk,
    cs_call_center_sk,
    cs_catalog_page_sk,
    cs_ship_mode_sk,
    cs_warehouse_sk,
    cs_item_sk,
    cs_promo_sk,
    cs_order_number,
    cs_quantity,
    cs_wholesale_cost,
    cs_list_price,
    cs_sales_price,
    cs_ext_discount_amt,
    cs_ext_sales_price,
    cs_ext_wholesale_cost,
    cs_ext_list_price,
    cs_ext_tax,
    cs_coupon_amt,
    cs_ext_ship_cost,
    cs_net_paid,
    cs_net_paid_inc_tax,
    cs_net_paid_inc_ship,
    cs_net_paid_inc_ship_tax,
    cs_net_profit
FROM tpcds.sf1.catalog_sales
ORDER BY
    cs_sold_date_sk,
    cs_sold_time_sk;

-- Copy tpcds.sf1.customer into customer, reading rows in c_customer_sk order.
-- The target column c_last_review_date is filled from the source column
-- c_last_review_date_sk, cast to VARCHAR(10).
INSERT INTO customer (
    c_customer_sk,
    c_customer_id,
    c_current_cdemo_sk,
    c_current_hdemo_sk,
    c_current_addr_sk,
    c_first_shipto_date_sk,
    c_first_sales_date_sk,
    c_salutation,
    c_first_name,
    c_last_name,
    c_preferred_cust_flag,
    c_birth_day,
    c_birth_month,
    c_birth_year,
    c_birth_country,
    c_login,
    c_email_address,
    c_last_review_date
)
SELECT
    c_customer_sk,
    c_customer_id,
    c_current_cdemo_sk,
    c_current_hdemo_sk,
    c_current_addr_sk,
    c_first_shipto_date_sk,
    c_first_sales_date_sk,
    c_salutation,
    c_first_name,
    c_last_name,
    c_preferred_cust_flag,
    c_birth_day,
    c_birth_month,
    c_birth_year,
    c_birth_country,
    c_login,
    c_email_address,
    CAST(c_last_review_date_sk AS VARCHAR(10))
FROM tpcds.sf1.customer
ORDER BY
    c_customer_sk;

-- Copy tpcds.sf1.customer_address into customer_address, reading rows in
-- ca_address_sk order.
INSERT INTO customer_address (
    ca_address_sk,
    ca_address_id,
    ca_street_number,
    ca_street_name,
    ca_street_type,
    ca_suite_number,
    ca_city,
    ca_county,
    ca_state,
    ca_zip,
    ca_country,
    ca_gmt_offset,
    ca_location_type
)
SELECT
    ca_address_sk,
    ca_address_id,
    ca_street_number,
    ca_street_name,
    ca_street_type,
    ca_suite_number,
    ca_city,
    ca_county,
    ca_state,
    ca_zip,
    ca_country,
    ca_gmt_offset,
    ca_location_type
FROM tpcds.sf1.customer_address
ORDER BY
    ca_address_sk;

-- Copy tpcds.sf1.customer_demographics into customer_demographics, reading
-- rows in cd_demo_sk order.
INSERT INTO customer_demographics (
    cd_demo_sk,
    cd_gender,
    cd_marital_status,
    cd_education_status,
    cd_purchase_estimate,
    cd_credit_rating,
    cd_dep_count,
    cd_dep_employed_count,
    cd_dep_college_count
)
SELECT
    cd_demo_sk,
    cd_gender,
    cd_marital_status,
    cd_education_status,
    cd_purchase_estimate,
    cd_credit_rating,
    cd_dep_count,
    cd_dep_employed_count,
    cd_dep_college_count
FROM tpcds.sf1.customer_demographics
ORDER BY
    cd_demo_sk;

-- Copy tpcds.sf1.date_dim into date_dim, reading rows in d_date_sk order.
INSERT INTO date_dim (
    d_date_sk,
    d_date_id,
    d_date,
    d_month_seq,
    d_week_seq,
    d_quarter_seq,
    d_year,
    d_dow,
    d_moy,
    d_dom,
    d_qoy,
    d_fy_year,
    d_fy_quarter_seq,
    d_fy_week_seq,
    d_day_name,
    d_quarter_name,
    d_holiday,
    d_weekend,
    d_following_holiday,
    d_first_dom,
    d_last_dom,
    d_same_day_ly,
    d_same_day_lq,
    d_current_day,
    d_current_week,
    d_current_month,
    d_current_quarter,
    d_current_year
)
SELECT
    d_date_sk,
    d_date_id,
    d_date,
    d_month_seq,
    d_week_seq,
    d_quarter_seq,
    d_year,
    d_dow,
    d_moy,
    d_dom,
    d_qoy,
    d_fy_year,
    d_fy_quarter_seq,
    d_fy_week_seq,
    d_day_name,
    d_quarter_name,
    d_holiday,
    d_weekend,
    d_following_holiday,
    d_first_dom,
    d_last_dom,
    d_same_day_ly,
    d_same_day_lq,
    d_current_day,
    d_current_week,
    d_current_month,
    d_current_quarter,
    d_current_year
FROM tpcds.sf1.date_dim
ORDER BY
    d_date_sk;

-- Copy tpcds.sf1.household_demographics into household_demographics, reading
-- rows in hd_demo_sk order.
INSERT INTO household_demographics (
    hd_demo_sk,
    hd_income_band_sk,
    hd_buy_potential,
    hd_dep_count,
    hd_vehicle_count
)
SELECT
    hd_demo_sk,
    hd_income_band_sk,
    hd_buy_potential,
    hd_dep_count,
    hd_vehicle_count
FROM tpcds.sf1.household_demographics
ORDER BY
    hd_demo_sk;

-- Copy tpcds.sf1.income_band into income_band, reading rows in
-- ib_income_band_sk order.
INSERT INTO income_band (
    ib_income_band_sk,
    ib_lower_bound,
    ib_upper_bound
)
SELECT
    ib_income_band_sk,
    ib_lower_bound,
    ib_upper_bound
FROM tpcds.sf1.income_band
ORDER BY
    ib_income_band_sk;

-- Copy tpcds.sf1.inventory into inventory, reading rows in
-- (inv_date_sk, inv_item_sk) order.
INSERT INTO inventory (
    inv_date_sk,
    inv_item_sk,
    inv_warehouse_sk,
    inv_quantity_on_hand
)
SELECT
    inv_date_sk,
    inv_item_sk,
    inv_warehouse_sk,
    inv_quantity_on_hand
FROM tpcds.sf1.inventory
ORDER BY
    inv_date_sk,
    inv_item_sk;

-- Copy tpcds.sf1.item into item, reading rows in i_item_sk order.
INSERT INTO item (
    i_item_sk,
    i_item_id,
    i_rec_start_date,
    i_rec_end_date,
    i_item_desc,
    i_current_price,
    i_wholesale_cost,
    i_brand_id,
    i_brand,
    i_class_id,
    i_class,
    i_category_id,
    i_category,
    i_manufact_id,
    i_manufact,
    i_size,
    i_formulation,
    i_color,
    i_units,
    i_container,
    i_manager_id,
    i_product_name
)
SELECT
    i_item_sk,
    i_item_id,
    i_rec_start_date,
    i_rec_end_date,
    i_item_desc,
    i_current_price,
    i_wholesale_cost,
    i_brand_id,
    i_brand,
    i_class_id,
    i_class,
    i_category_id,
    i_category,
    i_manufact_id,
    i_manufact,
    i_size,
    i_formulation,
    i_color,
    i_units,
    i_container,
    i_manager_id,
    i_product_name
FROM tpcds.sf1.item
ORDER BY
    i_item_sk;

-- Copy tpcds.sf1.promotion into promotion, reading rows in p_promo_sk order.
-- The source column is spelled p_response_targe (no final "t") in the tpcds
-- catalog; it feeds the target column p_response_target.
INSERT INTO promotion (
    p_promo_sk,
    p_promo_id,
    p_start_date_sk,
    p_end_date_sk,
    p_item_sk,
    p_cost,
    p_response_target,
    p_promo_name,
    p_channel_dmail,
    p_channel_email,
    p_channel_catalog,
    p_channel_tv,
    p_channel_radio,
    p_channel_press,
    p_channel_event,
    p_channel_demo,
    p_channel_details,
    p_purpose,
    p_discount_active
)
SELECT
    p_promo_sk,
    p_promo_id,
    p_start_date_sk,
    p_end_date_sk,
    p_item_sk,
    p_cost,
    p_response_targe,
    p_promo_name,
    p_channel_dmail,
    p_channel_email,
    p_channel_catalog,
    p_channel_tv,
    p_channel_radio,
    p_channel_press,
    p_channel_event,
    p_channel_demo,
    p_channel_details,
    p_purpose,
    p_discount_active
FROM tpcds.sf1.promotion
ORDER BY
    p_promo_sk;

-- Copy tpcds.sf1.reason into reason, reading rows in r_reason_sk order.
INSERT INTO reason (
    r_reason_sk,
    r_reason_id,
    r_reason_desc
)
SELECT
    r_reason_sk,
    r_reason_id,
    r_reason_desc
FROM tpcds.sf1.reason
ORDER BY
    r_reason_sk;

-- Copy tpcds.sf1.ship_mode into ship_mode, reading rows in sm_ship_mode_sk
-- order.
INSERT INTO ship_mode (
    sm_ship_mode_sk,
    sm_ship_mode_id,
    sm_type,
    sm_code,
    sm_carrier,
    sm_contract
)
SELECT
    sm_ship_mode_sk,
    sm_ship_mode_id,
    sm_type,
    sm_code,
    sm_carrier,
    sm_contract
FROM tpcds.sf1.ship_mode
ORDER BY
    sm_ship_mode_sk;

-- Copy tpcds.sf1.store into store, reading rows in s_store_sk order.
-- The spelling s_tax_precentage is used by both the target and the source.
INSERT INTO store (
    s_store_sk,
    s_store_id,
    s_rec_start_date,
    s_rec_end_date,
    s_closed_date_sk,
    s_store_name,
    s_number_employees,
    s_floor_space,
    s_hours,
    s_manager,
    s_market_id,
    s_geography_class,
    s_market_desc,
    s_market_manager,
    s_division_id,
    s_division_name,
    s_company_id,
    s_company_name,
    s_street_number,
    s_street_name,
    s_street_type,
    s_suite_number,
    s_city,
    s_county,
    s_state,
    s_zip,
    s_country,
    s_gmt_offset,
    s_tax_precentage
)
SELECT
    s_store_sk,
    s_store_id,
    s_rec_start_date,
    s_rec_end_date,
    s_closed_date_sk,
    s_store_name,
    s_number_employees,
    s_floor_space,
    s_hours,
    s_manager,
    s_market_id,
    s_geography_class,
    s_market_desc,
    s_market_manager,
    s_division_id,
    s_division_name,
    s_company_id,
    s_company_name,
    s_street_number,
    s_street_name,
    s_street_type,
    s_suite_number,
    s_city,
    s_county,
    s_state,
    s_zip,
    s_country,
    s_gmt_offset,
    s_tax_precentage
FROM tpcds.sf1.store
ORDER BY
    s_store_sk;

-- Copy tpcds.sf1.store_returns into store_returns, reading rows in
-- (sr_returned_date_sk, sr_return_time_sk) order.
INSERT INTO store_returns (
    sr_returned_date_sk,
    sr_return_time_sk,
    sr_item_sk,
    sr_customer_sk,
    sr_cdemo_sk,
    sr_hdemo_sk,
    sr_addr_sk,
    sr_store_sk,
    sr_reason_sk,
    sr_ticket_number,
    sr_return_quantity,
    sr_return_amt,
    sr_return_tax,
    sr_return_amt_inc_tax,
    sr_fee,
    sr_return_ship_cost,
    sr_refunded_cash,
    sr_reversed_charge,
    sr_store_credit,
    sr_net_loss
)
SELECT
    sr_returned_date_sk,
    sr_return_time_sk,
    sr_item_sk,
    sr_customer_sk,
    sr_cdemo_sk,
    sr_hdemo_sk,
    sr_addr_sk,
    sr_store_sk,
    sr_reason_sk,
    sr_ticket_number,
    sr_return_quantity,
    sr_return_amt,
    sr_return_tax,
    sr_return_amt_inc_tax,
    sr_fee,
    sr_return_ship_cost,
    sr_refunded_cash,
    sr_reversed_charge,
    sr_store_credit,
    sr_net_loss
FROM tpcds.sf1.store_returns
ORDER BY
    sr_returned_date_sk,
    sr_return_time_sk;

-- Copy tpcds.sf1.store_sales into store_sales, reading rows in
-- (ss_sold_date_sk, ss_sold_time_sk) order.
INSERT INTO store_sales (
    ss_sold_date_sk,
    ss_sold_time_sk,
    ss_item_sk,
    ss_customer_sk,
    ss_cdemo_sk,
    ss_hdemo_sk,
    ss_addr_sk,
    ss_store_sk,
    ss_promo_sk,
    ss_ticket_number,
    ss_quantity,
    ss_wholesale_cost,
    ss_list_price,
    ss_sales_price,
    ss_ext_discount_amt,
    ss_ext_sales_price,
    ss_ext_wholesale_cost,
    ss_ext_list_price,
    ss_ext_tax,
    ss_coupon_amt,
    ss_net_paid,
    ss_net_paid_inc_tax,
    ss_net_profit
)
SELECT
    ss_sold_date_sk,
    ss_sold_time_sk,
    ss_item_sk,
    ss_customer_sk,
    ss_cdemo_sk,
    ss_hdemo_sk,
    ss_addr_sk,
    ss_store_sk,
    ss_promo_sk,
    ss_ticket_number,
    ss_quantity,
    ss_wholesale_cost,
    ss_list_price,
    ss_sales_price,
    ss_ext_discount_amt,
    ss_ext_sales_price,
    ss_ext_wholesale_cost,
    ss_ext_list_price,
    ss_ext_tax,
    ss_coupon_amt,
    ss_net_paid,
    ss_net_paid_inc_tax,
    ss_net_profit
FROM tpcds.sf1.store_sales
ORDER BY
    ss_sold_date_sk,
    ss_sold_time_sk;

-- Copy tpcds.sf1.time_dim into time_dim, reading rows in t_time_sk order.
INSERT INTO time_dim (
    t_time_sk,
    t_time_id,
    t_time,
    t_hour,
    t_minute,
    t_second,
    t_am_pm,
    t_shift,
    t_sub_shift,
    t_meal_time
)
SELECT
    t_time_sk,
    t_time_id,
    t_time,
    t_hour,
    t_minute,
    t_second,
    t_am_pm,
    t_shift,
    t_sub_shift,
    t_meal_time
FROM tpcds.sf1.time_dim
ORDER BY
    t_time_sk;

-- Copy tpcds.sf1.warehouse into warehouse, reading rows in w_warehouse_sk
-- order.
INSERT INTO warehouse (
    w_warehouse_sk,
    w_warehouse_id,
    w_warehouse_name,
    w_warehouse_sq_ft,
    w_street_number,
    w_street_name,
    w_street_type,
    w_suite_number,
    w_city,
    w_county,
    w_state,
    w_zip,
    w_country,
    w_gmt_offset
)
SELECT
    w_warehouse_sk,
    w_warehouse_id,
    w_warehouse_name,
    w_warehouse_sq_ft,
    w_street_number,
    w_street_name,
    w_street_type,
    w_suite_number,
    w_city,
    w_county,
    w_state,
    w_zip,
    w_country,
    w_gmt_offset
FROM tpcds.sf1.warehouse
ORDER BY
    w_warehouse_sk;

-- Copy tpcds.sf1.web_page into web_page, reading rows in wp_web_page_sk
-- order.
INSERT INTO web_page (
    wp_web_page_sk,
    wp_web_page_id,
    wp_rec_start_date,
    wp_rec_end_date,
    wp_creation_date_sk,
    wp_access_date_sk,
    wp_autogen_flag,
    wp_customer_sk,
    wp_url,
    wp_type,
    wp_char_count,
    wp_link_count,
    wp_image_count,
    wp_max_ad_count
)
SELECT
    wp_web_page_sk,
    wp_web_page_id,
    wp_rec_start_date,
    wp_rec_end_date,
    wp_creation_date_sk,
    wp_access_date_sk,
    wp_autogen_flag,
    wp_customer_sk,
    wp_url,
    wp_type,
    wp_char_count,
    wp_link_count,
    wp_image_count,
    wp_max_ad_count
FROM tpcds.sf1.web_page
ORDER BY
    wp_web_page_sk;

-- Copy tpcds.sf1.web_returns into web_returns, reading rows in
-- (wr_returned_date_sk, wr_returned_time_sk) order.
INSERT INTO web_returns (
    wr_returned_date_sk,
    wr_returned_time_sk,
    wr_item_sk,
    wr_refunded_customer_sk,
    wr_refunded_cdemo_sk,
    wr_refunded_hdemo_sk,
    wr_refunded_addr_sk,
    wr_returning_customer_sk,
    wr_returning_cdemo_sk,
    wr_returning_hdemo_sk,
    wr_returning_addr_sk,
    wr_web_page_sk,
    wr_reason_sk,
    wr_order_number,
    wr_return_quantity,
    wr_return_amt,
    wr_return_tax,
    wr_return_amt_inc_tax,
    wr_fee,
    wr_return_ship_cost,
    wr_refunded_cash,
    wr_reversed_charge,
    wr_account_credit,
    wr_net_loss
)
SELECT
    wr_returned_date_sk,
    wr_returned_time_sk,
    wr_item_sk,
    wr_refunded_customer_sk,
    wr_refunded_cdemo_sk,
    wr_refunded_hdemo_sk,
    wr_refunded_addr_sk,
    wr_returning_customer_sk,
    wr_returning_cdemo_sk,
    wr_returning_hdemo_sk,
    wr_returning_addr_sk,
    wr_web_page_sk,
    wr_reason_sk,
    wr_order_number,
    wr_return_quantity,
    wr_return_amt,
    wr_return_tax,
    wr_return_amt_inc_tax,
    wr_fee,
    wr_return_ship_cost,
    wr_refunded_cash,
    wr_reversed_charge,
    wr_account_credit,
    wr_net_loss
FROM tpcds.sf1.web_returns
ORDER BY
    wr_returned_date_sk,
    wr_returned_time_sk;

-- Copy tpcds.sf1.web_sales into web_sales, reading rows in
-- (ws_sold_date_sk, ws_sold_time_sk) order.
INSERT INTO web_sales (
    ws_sold_date_sk,
    ws_sold_time_sk,
    ws_ship_date_sk,
    ws_item_sk,
    ws_bill_customer_sk,
    ws_bill_cdemo_sk,
    ws_bill_hdemo_sk,
    ws_bill_addr_sk,
    ws_ship_customer_sk,
    ws_ship_cdemo_sk,
    ws_ship_hdemo_sk,
    ws_ship_addr_sk,
    ws_web_page_sk,
    ws_web_site_sk,
    ws_ship_mode_sk,
    ws_warehouse_sk,
    ws_promo_sk,
    ws_order_number,
    ws_quantity,
    ws_wholesale_cost,
    ws_list_price,
    ws_sales_price,
    ws_ext_discount_amt,
    ws_ext_sales_price,
    ws_ext_wholesale_cost,
    ws_ext_list_price,
    ws_ext_tax,
    ws_coupon_amt,
    ws_ext_ship_cost,
    ws_net_paid,
    ws_net_paid_inc_tax,
    ws_net_paid_inc_ship,
    ws_net_paid_inc_ship_tax,
    ws_net_profit
)
SELECT
    ws_sold_date_sk,
    ws_sold_time_sk,
    ws_ship_date_sk,
    ws_item_sk,
    ws_bill_customer_sk,
    ws_bill_cdemo_sk,
    ws_bill_hdemo_sk,
    ws_bill_addr_sk,
    ws_ship_customer_sk,
    ws_ship_cdemo_sk,
    ws_ship_hdemo_sk,
    ws_ship_addr_sk,
    ws_web_page_sk,
    ws_web_site_sk,
    ws_ship_mode_sk,
    ws_warehouse_sk,
    ws_promo_sk,
    ws_order_number,
    ws_quantity,
    ws_wholesale_cost,
    ws_list_price,
    ws_sales_price,
    ws_ext_discount_amt,
    ws_ext_sales_price,
    ws_ext_wholesale_cost,
    ws_ext_list_price,
    ws_ext_tax,
    ws_coupon_amt,
    ws_ext_ship_cost,
    ws_net_paid,
    ws_net_paid_inc_tax,
    ws_net_paid_inc_ship,
    ws_net_paid_inc_ship_tax,
    ws_net_profit
FROM tpcds.sf1.web_sales
ORDER BY
    ws_sold_date_sk,
    ws_sold_time_sk;

-- Copy tpcds.sf1.web_site into web_site, reading rows in web_site_sk order.
INSERT INTO web_site (
    web_site_sk,
    web_site_id,
    web_rec_start_date,
    web_rec_end_date,
    web_name,
    web_open_date_sk,
    web_close_date_sk,
    web_class,
    web_manager,
    web_mkt_id,
    web_mkt_class,
    web_mkt_desc,
    web_market_manager,
    web_company_id,
    web_company_name,
    web_street_number,
    web_street_name,
    web_street_type,
    web_suite_number,
    web_city,
    web_county,
    web_state,
    web_zip,
    web_country,
    web_gmt_offset,
    web_tax_percentage
)
SELECT
    web_site_sk,
    web_site_id,
    web_rec_start_date,
    web_rec_end_date,
    web_name,
    web_open_date_sk,
    web_close_date_sk,
    web_class,
    web_manager,
    web_mkt_id,
    web_mkt_class,
    web_mkt_desc,
    web_market_manager,
    web_company_id,
    web_company_name,
    web_street_number,
    web_street_name,
    web_street_type,
    web_suite_number,
    web_city,
    web_county,
    web_state,
    web_zip,
    web_country,
    web_gmt_offset,
    web_tax_percentage
FROM tpcds.sf1.web_site
ORDER BY
    web_site_sk;

2.1.3 Drop Table Statement

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
-- Remove every TPC-DS table created above.
-- IF EXISTS makes the cleanup re-runnable: a table that was never created
-- (or was already dropped) is skipped instead of aborting the script.
DROP TABLE IF EXISTS call_center;
DROP TABLE IF EXISTS catalog_page;
DROP TABLE IF EXISTS catalog_returns;
DROP TABLE IF EXISTS catalog_sales;
DROP TABLE IF EXISTS customer;
DROP TABLE IF EXISTS customer_address;
DROP TABLE IF EXISTS customer_demographics;
DROP TABLE IF EXISTS date_dim;
DROP TABLE IF EXISTS household_demographics;
DROP TABLE IF EXISTS income_band;
DROP TABLE IF EXISTS inventory;
DROP TABLE IF EXISTS item;
DROP TABLE IF EXISTS promotion;
DROP TABLE IF EXISTS reason;
DROP TABLE IF EXISTS ship_mode;
DROP TABLE IF EXISTS store;
DROP TABLE IF EXISTS store_returns;
DROP TABLE IF EXISTS store_sales;
DROP TABLE IF EXISTS time_dim;
DROP TABLE IF EXISTS warehouse;
DROP TABLE IF EXISTS web_page;
DROP TABLE IF EXISTS web_returns;
DROP TABLE IF EXISTS web_sales;
DROP TABLE IF EXISTS web_site;

2.2 TPC-H

2.2.1 Create Table Statement for Hive

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
-- TPC-H `customer` table (Hive DDL), stored as ORC.
-- The ORC codec is selected with the `orc.compress` table property;
-- `orc.compression` is not the documented key, so the LZ4 request was ignored.
CREATE TABLE `customer`(
    `c_custkey` INT,
    `c_name` VARCHAR(25),
    `c_address` VARCHAR(40),
    `c_nationkey` INT,
    `c_phone` VARCHAR(15),
    `c_acctbal` DECIMAL(15,2),
    `c_mktsegment` VARCHAR(10),
    `c_comment` VARCHAR(117)
) STORED AS ORC
TBLPROPERTIES (
    'orc.compress'='LZ4'
);

-- TPC-H `lineitem` table (Hive DDL), stored as ORC.
-- The ORC codec is selected with the `orc.compress` table property;
-- `orc.compression` is not the documented key, so the LZ4 request was ignored.
CREATE TABLE `lineitem`(
    `l_orderkey` BIGINT,
    `l_partkey` INT,
    `l_suppkey` INT,
    `l_linenumber` INT,
    `l_quantity` DECIMAL(15,2),
    `l_extendedprice` DECIMAL(15,2),
    `l_discount` DECIMAL(15,2),
    `l_tax` DECIMAL(15,2),
    `l_returnflag` VARCHAR(1),
    `l_linestatus` VARCHAR(1),
    `l_shipdate` DATE,
    `l_commitdate` DATE,
    `l_receiptdate` DATE,
    `l_shipinstruct` VARCHAR(25),
    `l_shipmode` VARCHAR(10),
    `l_comment` VARCHAR(44)
) STORED AS ORC
TBLPROPERTIES (
    'orc.compress'='LZ4'
);

-- TPC-H `nation` table (Hive DDL), stored as ORC.
-- The ORC codec is selected with the `orc.compress` table property;
-- `orc.compression` is not the documented key, so the LZ4 request was ignored.
CREATE TABLE `nation`(
    `n_nationkey` INT,
    `n_name` VARCHAR(25),
    `n_regionkey` INT,
    `n_comment` VARCHAR(152)
) STORED AS ORC
TBLPROPERTIES (
    'orc.compress'='LZ4'
);

-- TPC-H `orders` table (Hive DDL), stored as ORC.
-- The ORC codec is selected with the `orc.compress` table property;
-- `orc.compression` is not the documented key, so the LZ4 request was ignored.
CREATE TABLE `orders`(
    `o_orderkey` BIGINT,
    `o_custkey` INT,
    `o_orderstatus` VARCHAR(1),
    `o_totalprice` DECIMAL(15,2),
    `o_orderdate` DATE,
    `o_orderpriority` VARCHAR(15),
    `o_clerk` VARCHAR(15),
    `o_shippriority` INT,
    `o_comment` VARCHAR(79)
) STORED AS ORC
TBLPROPERTIES (
    'orc.compress'='LZ4'
);

-- TPC-H `part` table (Hive DDL), stored as ORC.
-- The ORC codec is selected with the `orc.compress` table property;
-- `orc.compression` is not the documented key, so the LZ4 request was ignored.
CREATE TABLE `part`(
    `p_partkey` INT,
    `p_name` VARCHAR(55),
    `p_mfgr` VARCHAR(25),
    `p_brand` VARCHAR(10),
    `p_type` VARCHAR(25),
    `p_size` INT,
    `p_container` VARCHAR(10),
    `p_retailprice` DECIMAL(15,2),
    `p_comment` VARCHAR(23)
) STORED AS ORC
TBLPROPERTIES (
    'orc.compress'='LZ4'
);

-- TPC-H `partsupp` table (Hive DDL), stored as ORC.
-- The ORC codec is selected with the `orc.compress` table property;
-- `orc.compression` is not the documented key, so the LZ4 request was ignored.
CREATE TABLE `partsupp`(
    `ps_partkey` INT,
    `ps_suppkey` INT,
    `ps_availqty` INT,
    `ps_supplycost` DECIMAL(15,2),
    `ps_comment` VARCHAR(199)
) STORED AS ORC
TBLPROPERTIES (
    'orc.compress'='LZ4'
);

-- TPC-H `region` table (Hive DDL), stored as ORC.
-- The ORC codec is selected with the `orc.compress` table property;
-- `orc.compression` is not the documented key, so the LZ4 request was ignored.
CREATE TABLE `region`(
    `r_regionkey` INT,
    `r_name` VARCHAR(25),
    `r_comment` VARCHAR(152)
) STORED AS ORC
TBLPROPERTIES (
    'orc.compress'='LZ4'
);

-- TPC-H `supplier` table (Hive DDL), stored as ORC.
-- The ORC codec is selected with the `orc.compress` table property;
-- `orc.compression` is not the documented key, so the LZ4 request was ignored.
CREATE TABLE `supplier`(
    `s_suppkey` INT,
    `s_name` VARCHAR(25),
    `s_address` VARCHAR(40),
    `s_nationkey` INT,
    `s_phone` VARCHAR(15),
    `s_acctbal` DECIMAL(15,2),
    `s_comment` VARCHAR(101)
) STORED AS ORC
TBLPROPERTIES (
    'orc.compress'='LZ4'
);

2.2.2 Insert Statement

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
-- Copy tpch.sf1.customer into customer; the source columns carry no c_ prefix.
INSERT INTO customer (
    c_custkey,
    c_name,
    c_address,
    c_nationkey,
    c_phone,
    c_acctbal,
    c_mktsegment,
    c_comment
)
SELECT
    custkey,
    name,
    address,
    nationkey,
    phone,
    acctbal,
    mktsegment,
    comment
FROM tpch.sf1.customer;

-- Copy tpch.sf1.lineitem into lineitem; the source columns carry no l_ prefix.
INSERT INTO lineitem (
    l_orderkey,
    l_partkey,
    l_suppkey,
    l_linenumber,
    l_quantity,
    l_extendedprice,
    l_discount,
    l_tax,
    l_returnflag,
    l_linestatus,
    l_shipdate,
    l_commitdate,
    l_receiptdate,
    l_shipinstruct,
    l_shipmode,
    l_comment
)
SELECT
    orderkey,
    partkey,
    suppkey,
    linenumber,
    quantity,
    extendedprice,
    discount,
    tax,
    returnflag,
    linestatus,
    shipdate,
    commitdate,
    receiptdate,
    shipinstruct,
    shipmode,
    comment
FROM tpch.sf1.lineitem;

-- Copy tpch.sf1.nation into nation; the source columns carry no n_ prefix.
INSERT INTO nation (
    n_nationkey,
    n_name,
    n_regionkey,
    n_comment
)
SELECT
    nationkey,
    name,
    regionkey,
    comment
FROM tpch.sf1.nation;

-- Copy tpch.sf1.orders into orders; the source columns carry no o_ prefix.
INSERT INTO orders (
    o_orderkey,
    o_custkey,
    o_orderstatus,
    o_totalprice,
    o_orderdate,
    o_orderpriority,
    o_clerk,
    o_shippriority,
    o_comment
)
SELECT
    orderkey,
    custkey,
    orderstatus,
    totalprice,
    orderdate,
    orderpriority,
    clerk,
    shippriority,
    comment
FROM tpch.sf1.orders;

-- Copy tpch.sf1.part into part; the source columns carry no p_ prefix.
INSERT INTO part (
    p_partkey,
    p_name,
    p_mfgr,
    p_brand,
    p_type,
    p_size,
    p_container,
    p_retailprice,
    p_comment
)
SELECT
    partkey,
    name,
    mfgr,
    brand,
    type,
    size,
    container,
    retailprice,
    comment
FROM tpch.sf1.part;

-- Copy tpch.sf1.partsupp into partsupp; the source columns carry no ps_ prefix.
INSERT INTO partsupp (
    ps_partkey,
    ps_suppkey,
    ps_availqty,
    ps_supplycost,
    ps_comment
)
SELECT
    partkey,
    suppkey,
    availqty,
    supplycost,
    comment
FROM tpch.sf1.partsupp;

-- Copy tpch.sf1.region into region; the source columns carry no r_ prefix.
INSERT INTO region (
    r_regionkey,
    r_name,
    r_comment
)
SELECT
    regionkey,
    name,
    comment
FROM tpch.sf1.region;

-- Copy tpch.sf1.supplier into supplier; the source columns carry no s_ prefix.
INSERT INTO supplier (
    s_suppkey,
    s_name,
    s_address,
    s_nationkey,
    s_phone,
    s_acctbal,
    s_comment
)
SELECT
    suppkey,
    name,
    address,
    nationkey,
    phone,
    acctbal,
    comment
FROM tpch.sf1.supplier;

2.2.3 Drop Table Statement

1
2
3
4
5
6
7
8
-- Remove every TPC-H table created above.
-- IF EXISTS makes the cleanup re-runnable: a table that was never created
-- (or was already dropped) is skipped instead of aborting the script.
DROP TABLE IF EXISTS customer;
DROP TABLE IF EXISTS lineitem;
DROP TABLE IF EXISTS nation;
DROP TABLE IF EXISTS orders;
DROP TABLE IF EXISTS part;
DROP TABLE IF EXISTS partsupp;
DROP TABLE IF EXISTS region;
DROP TABLE IF EXISTS supplier;
1
2
-- List the session properties whose names match the LIKE pattern:
-- first those containing "hive.parquet", then those containing "hive.orc".
SHOW SESSION LIKE '%hive.parquet%';
SHOW SESSION LIKE '%hive.orc%';

3 SQL

3.1 Drop a Schema and Everything It Contains

1
-- `xxx` is a placeholder schema name. CASCADE also drops the objects inside
-- the schema, so this is destructive — double-check the name before running.
DROP SCHEMA xxx CASCADE;

3.2 How to Export a Table to a CSV File

1
# Run a single query non-interactively (--execute) and redirect the
# CSV-formatted result into nation.csv.
trino --output-format CSV --execute "SELECT * FROM tpch.sf1.nation" > nation.csv

Or

1
# Same export, run through the `trino` container.
# No -it here: -t allocates a pseudo-TTY, which rewrites LF as CRLF and merges
# the CLI's stderr into stdout, so both would end up in the redirected file.
docker exec trino trino --output-format CSV --execute "SELECT * FROM tpch.sf1.nation" > nation.csv

3.2.1 TPCH

1
2
3
4
5
6
7
8
# Export the first 16 rows of every TPC-H table to <table>.csv.
# No -it on docker exec: -t allocates a pseudo-TTY, which rewrites LF as CRLF
# and merges the CLI's stderr into stdout, corrupting the redirected CSV.
for table in customer lineitem nation orders part partsupp region supplier; do
    docker exec trino trino --output-format CSV --execute "SELECT * FROM tpch.sf1.${table} limit 16" > "${table}.csv"
done

3.2.2 TPCDS

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Export the first 16 rows of every TPC-DS table to <table>.csv.
# No -it on docker exec: -t allocates a pseudo-TTY, which rewrites LF as CRLF
# and merges the CLI's stderr into stdout, corrupting the redirected CSV.
for table in \
    call_center catalog_page catalog_returns catalog_sales \
    customer customer_address customer_demographics date_dim \
    household_demographics income_band inventory item \
    promotion reason ship_mode store \
    store_returns store_sales time_dim warehouse \
    web_page web_returns web_sales web_site
do
    docker exec trino trino --output-format CSV --execute "SELECT * FROM tpcds.sf1.${table} limit 16" > "${table}.csv"
done