他的回复:
若upstream_status或upstream_addr_priv_ip包含多个状态码或ip地址,可使用以下建表语句:
-- External table elb_logs: each text line under LOCATION is split into columns
-- by the input.regex pattern of RegexSerDe. The pattern has 34 capture groups,
-- one per column, in declaration order.
--
-- Layout notes, all read off the pattern itself:
--   * Fields are separated by single spaces.
--   * time_iso8601 is taken from between square brackets.
--   * remote_addr and remote_port are split at a colon, likewise eip_address
--     and eip_port.
--   * The eight columns upstream_status through http_x_forwarded_for, plus
--     member_name and upstream_addr_priv_ip_port, are double-quoted fields.
--     Their group accepts anything except a double quote, so one field may
--     carry several status codes or addresses. The capture includes the
--     surrounding double quotes, so stored values keep them.
--   * Integer-like groups accept digits and a minus sign, which allows a lone
--     dash in place of a number.
--
-- NOTE(review): the doubled backslashes assume the engine unescapes backslashes
-- inside string literals before the regex is compiled - confirm for your engine.
-- NOTE(review): the LOCATION value looks like a placeholder path - replace it
-- with the real log directory before running.
CREATE EXTERNAL TABLE IF NOT EXISTS elb_logs (
    msec                       STRING,
    access_log_topic_id        STRING,
    time_iso8601               STRING,
    log_ver                    STRING,
    remote_addr                STRING,
    remote_port                STRING,
    status                     STRING,
    request                    STRING,
    request_length             INT,
    bytes_sent                 INT,
    body_bytes_sent            INT,
    request_time               DOUBLE,
    upstream_status            STRING,
    upstream_connect_time      STRING,
    upstream_header_time       STRING,
    upstream_response_time     STRING,
    upstream_addr_port         STRING,
    http_user_agent            STRING,
    http_referer               STRING,
    http_x_forwarded_for       STRING,
    lb_name                    STRING,
    listener_name              STRING,
    listener_id                STRING,
    pool_name                  STRING,
    member_name                STRING,
    tenant_id                  STRING,
    eip_address                STRING,
    eip_port                   STRING,
    upstream_addr_priv_ip_port STRING,
    certificate_id             STRING,
    ssl_protocol               STRING,
    ssl_cipher                 STRING,
    sni_domain_name            STRING,
    tcpinfo_rtt                INT
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'serialization.format' = '1',
    'input.regex' = '([^ ]*) ([^ ]*) \\[([^ ]*)\\] ([^ ]*) ([^ ]*):([0-9]*) (|[-0-9]*) (\\\"[^\\\"]*\\\") (|[-0-9]*) (|[-0-9]*) (|[-0-9]*) ([-.0-9]*) (\\\"[^\\\"]*\\\") (\\\"[^\\\"]*\\\") (\\\"[^\\\"]*\\\") (\\\"[^\\\"]*\\\") (\\\"[^\\\"]*\\\") (\\\"[^\\\"]*\\\") (\\\"[^\\\"]*\\\") (\\\"[^\\\"]*\\\") ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) (\\\"[^\\\"]*\\\") ([^ ]*) ([^ ]*):([0-9]*) (\\\"[^\\\"]*\\\") ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) (|[-0-9]*)'
)
LOCATION 'obs://xxx/xxx';