BigQuery Json Data
Using function to clean json data
from google.cloud import bigquery
location = "asia-northeast3"
my_account = "# rho715"
def runQuery_DF(query):
final_query = "\n".join([my_account, query]) #add my account info to query input
client = bigquery.Client(location=location)
data = client.query(final_query).to_dataframe() #run query
client.close()
print('\nquery : ' , final_query)
return data
query = '''
CREATE TEMP FUNCTION del_json_field(json_str STRING, key_name STRING)
RETURNS STRING
LANGUAGE js
AS r"""
json_obj = JSON.parse(json_str)
delete json_obj[key_name]
return JSON.stringify(json_obj)
""";
select
'{"appVer":"5.0.2","carrier":"none","country":"KR","destIp":"125.132.54.23","deviceName":"Box"}' STR,
del_json_field('{"appVer":"5.0.2","carrier":"none","country":"KR","destIp":"125.132.54.23","deviceName":"Box"}', "destIp")
;
'''
import pandas as pd
pd.set_option('display.max_colwidth', None)
runQuery_DF(query)
STR | f0_ | |
---|---|---|
0 | {"appVer":"5.0.2","carrier":"none","country":"KR","destIp":"125.132.54.23","deviceName":"Box"} | {"appVer":"5.0.2","carrier":"none","country":"KR","deviceName":"Box"} |
Using json_extract_scalar
& json_extract
from google.cloud import bigquery
location = "asia-northeast3"
my_account = "# rho715"
def runQuery_DF(query):
final_query = "\n".join([my_account, query]) #add my account info to query input
client = bigquery.Client(location=location)
data = client.query(final_query).to_dataframe() #run query
client.close()
print('\nquery : ' , final_query)
return data
query = '''
with data_sample as (
select
[struct('{"action_item":"sns_share","action_param":{"optional":{"share_type":"facebook"},"required":{"id":"01_0001.1","type":"vod"}},"action_type":"button_click","current":"content_detail"}' as json_data, current_date as yyyy_mm_dd),
struct('{"action_item":"sns_share","action_param":{"optional":{"share_type":"facebook"},"required":{"id":"01_0001.2","type":"qvod"}},"action_type":"button_click","current":"content_detail"}' as json_data, current_date as yyyy_mm_dd)
] as struct_sample
)
select yyyy_mm_dd
, json_extract_scalar(json_data, '$.action_item') `action_item` --json key
, json_extract(json_data, '$.action_param') `action_param` --json value
from data_sample, unnest(struct_sample)
'''
import pandas as pd
pd.set_option('display.max_colwidth', None)
runQuery_DF(query)
yyyy_mm_dd | action_item | action_param | |
---|---|---|---|
0 | 2023-01-28 | sns_share | {"optional":{"share_type":"facebook"},"required":{"id":"01_0001.1","type":"vod"}} |
1 | 2023-01-28 | sns_share | {"optional":{"share_type":"facebook"},"required":{"id":"01_0001.2","type":"qvod"}} |
Using JSON_QUERY_ARRAY
& UNNEST()
from google.cloud import bigquery
location = "asia-northeast3"
my_account = "# rho715"
def runQuery_DF(query):
final_query = "\n".join([my_account, query]) #add my account info to query input
client = bigquery.Client(location=location)
data = client.query(final_query).to_dataframe() #run query
client.close()
print('\nquery : ' , final_query)
return data
query = '''
with data_sample as (
select
[struct('{"agent":"SM-N986N","app_version":"none","data":{"action_type":"register","action_value":[{"content_id":"2101","content_type":"live"}]},"device":"android","log_timestamp":"2022-11-11 03:59:59","log_type":"","log_version":"1.2.2","profileid":"0","service":"cholo","uno":"abved","zone":"none"}' as data_json, current_timestamp() as created_at),
struct('{"agent":"iPhone12,3","app_version":"none","data":{"action_type":"delete_all","action_value":[{"content_id":"0079","content_type":"movie"},{"content_id":"EN394","content_type":"theme"},{"content_id":"EN394","content_type":"theme"}]},"device":"ios","log_timestamp":"2022-11-10 23:58:05","log_type":"","log_version":"1.2.2","profileid":"0","service":"cholo","uno":"zdfoe","zone":"none"}' as data_json, current_timestamp() as created_at),
struct('{"agent":"iPhone12,3","app_version":"none","data":{"action_type":"delete","action_value":[{"content_id":"0063","content_type":"program"}]},"device":"ios","log_timestamp":"2022-11-10 23:58:05","log_type":"","log_version":"1.2.2","profileid":"0","service":"cholo","uno":"jojojo","zone":"none"}' as data_json, current_timestamp() as created_at)
] as struct_sample
)
, struct_to_rows as (
select created_at
, data_json
from data_sample, unnest(struct_sample)
)
, unnest_action_value as (
SELECT
JSON_QUERY_ARRAY(data_json, "$.data.action_value") `action_value_array` --array
, created_at
, data_json data
FROM struct_to_rows
WHERE
json_extract_scalar(data_json, '$.uno') != 'test'
and json_extract_scalar(data_json, '$.log_type') != '1.1.1'
)
select
-- common log
json_extract_scalar(data, '$.log_type') `log_type`
, json_extract_scalar(data, '$.log_timestamp') `log_timestamp`
, TIMESTAMP_ADD(TIMESTAMP(created_at), INTERVAL 9 HOUR) `ap_timestamp`
, json_extract_scalar(data, '$.uno') `uno`
, json_extract_scalar(data, '$.profileid') `profile_id`
, json_extract_scalar(data, '$.zone') `zone`
, json_extract_scalar(data, '$.device') `device_type`
, json_extract_scalar(data, '$.agent') `agent`
, json_extract_scalar(data, '$.app_version') `app_version`
, json_extract_scalar(data, '$.data.action_type') `action_type`
--action_value
, json_extract_scalar(action_value, '$.content_type') `content_type`
, json_extract_scalar(action_value, '$.content_id') `content_id`
from unnest_action_value, unnest(unnest_action_value.action_value_array) as action_value
'''
import pandas as pd
pd.set_option('display.max_colwidth', None)
runQuery_DF(query)
log_type | log_timestamp | ap_timestamp | uno | profile_id | zone | device_type | agent | app_version | action_type | content_type | content_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2022-11-11 03:59:59 | 2023-01-28 20:51:57.011544+00:00 | abved | 0 | none | android | SM-N986N | none | register | live | 2101 | |
1 | 2022-11-10 23:58:05 | 2023-01-28 20:51:57.011544+00:00 | zdfoe | 0 | none | ios | iPhone12,3 | none | delete_all | movie | 0079 | |
2 | 2022-11-10 23:58:05 | 2023-01-28 20:51:57.011544+00:00 | zdfoe | 0 | none | ios | iPhone12,3 | none | delete_all | theme | EN394 | |
3 | 2022-11-10 23:58:05 | 2023-01-28 20:51:57.011544+00:00 | zdfoe | 0 | none | ios | iPhone12,3 | none | delete_all | theme | EN394 | |
4 | 2022-11-10 23:58:05 | 2023-01-28 20:51:57.011544+00:00 | jojojo | 0 | none | ios | iPhone12,3 | none | delete | program | 0063 |
댓글남기기