freezing-kite-62929
12/15/2022, 5:47 PM
AND event_timestamp BETWEEN '2022-11-09 00:00:00' AND '2022-12-05 23:59:00'
However, when the final, larger query runs, it does not seem to honor the partitions: BigQuery either takes more than 6 hours to run (after which GB times out), or BigQuery complains there is not enough quota available (which is a setting on our end).
I was wondering if there is a way to add WHERE clause conditions to the outer query (the final SELECT statement) to restrict it to the timestamp range we want, so that BigQuery uses the partitions?
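(A note for context, not from the thread: in my understanding, BigQuery prunes partitions only when a constant filter on the partition column can be applied at the table scan. Assuming ab_sessions is partitioned on event_timestamp, a filter added to the final SELECT can only help if the intermediate CTEs still expose that column; once it has been dropped or aggregated away, there is nothing left to prune on. A minimal sketch:)
-- Minimal sketch, not the generated query. Assumes the table is partitioned
-- on event_timestamp (or DATE(event_timestamp)).

-- Prunable: the constant filter sits on the partition column at the scan.
WITH src AS (
  SELECT
    session_id,
    event_timestamp
  FROM
    `bqprojectname.tf_ab_test_analysis.ab_sessions`
  WHERE
    event_timestamp BETWEEN '2022-11-09 00:00:00' AND '2022-12-05 23:59:00'
)
SELECT COUNT(*) AS sessions FROM src;

-- Not prunable: the CTE aggregates the table and drops event_timestamp, so
-- no condition in the outer SELECT can reference the partition column.
WITH agg AS (
  SELECT session_id, COUNT(*) AS events
  FROM `bqprojectname.tf_ab_test_analysis.ab_sessions`
  GROUP BY session_id
)
SELECT * FROM agg WHERE events > 1;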
helpful-application-7107
12/15/2022, 5:58 PM
freezing-kite-62929
12/15/2022, 6:18 PM
-- Conversion By Session ID (binomial)
WITH
__rawExperiment as (
SELECT
session_id AS user_id,
event_timestamp AS timestamp,
experiment_key AS experiment_id,
branch_key AS variation_id,
experiment_name,
branch_name AS variation_name
FROM
`bqprojectname.tf_ab_test_analysis.ab_sessions` AS e
WHERE
experiment_key = 'xyz'
AND event_timestamp BETWEEN '2022-11-09 00:00:00' AND '2022-12-05 23:59:00'
),
__experiment as ( -- Viewed Experiment
SELECT
e.user_id as user_id,
cast(e.variation_id as string) as variation,
CAST(e.timestamp as DATETIME) as timestamp,
CAST(e.timestamp as DATETIME) as conversion_start,
DATETIME_ADD(CAST(e.timestamp as DATETIME), INTERVAL 72 HOUR) as conversion_end
FROM
__rawExperiment e
WHERE
e.experiment_id = 'xyz'
AND CAST(e.timestamp as DATETIME) >= DATETIME("2022-11-09 00:00:00")
AND CAST(e.timestamp as DATETIME) <= DATETIME("2022-12-05 23:59:00")
),
__metric as ( -- Metric (Conversion By Session ID)
SELECT
user_id as user_id,
1 as value,
CAST(m.timestamp as DATETIME) as timestamp,
CAST(m.timestamp as DATETIME) as conversion_start,
CAST(m.timestamp as DATETIME) as conversion_end
FROM
(
SELECT DISTINCT
session_id AS user_id,
event_timestamp AS timestamp
FROM
`bqprojectname.tf_ab_test_analysis.ab_checkout_with_sales` AS e
WHERE
experiment_key = 'xyz'
AND event_timestamp BETWEEN '2022-11-09 00:00:00' AND '2022-12-08 23:59:00'
) m
WHERE
CAST(m.timestamp as DATETIME) >= DATETIME("2022-11-09 00:00:00")
AND CAST(m.timestamp as DATETIME) <= DATETIME("2022-12-08 23:59:00")
),
__distinctUsers as (
-- One row per user/dimension
SELECT
e.user_id,
cast('All' as string) as dimension,
(
CASE
WHEN count(distinct e.variation) > 1 THEN '__multiple__'
ELSE max(e.variation)
END
) as variation,
MIN(e.conversion_start) as conversion_start,
MIN(e.conversion_end) as conversion_end
FROM
__experiment e
GROUP BY
e.user_id
),
__userMetric as (
-- Add in the aggregate metric value for each user
SELECT
d.variation,
d.dimension,
d.user_id,
1 as value
FROM
__distinctUsers d
JOIN __metric m ON (m.user_id = d.user_id)
WHERE
m.timestamp >= d.conversion_start
AND m.timestamp <= d.conversion_end
GROUP BY
variation,
dimension,
d.user_id
),
__overallUsers as (
-- Number of users in each variation
SELECT
variation,
dimension,
COUNT(*) as users
FROM
__distinctUsers
GROUP BY
variation,
dimension
),
__stats as (
-- One row per variation/dimension with aggregations
SELECT
m.variation,
m.dimension,
COUNT(*) as count,
AVG(m.value) as m_mean,
VAR_SAMP(m.value) as m_var,
sum(m.value) as m_sum
FROM
__userMetric m
GROUP BY
m.variation,
m.dimension
),
__overall as (
SELECT
u.variation,
u.dimension,
u.users as count,
s.m_mean * s.count / u.users as mean,
(s.count / u.users) * (1 - s.count / u.users) as variance,
u.users as users
FROM
__overallUsers u
LEFT JOIN __stats s ON (
s.variation = u.variation
AND s.dimension = u.dimension
)
)
SELECT
variation,
dimension,
count,
mean,
(
CASE
WHEN variance > 0 THEN sqrt(variance)
ELSE 0
END
) as stddev,
users
FROM
__overall
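(Side note: one way to check whether a given run of this query actually pruned partitions is to compare its total_bytes_processed against the size of the underlying tables. A sketch, assuming the project runs in the US multi-region; adjust the region qualifier otherwise:)
SELECT
  job_id,
  creation_time,
  total_bytes_processed,
  total_slot_ms
FROM
  `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
WHERE
  creation_time > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 DAY)
  AND query LIKE '%Conversion By Session ID%'
ORDER BY
  creation_time DESC;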
helpful-application-7107
12/15/2022, 6:23 PM
freezing-kite-62929
12/15/2022, 6:24 PM
helpful-application-7107
12/15/2022, 6:25 PM
freezing-kite-62929
12/15/2022, 6:26 PM
helpful-application-7107
12/15/2022, 6:27 PM
__overall doesn't have any of the timestamp data, so I'm not sure what a filter at that stage would do.
freezing-kite-62929
12/15/2022, 6:27 PM
helpful-application-7107
12/15/2022, 6:27 PM
freezing-kite-62929
12/15/2022, 6:27 PM
helpful-application-7107
12/15/2022, 6:28 PM
freezing-kite-62929
12/15/2022, 6:29 PM
helpful-application-7107
12/15/2022, 6:32 PM
(__metric and __overall), so I think it might be a slightly different issue.
freezing-kite-62929
12/15/2022, 6:42 PM
helpful-application-7107
12/15/2022, 6:44 PM
__userMetric is the most expensive part of this query due to the join, and we aren't filtering down the metric source table enough.
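(One way to test that, as a sketch reusing the metric source subquery from the query above: run it in isolation and compare the estimated bytes processed, e.g. from a dry run, with and without the event_timestamp condition. If the estimate does not shrink, the partitioning on ab_checkout_with_sales is not being used for that scan:)
SELECT DISTINCT
  session_id AS user_id,
  event_timestamp AS timestamp
FROM
  `bqprojectname.tf_ab_test_analysis.ab_checkout_with_sales`
WHERE
  experiment_key = 'xyz'
  AND event_timestamp BETWEEN '2022-11-09 00:00:00' AND '2022-12-08 23:59:00'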
freezing-kite-62929
12/15/2022, 6:54 PM
helpful-application-7107
12/15/2022, 6:54 PM
freezing-kite-62929
12/15/2022, 6:56 PM
At the __userMetric stage, the timestamp is still available, and yes, it would be great to have the ability to add an event_timestamp filter there.
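(Purely as an illustration of that suggestion, a hand-edited __userMetric with an explicit upper bound on the metric timestamp might look like the sketch below; the literal date mirrors the experiment end plus the 72-hour conversion window and is hypothetical:)
__userMetric as (
  SELECT
    d.variation,
    d.dimension,
    d.user_id,
    1 as value
  FROM
    __distinctUsers d
    JOIN __metric m ON (m.user_id = d.user_id)
  WHERE
    m.timestamp >= d.conversion_start
    AND m.timestamp <= d.conversion_end
    -- hypothetical extra bound on the metric timestamp
    AND m.timestamp <= DATETIME("2022-12-08 23:59:00")
  GROUP BY
    variation,
    dimension,
    d.user_id
)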
helpful-application-7107
12/15/2022, 6:56 PM
freezing-kite-62929
12/15/2022, 6:57 PM
helpful-application-7107
12/15/2022, 6:57 PM
freezing-kite-62929
12/15/2022, 6:58 PM
helpful-application-7107
12/15/2022, 6:59 PM
freezing-kite-62929
12/15/2022, 7:00 PM
helpful-application-7107
12/15/2022, 7:04 PM
freezing-kite-62929
12/15/2022, 7:06 PM
distinctUsers section, still the same result.
helpful-application-7107
12/15/2022, 7:31 PM
freezing-kite-62929
12/15/2022, 7:32 PM
helpful-application-7107
12/15/2022, 7:32 PM
freezing-kite-62929
12/15/2022, 7:33 PM
helpful-application-7107
12/15/2022, 7:34 PM
freezing-kite-62929
12/15/2022, 7:34 PM