helpful-alligator-75316
11/28/2023, 9:48 PM
-- Time To First Byte (duration)
WITH
__rawExperiment AS (
SELECT
host,
anonymous_id,
timestamp as timestamp,
experiment_id as experiment_id,
variant_id as variation_id
FROM
events
WHERE
event = 'experiment_viewed'
),
__experimentExposures AS (
-- Viewed Experiment
SELECT
e.anonymous_id as anonymous_id
, toString(e.variation_id) as variation
, e.timestamp as timestamp
FROM
__rawExperiment e
WHERE
e.experiment_id = '63f9e184aaaf29ffe3925b6855490506'
AND e.timestamp >= toDateTime('2023-10-14 18:00:00', 'UTC')
AND e.timestamp <= toDateTime('2023-11-28 16:33:51', 'UTC')
)
, __activationMetric as (-- Metric (RB Analytics - Is Likely Human)
SELECT
anonymous_id as anonymous_id,
1 as value,
m.timestamp as timestamp
FROM
(
SELECT
host,
anonymous_id,
timestamp
FROM events
WHERE bot_score > 29
and context_user_agent not like '%bot%'
and context_user_agent not like '%Bot%'
) m
WHERE m.timestamp >= toDateTime('2023-10-14 18:00:00', 'UTC') AND m.timestamp <= toDateTime('2023-11-28 16:33:51', 'UTC')
)
, __experimentUnits AS (
-- One row per user
SELECT
e.anonymous_id AS anonymous_id
, if(count(distinct e.variation) > 1, '__multiple__', max(e.variation)) AS variation
, MIN(e.timestamp) AS first_exposure_timestamp
, MIN(if(
a.timestamp >= e.timestamp
, a.timestamp, NULL)) AS first_activation_timestamp
FROM
__experimentExposures e
LEFT JOIN __activationMetric a ON (a.anonymous_id = e.anonymous_id)
GROUP BY
e.anonymous_id
),
__distinctUsers AS (
SELECT
anonymous_id,
toString('All') AS dimension,
variation,
first_activation_timestamp AS timestamp,
dateTrunc('day', first_exposure_timestamp) AS first_exposure_date
FROM __experimentUnits
WHERE first_activation_timestamp IS NOT NULL
)
, __metric as (-- Metric (Time To First Byte)
SELECT
anonymous_id as anonymous_id,
m.value as value,
m.timestamp as timestamp
FROM
(
SELECT
host,
anonymous_id,
timestamp,
(value/1000) as value -- It's looking for seconds so we divide by 1000
FROM events
WHERE event = 'ttfb'
) m
WHERE m.timestamp >= toDateTime('2023-10-14 18:00:00', 'UTC') AND m.timestamp <= toDateTime('2023-11-28 16:33:51', 'UTC')
)
, __denominator0 as (-- Metric (RB Analytics - Is TTFB session)
SELECT
anonymous_id as anonymous_id,
1 as value,
m.timestamp as timestamp
FROM
(
SELECT
host,
anonymous_id,
timestamp
FROM events
WHERE event = 'ttfb'
) m
WHERE m.timestamp >= toDateTime('2023-10-14 18:00:00', 'UTC') AND m.timestamp <= toDateTime('2023-11-28 16:33:51', 'UTC')
)
, __denominatorUsers as (
-- one row per user
SELECT
initial.anonymous_id AS anonymous_id,
MIN(initial.dimension) AS dimension,
MIN(initial.variation) AS variation,
MIN(initial.first_exposure_date) AS first_exposure_date,
MIN(t0.timestamp) AS timestamp
FROM
__distinctUsers initial
JOIN __denominator0 t0 ON (
t0.anonymous_id = initial.anonymous_id
)
WHERE
t0.timestamp >= initial.timestamp
GROUP BY
initial.anonymous_id)
, __userMetricJoin as (
SELECT
d.variation AS variation,
d.dimension AS dimension,
d.anonymous_id AS anonymous_id,
if(
m.timestamp >= d.timestamp
, m.value, NULL) as value
FROM
__denominatorUsers d
LEFT JOIN __metric m ON (
m.anonymous_id = d.anonymous_id
)
)
, __userMetricAgg as (
-- Add in the aggregate metric value for each user
SELECT
variation,
dimension,
anonymous_id,
AVG(value) as value
FROM
__userMetricJoin
GROUP BY
variation,
dimension,
anonymous_id
)
, __capValue AS (
SELECT quantileDeterministic(0.75)(value, 1234) AS cap_value
FROM __userMetricAgg
WHERE value IS NOT NULL
)
-- One row per variation/dimension with aggregations
SELECT
m.variation AS variation,
m.dimension AS dimension,
COUNT(*) AS users,
'mean' as statistic_type,
'count' as main_metric_type,
MAX(COALESCE(cap.cap_value, 0)) as main_cap_value,
SUM(LEAST(
toFloat64(COALESCE(m.value, 0)),
cap.cap_value
)) AS main_sum,
SUM(POWER(LEAST(
toFloat64(COALESCE(m.value, 0)),
cap.cap_value
), 2)) AS main_sum_squares
FROM
__userMetricAgg m
CROSS JOIN __capValue cap
GROUP BY
m.variation,
m.dimension
helpful-application-7107
11/28/2023, 11:51 PM
• Since the metric SQL has access to {{ experimentId }}, you could create some filter at the metric level that uses that template variable in some clever way to create the right filter (see the sketch below). I'm not sure what the host filter you're imagining is, though.
• I'd also just confirm that our query is properly hitting date partitions, just to be safe.
Some notes:
• We have also built Fact Tables to help you define metrics in a way that will not have to scan that events table multiple times for an analysis like this. While you can set up metric definitions on the same base SQL right now, the work to build performance improvements on top of those fact tables is still ongoing. Ratio metrics are definitely inefficient right now, and we're hoping that the fact table refactor can do a lot to help trim down query costs in that regard.
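A minimal sketch of that template-variable idea, assuming {{ experimentId }} is available in the metric SQL; the experiment-to-host mapping here is invented for illustration:

SELECT
  anonymous_id,
  timestamp,
  (value/1000) as value
FROM events
WHERE event = 'ttfb'
  -- hypothetical mapping: pin this one experiment to a single host
  AND host = multiIf(
    '{{ experimentId }}' = '63f9e184aaaf29ffe3925b6855490506', 'shop.example.com',
    host -- no host restriction for any other experiment
  )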
helpful-alligator-75316
11/30/2023, 10:45 PM
I added the bot filter to the Experiment Assignment Queries and also as a filter for the fact table. Moving to the fact table definitely helped w/ performance. The only problem now is that the numbers display incorrectly, because I can't control certain aspects of the metric.
For example, all my events go to the events table and write their metrics into the value column. Most of the values are in milliseconds, but the fact table only lets me set it as Time (seconds). I got around this with normal metrics by multiplying the value by .001 (see the sketch below), but I don't have the ability to do that in Fact Tables.
In addition to this, sometimes the values aren't milliseconds; it might be a measure such as how many pixels an element shifted. Since I can't break up the definition of the value column based on the metric, it doesn't allow me to see the changes properly.
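Roughly what that per-metric workaround looks like as a normal metric's SQL, assuming the same events/ttfb shape as the earlier query:

SELECT
  anonymous_id,
  timestamp,
  value * .001 as value -- ms -> s, to match the Time (seconds) format
FROM events
WHERE event = 'ttfb'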
helpful-application-7107
12/01/2023, 4:01 PM
SELECT
host,
anonymous_id,
timestamp,
(value/1000) as value_s,
value as value
FROM events
Columns
value <- just a normal number
value_s <- formatted as seconds
Filters
Then you can add a bunch of filters here for your different WHERE clauses.
Metrics
Then you can define your time metrics using the value_s column and your other metrics using the value column.
There's also an option where you use CASE WHEN in your Fact Table SQL to define your metric columns, but that seems worse (sketched below).
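For reference, a sketch of that CASE WHEN variant; the event names are placeholders for whichever events record milliseconds:

SELECT
  host,
  anonymous_id,
  timestamp,
  CASE
    WHEN event IN ('ttfb', 'fcp', 'lcp') THEN value / 1000 -- ms -> seconds
    ELSE value -- unitless values (e.g. pixels shifted) pass through
  END as value
FROM events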
helpful-alligator-75316
12/04/2023, 5:21 PM
-- Revenue (mean)
WITH __rawExperiment AS (
SELECT
host,
anonymous_id,
timestamp as timestamp,
experiment_id as experiment_id,
variant_id as variation_id
FROM
events
WHERE
event = 'experiment_viewed'
and context_user_agent not ilike '%bot%'
),
__experimentExposures AS (
-- Viewed Experiment
SELECT
e.anonymous_id as anonymous_id
, toString(e.variation_id) as variation
, e.timestamp as timestamp
FROM
__rawExperiment e
WHERE
e.experiment_id = '1cbfa0516ae56757d5615ddcfa3e919e'
AND e.timestamp >= toDateTime('2023-11-01 00:02:00', 'UTC')
AND e.timestamp <= toDateTime('2023-12-04 17:15:19', 'UTC')
),
__experimentUnits AS (
-- One row per user
SELECT
e.anonymous_id AS anonymous_id
, if(count(distinct e.variation) > 1, '__multiple__', max(e.variation)) AS variation
, MIN(e.timestamp) AS first_exposure_timestamp
FROM
__experimentExposures e
GROUP BY
e.anonymous_id
),
__distinctUsers AS (
SELECT
anonymous_id,
toString('All') AS dimension,
variation,
first_exposure_timestamp AS timestamp,
dateTrunc('day', first_exposure_timestamp) AS first_exposure_date
FROM
__experimentUnits
)
, __metric as (
-- Metric (Revenue)
SELECT
anonymous_id as anonymous_id,
m.amount as value,
m.timestamp as timestamp
FROM
(
SELECT
anonymous_id,
timestamp,
host,
context_user_agent,
context_device,
context_browser,
bot_score,
event,
value,
(value / 1000) value_seconds,
amount,
quantity,
currency
FROM
events
) m
WHERE
event = 'checkout_completed'
AND context_user_agent not ilike '%bot%'
AND m.timestamp >= toDateTime('2023-11-01 00:02:00', 'UTC')
AND m.timestamp <= toDateTime('2023-12-04 17:15:19', 'UTC')
),
__userMetricJoin as (
SELECT
d.variation AS variation,
d.dimension AS dimension,
d.anonymous_id AS anonymous_id,
if(m.timestamp >= d.timestamp, m.value, NULL) as value
FROM __distinctUsers d
LEFT JOIN __metric m ON (m.anonymous_id = d.anonymous_id)
),
__userMetricAgg as (
-- Add in the aggregate metric value for each user
SELECT
variation,
dimension,
anonymous_id,
SUM(COALESCE(value, 0)) as value
FROM
__userMetricJoin
GROUP BY
variation,
dimension,
anonymous_id
),
__capValue AS (
SELECT
quantile(0.95)(value) AS cap_value
FROM
__userMetricAgg
WHERE
value IS NOT NULL
)
-- One row per variation/dimension with aggregations
SELECT
m.variation AS variation,
m.dimension AS dimension,
COUNT(*) AS users,
'mean' as statistic_type,
'count' as main_metric_type,
MAX(COALESCE(cap.cap_value, 0)) as main_cap_value,
SUM(LEAST(
toFloat64(COALESCE(m.value, 0)),
cap.cap_value
)) AS main_sum,
SUM(POWER(LEAST(
toFloat64(COALESCE(m.value, 0)),
cap.cap_value
), 2)) AS main_sum_squares
FROM
__userMetricAgg m
CROSS JOIN __capValue cap
GROUP BY
m.variation,
m.dimension
helpful-application-7107
12/04/2023, 5:23 PM
main_cap_value column?
helpful-application-7107
12/04/2023, 5:24 PM
__metric as (
-- Metric (Revenue)
SELECT
anonymous_id as anonymous_id,
m.amount as value,
m.timestamp as timestamp
FROM
(
SELECT
anonymous_id,
timestamp,
host,
context_user_agent,
context_device,
context_browser,
bot_score,
event,
value,
(value / 1000) value_seconds,
amount,
quantity,
currency
FROM
events
) m
WHERE
event = 'checkout_completed'
AND context_user_agent not ilike '%bot%'
AND m.timestamp >= toDateTime('2023-11-01 00:02:00', 'UTC')
AND m.timestamp <= toDateTime('2023-12-04 17:15:19', 'UTC')
),
__userMetricJoin as (
SELECT
d.variation AS variation,
d.dimension AS dimension,
d.anonymous_id AS anonymous_id,
if(m.timestamp >= d.timestamp, m.value, NULL) as value
FROM __distinctUsers d
LEFT JOIN __metric m ON (m.anonymous_id = d.anonymous_id)
)
helpful-application-7107
12/04/2023, 5:27 PM
A few possibilities:
• The query on your events table is dropping all your data for some reason (seems unlikely, but possible). If you just run that CTE, do you see data in your data warehouse?
• The join is not finding overlapping anonymous_id values, which happens if your anonymous_id values aren't the same for users when you are initializing the GrowthBook SDK in your app and when you're tracking their conversions/other events (not an uncommon problem if people aren't properly storing the anonymous ids using the trackingCallback and re-using them for event tracking). A quick check for this is sketched below.
• There's some weirdness with the date windows not overlapping, but that seems unlikely given your queries so far and traffic.
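A quick way to test the second possibility, sketched against the same events table and experiment id used above (uniqExact is ClickHouse's exact distinct count):

SELECT uniqExact(anonymous_id) AS converters_also_exposed
FROM events
WHERE event = 'checkout_completed'
  AND anonymous_id IN (
    SELECT anonymous_id
    FROM events
    WHERE event = 'experiment_viewed'
      AND experiment_id = '1cbfa0516ae56757d5615ddcfa3e919e'
  )
-- If this returns 0 (or close to it) while both event types have rows,
-- the anonymous_id values from the SDK and from event tracking don't line up.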
helpful-alligator-75316
12/04/2023, 6:12 PM
__userMetricAgg as (
SELECT
variation,
dimension,
anonymous_id,
SUM(COALESCE(value, 0)) as value
FROM
__userMetricJoin
GROUP BY
variation,
dimension,
anonymous_id
HAVING value > 0 -- I added this line
)
helpful-alligator-75316
12/04/2023, 6:24 PM
__distinctUsers is getting all users in the experiment, and it's joining __metric to that, which causes a bunch of 0/null values.
helpful-application-7107
12/04/2023, 6:25 PM
SUM(COALESCE(value, 0))
helpful-alligator-75316
12/04/2023, 7:00 PM
SUM(COALESCE(value, 0)) is the only major difference. It's just SUM(value) on the regular metric. If I change the fact table query to use SUM(value), it works correctly.
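The behavioral difference in miniature: for a user whose left join produced only NULLs (no qualifying metric rows), SUM(value) stays NULL, while SUM(COALESCE(value, 0)) turns that user into a zero that counts toward the mean. A toy query showing this, with an invented single-user input:

SELECT
  anonymous_id,
  SUM(value) AS sum_plain, -- NULL: the non-converting user stays distinguishable
  SUM(COALESCE(value, 0)) AS sum_coalesced -- 0: looks like a real zero
FROM (
  SELECT 'user_a' AS anonymous_id, CAST(NULL AS Nullable(Float64)) AS value
)
GROUP BY anonymous_id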
helpful-application-7107
12/04/2023, 7:01 PM
sum(value)?
helpful-alligator-75316
12/04/2023, 7:07 PM
SUM there too?