Hi
@fresh-football-47124 Hope you're well.
@future-teacher-7046
-- Past Experiments
WITH __experiments as (
SELECT
user_id,
anonymous_id,
received_at as timestamp,
experiment_id,
variation_id
FROM
experiment_viewed
),
__experimentDates as (
SELECT
experiment_id,
variation_id,
date_trunc(timestamp, DAY) as date,
count(distinct anonymous_id) as users
FROM
__experiments
WHERE
timestamp > DATETIME "2021-02-11 14:04:04"
GROUP BY
experiment_id,
variation_id,
date_trunc(timestamp, DAY)
),
__userThresholds as (
SELECT
experiment_id,
variation_id,
-- It's common for a small number of tracking events to continue coming in
-- long after an experiment ends, so limit to days with enough traffic
max(users) * 0.05 as threshold
FROM
__experimentDates
WHERE
-- Skip days where a variation got 5 or fewer visitors since it's probably not real traffic
users > 5
GROUP BY
experiment_id,
variation_id
),
__variations as (
SELECT
d.experiment_id,
d.variation_id,
MIN(d.date) as start_date,
MAX(d.date) as end_date,
SUM(d.users) as users
FROM
__experimentDates d
JOIN __userThresholds u ON (
d.experiment_id = u.experiment_id
AND d.variation_id = u.variation_id
)
WHERE
d.users > u.threshold
GROUP BY
d.experiment_id,
d.variation_id
)
SELECT
*
FROM
__variations
WHERE
-- Experiment was started recently
date_diff(DATETIME "2022-02-11 14:04:04", start_date, DAY) < 6
OR -- OR it ran for long enough and had enough users
(
date_diff(end_date, start_date, DAY) >= 6
AND users > 100
AND -- Skip experiments at start of date range since it's likely missing data
date_diff(start_date, DATETIME "2021-02-11 14:04:04", DAY) > 2
)
ORDER BY
experiment_id ASC,
variation_id ASC
Table name "experiment_viewed" missing dataset while no default dataset is set in the request.