late-teacher-86348
08/11/2021, 11:13 PM501: Code: 48, e.displayText() = DB::Exception: JOIN ON inequalities are not supported. Unexpected 'users > threshold': While processing users > threshold (version 21.3.15.2.altinity+stable (altinity build))
It seems to be a ClickHouse thing – only equi-joins supported. What does this mean for using Growth Book with ClickHouse?future-teacher-7046
late-teacher-86348
08/12/2021, 11:55 AM501: Code: 48, e.displayText() = DB::Exception: JOIN ON inequalities are not supported. Unexpected 'users > threshold': While processing users > threshold (version 21.3.15.2.altinity+stable (altinity build))
future-teacher-7046
-- Past Experiments
WITH __experiments as (
SELECT
user_id,
anonymous_id,
received_at as timestamp,
experiment_id,
variation_id,
context_page_path as url,
context_user_agent as user_agent
FROM
experiment_viewed
),
__experimentDates as (
SELECT
experiment_id,
variation_id,
date_trunc('day', timestamp) as date,
count(distinct anonymous_id) as users
FROM
__experiments
WHERE
timestamp > toDateTime('2020-08-12 11:51:08')
GROUP BY
experiment_id,
variation_id,
date_trunc('day', timestamp)
),
__userThresholds as (
SELECT
experiment_id,
variation_id,
-- It's common for a small number of tracking events to continue coming in
-- long after an experiment ends, so limit to days with enough traffic
max(users) * 0.05 as threshold
FROM
__experimentDates
WHERE
-- Skip days where a variation got 5 or fewer visitors since it's probably not real traffic
users > 5
GROUP BY
experiment_id,
variation_id
),
__variations as (
SELECT
d.experiment_id,
d.variation_id,
MIN(d.date) as start_date,
MAX(d.date) as end_date,
SUM(d.users) as users
FROM
__experimentDates d
JOIN __userThresholds u ON (
d.experiment_id = u.experiment_id
AND d.variation_id = u.variation_id
)
WHERE
d.users > u.threshold
GROUP BY
d.experiment_id,
d.variation_id
)
SELECT
*
FROM
__variations
WHERE
-- Skip experiments with fewer than 200 users since they don't have enough data
users > 200 -- Skip experiments that are 5 days or shorter (most likely means it was stopped early)
AND date_diff('day', start_date, end_date) > 5 -- Skip experiments that start of the very first day since we're likely missing data
AND date_diff(
'day',
toDateTime('2020-08-12 11:51:08'),
start_date
) > 2
ORDER BY
experiment_id ASC,
variation_id ASC
late-teacher-86348
08/12/2021, 1:44 PMReceived exception from server (version 21.3.15):
Code: 46. DB::Exception: Received from localhost:9000. DB::Exception: Unknown function date_diff. Maybe you meant: ['dateDiff']: While processing ((date_diff('day', toDateTime('2020-08-12 11:51:08'), min(date) AS start_date) > 2) AND (date_diff('day', start_date, max(date) AS end_date) > 5)) AND ((sum(users) AS users) > 200).
future-teacher-7046
date_diff
is an alias of dateDiff
WITH __experiments as (
SELECT
user_id,
anonymous_id,
received_at as timestamp,
experiment_id,
variation_id,
context_page_path as url,
context_user_agent as user_agent
FROM
experiment_viewed
),
__experimentDates as (
SELECT
experiment_id,
variation_id,
dateTrunc('day', timestamp) as date,
count(distinct anonymous_id) as users
FROM
__experiments
WHERE
timestamp > toDateTime('2020-08-12 11:51:08')
GROUP BY
experiment_id,
variation_id,
dateTrunc('day', timestamp)
),
__userThresholds as (
SELECT
experiment_id,
variation_id,
max(users) * 0.05 as threshold
FROM
__experimentDates
WHERE
users > 5
GROUP BY
experiment_id,
variation_id
),
__variations as (
SELECT
d.experiment_id,
d.variation_id,
MIN(d.date) as start_date,
MAX(d.date) as end_date,
SUM(d.users) as users
FROM
__experimentDates d
JOIN __userThresholds u ON (
d.experiment_id = u.experiment_id
AND d.variation_id = u.variation_id
)
WHERE
d.users > u.threshold
GROUP BY
d.experiment_id,
d.variation_id
)
SELECT
*
FROM
__variations
WHERE
users > 200
AND dateDiff('day', start_date, end_date) > 5
AND dateDiff(
'day',
toDateTime('2020-08-12 11:51:08'),
start_date
) > 2
ORDER BY
experiment_id ASC,
variation_id ASC
late-teacher-86348
08/12/2021, 3:00 PMfuture-teacher-7046
late-teacher-86348
08/12/2021, 4:52 PMfuture-teacher-7046
late-teacher-86348
08/12/2021, 6:15 PMrows.map is not a function
Query:
-- Past Experiments
WITH __experiments as (
SELECT
user_id,
anonymous_id,
received_at as timestamp,
experiment_id,
variation_id,
context_page_path as url,
context_user_agent as user_agent
FROM
experiment_viewed
),
__experimentDates as (
SELECT
experiment_id,
variation_id,
dateTrunc('day', timestamp) as date,
count(distinct anonymous_id) as users
FROM
__experiments
WHERE
timestamp > toDateTime('2020-08-12 18:42:35')
GROUP BY
experiment_id,
variation_id,
dateTrunc('day', timestamp)
),
__userThresholds as (
SELECT
experiment_id,
variation_id,
-- It's common for a small number of tracking events to continue coming in
-- long after an experiment ends, so limit to days with enough traffic
max(users) * 0.05 as threshold
FROM
__experimentDates
WHERE
-- Skip days where a variation got 5 or fewer visitors since it's probably not real traffic
users > 5
GROUP BY
experiment_id,
variation_id
),
__variations as (
SELECT
d.experiment_id,
d.variation_id,
MIN(d.date) as start_date,
MAX(d.date) as end_date,
SUM(d.users) as users
FROM
__experimentDates d
JOIN __userThresholds u ON (
d.experiment_id = u.experiment_id
AND d.variation_id = u.variation_id
)
WHERE
d.users > u.threshold
GROUP BY
d.experiment_id,
d.variation_id
)
SELECT
*
FROM
__variations
WHERE
-- Skip experiments with fewer than 200 users since they don't have enough data
users > 200 -- Skip experiments that are 5 days or shorter (most likely means it was stopped early)
AND dateDiff('day', start_date, end_date) > 5 -- Skip experiments that start of the very first day since we're likely missing data
AND dateDiff(
'day',
toDateTime('2020-08-12 18:42:35'),
start_date
) > 2
ORDER BY
experiment_id ASC,
variation_id ASC
future-teacher-7046
late-teacher-86348
08/13/2021, 1:28 AMfuture-teacher-7046
x-clickhouse-format
header to the request. So clickhouse was returning tab-separated results and the library was expecting JSON and failing to parse it.late-teacher-86348
08/14/2021, 1:16 AM