late-teacher-86348
08/11/2021, 11:13 PM501: Code: 48, e.displayText() = DB::Exception: JOIN ON inequalities are not supported. Unexpected 'users > threshold': While processing users > threshold (version 21.3.15.2.altinity+stable (altinity build))
It seems to be a ClickHouse thing – only equi-joins supported. What does this mean for using Growth Book with ClickHouse?future-teacher-7046
future-teacher-7046
late-teacher-86348
08/12/2021, 11:55 AM501: Code: 48, e.displayText() = DB::Exception: JOIN ON inequalities are not supported. Unexpected 'users > threshold': While processing users > threshold (version 21.3.15.2.altinity+stable (altinity build))late-teacher-86348
08/12/2021, 11:56 AMfuture-teacher-7046
-- Past Experiments
WITH __experiments as (
  SELECT
    user_id,
    anonymous_id,
    received_at as timestamp,
    experiment_id,
    variation_id,
    context_page_path as url,
    context_user_agent as user_agent
  FROM
    experiment_viewed
),
__experimentDates as (
  SELECT
    experiment_id,
    variation_id,
    date_trunc('day', timestamp) as date,
    count(distinct anonymous_id) as users
  FROM
    __experiments
  WHERE
    timestamp > toDateTime('2020-08-12 11:51:08')
  GROUP BY
    experiment_id,
    variation_id,
    date_trunc('day', timestamp)
),
__userThresholds as (
  SELECT
    experiment_id,
    variation_id,
    -- It's common for a small number of tracking events to continue coming in
    -- long after an experiment ends, so limit to days with enough traffic
    max(users) * 0.05 as threshold
  FROM
    __experimentDates
  WHERE
    -- Skip days where a variation got 5 or fewer visitors since it's probably not real traffic
    users > 5
  GROUP BY
    experiment_id,
    variation_id
),
__variations as (
  SELECT
    d.experiment_id,
    d.variation_id,
    MIN(d.date) as start_date,
    MAX(d.date) as end_date,
    SUM(d.users) as users
  FROM
    __experimentDates d
    JOIN __userThresholds u ON (
      d.experiment_id = u.experiment_id
      AND d.variation_id = u.variation_id
    )
  WHERE
    d.users > u.threshold
  GROUP BY
    d.experiment_id,
    d.variation_id
)
SELECT
  *
FROM
  __variations
WHERE
  -- Skip experiments with fewer than 200 users since they don't have enough data
  users > 200 -- Skip experiments that are 5 days or shorter (most likely means it was stopped early)
  AND date_diff('day', start_date, end_date) > 5 -- Skip experiments that start of the very first day since we're likely missing data
  AND date_diff(
    'day',
    toDateTime('2020-08-12 11:51:08'),
    start_date
  ) > 2
ORDER BY
  experiment_id ASC,
  variation_id ASCfuture-teacher-7046
future-teacher-7046
late-teacher-86348
08/12/2021, 1:44 PMReceived exception from server (version 21.3.15):
Code: 46. DB::Exception: Received from localhost:9000. DB::Exception: Unknown function date_diff. Maybe you meant: ['dateDiff']: While processing ((date_diff('day', toDateTime('2020-08-12 11:51:08'), min(date) AS start_date) > 2) AND (date_diff('day', start_date, max(date) AS end_date) > 5)) AND ((sum(users) AS users) > 200).late-teacher-86348
08/12/2021, 1:45 PMfuture-teacher-7046
date_diff is an alias of dateDifffuture-teacher-7046
future-teacher-7046
future-teacher-7046
WITH __experiments as (
  SELECT
    user_id,
    anonymous_id,
    received_at as timestamp,
    experiment_id,
    variation_id,
    context_page_path as url,
    context_user_agent as user_agent
  FROM
    experiment_viewed
),
__experimentDates as (
  SELECT
    experiment_id,
    variation_id,
    dateTrunc('day', timestamp) as date,
    count(distinct anonymous_id) as users
  FROM
    __experiments
  WHERE
    timestamp > toDateTime('2020-08-12 11:51:08')
  GROUP BY
    experiment_id,
    variation_id,
    dateTrunc('day', timestamp)
),
__userThresholds as (
  SELECT
    experiment_id,
    variation_id,
    max(users) * 0.05 as threshold
  FROM
    __experimentDates
  WHERE
    users > 5
  GROUP BY
    experiment_id,
    variation_id
),
__variations as (
  SELECT
    d.experiment_id,
    d.variation_id,
    MIN(d.date) as start_date,
    MAX(d.date) as end_date,
    SUM(d.users) as users
  FROM
    __experimentDates d
    JOIN __userThresholds u ON (
      d.experiment_id = u.experiment_id
      AND d.variation_id = u.variation_id
    )
  WHERE
    d.users > u.threshold
  GROUP BY
    d.experiment_id,
    d.variation_id
)
SELECT
  *
FROM
  __variations
WHERE
  users > 200
  AND dateDiff('day', start_date, end_date) > 5
  AND dateDiff(
    'day',
    toDateTime('2020-08-12 11:51:08'),
    start_date
  ) > 2
ORDER BY
  experiment_id ASC,
  variation_id ASClate-teacher-86348
08/12/2021, 3:00 PMfuture-teacher-7046
late-teacher-86348
08/12/2021, 4:52 PMfuture-teacher-7046
late-teacher-86348
08/12/2021, 6:15 PMlate-teacher-86348
08/12/2021, 6:43 PMrows.map is not a function
Query:
-- Past Experiments
WITH __experiments as (
  SELECT
    user_id,
    anonymous_id,
    received_at as timestamp,
    experiment_id,
    variation_id,
    context_page_path as url,
    context_user_agent as user_agent
  FROM
    experiment_viewed
),
__experimentDates as (
  SELECT
    experiment_id,
    variation_id,
    dateTrunc('day', timestamp) as date,
    count(distinct anonymous_id) as users
  FROM
    __experiments
  WHERE
    timestamp > toDateTime('2020-08-12 18:42:35')
  GROUP BY
    experiment_id,
    variation_id,
    dateTrunc('day', timestamp)
),
__userThresholds as (
  SELECT
    experiment_id,
    variation_id,
    -- It's common for a small number of tracking events to continue coming in
    -- long after an experiment ends, so limit to days with enough traffic
    max(users) * 0.05 as threshold
  FROM
    __experimentDates
  WHERE
    -- Skip days where a variation got 5 or fewer visitors since it's probably not real traffic
    users > 5
  GROUP BY
    experiment_id,
    variation_id
),
__variations as (
  SELECT
    d.experiment_id,
    d.variation_id,
    MIN(d.date) as start_date,
    MAX(d.date) as end_date,
    SUM(d.users) as users
  FROM
    __experimentDates d
    JOIN __userThresholds u ON (
      d.experiment_id = u.experiment_id
      AND d.variation_id = u.variation_id
    )
  WHERE
    d.users > u.threshold
  GROUP BY
    d.experiment_id,
    d.variation_id
)
SELECT
  *
FROM
  __variations
WHERE
  -- Skip experiments with fewer than 200 users since they don't have enough data
  users > 200 -- Skip experiments that are 5 days or shorter (most likely means it was stopped early)
  AND dateDiff('day', start_date, end_date) > 5 -- Skip experiments that start of the very first day since we're likely missing data
  AND dateDiff(
    'day',
    toDateTime('2020-08-12 18:42:35'),
    start_date
  ) > 2
ORDER BY
  experiment_id ASC,
  variation_id ASCfuture-teacher-7046
late-teacher-86348
08/13/2021, 1:28 AMlate-teacher-86348
08/13/2021, 1:44 PMfuture-teacher-7046
future-teacher-7046
future-teacher-7046
future-teacher-7046
x-clickhouse-format header to the request.  So clickhouse was returning tab-separated results and the library was expecting JSON and failing to parse it.late-teacher-86348
08/14/2021, 1:16 AM