hundreds-belgium-41942
07/13/2023, 10:33 AM
white-fireman-22476
07/13/2023, 1:15 PM
hundreds-belgium-41942
07/14/2023, 3:51 AM
cuddly-fireman-12522
07/20/2023, 8:42 AM
{
  "level": 50,
  "time": 1688636369686,
  "pid": 59,
  "hostname": "fa961f2113a5",
  "err": {
    "type": "SyntaxError",
    "message": "Unexpected token u in JSON at position 0",
    "stack": "SyntaxError: Unexpected token u in JSON at position 0\n at JSON.parse (<anonymous>)\n at /usr/local/src/app/packages/back-end/dist/services/stats.js:99:27\n at Generator.next (<anonymous>)\n at fulfilled (/usr/local/src/app/packages/back-end/dist/services/stats.js:5:58)\n at processTicksAndRejections (node:internal/process/task_queues:96:5)"
  },
  "msg": "Failed to run stats model: null"
}
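For context: "Unexpected token u in JSON at position 0" is what V8 raises when JSON.parse is handed undefined — the value is coerced to the string "undefined" and parsing fails on the leading "u". A minimal sketch reproducing the signature (not GrowthBook code):

try {
  JSON.parse(undefined as unknown as string);
} catch (e) {
  console.error(e); // SyntaxError: Unexpected token u in JSON at position 0
}

The trailing "null" in "Failed to run stats model: null" is the stringified result of the Python shell call, i.e. the stats script produced no stdout for JSON.parse to consume.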
white-fireman-22476
07/20/2023, 9:01 AM
cuddly-fireman-12522
07/20/2023, 9:05 AM
white-fireman-22476
07/20/2023, 9:19 AM
cuddly-fireman-12522
07/20/2023, 9:20 AM
white-fireman-22476
07/20/2023, 9:21 AM
import { promisify } from "util";
import { PythonShell } from "python-shell";
import {
DEFAULT_SEQUENTIAL_TESTING_TUNING_PARAMETER,
DEFAULT_STATS_ENGINE,
} from "shared/constants";
import { MetricInterface } from "../../types/metric";
import { ExperimentMetricAnalysis, StatsEngine } from "../../types/stats";
import {
ExperimentMetricQueryResponse,
ExperimentResults,
} from "../types/Integration";
import {
ExperimentReportResultDimension,
ExperimentReportResults,
ExperimentReportVariation,
} from "../../types/report";
import { promiseAllChunks } from "../util/promise";
import { checkSrm } from "../util/stats";
import { logger } from "../util/logger";
import {
ExperimentSnapshotAnalysisSettings,
ExperimentSnapshotSettings,
} from "../../types/experiment-snapshot";
import { QueryMap } from "../queryRunners/QueryRunner";
export const MAX_DIMENSIONS = 20;
export async function analyzeExperimentMetric(
variations: ExperimentReportVariation[],
metric: MetricInterface,
rows: ExperimentMetricQueryResponse,
dimension: string | null = null,
statsEngine: StatsEngine = DEFAULT_STATS_ENGINE,
sequentialTestingEnabled: boolean = false,
sequentialTestingTuningParameter: number = DEFAULT_SEQUENTIAL_TESTING_TUNING_PARAMETER
): Promise<ExperimentMetricAnalysis> {
if (!rows || !rows.length) {
return {
unknownVariations: [],
multipleExposures: 0,
dimensions: [],
};
}
const variationIdMap: { [key: string]: number } = {};
variations.forEach((v, i) => {
variationIdMap[v.id] = i;
});
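// Debug patch: log the exact Python script that will be executed below so it
// can be copied out of the container logs and run by hand against gbstats.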
logger.error("GRAB THE FOLLOWING AND SEND TO JAMES");
logger.error(`
from gbstats.gbstats import (
diff_for_daily_time_series,
detect_unknown_variations,
analyze_metric_df,
get_metric_df,
reduce_dimensionality,
format_results
)
from gbstats.shared.constants import StatsEngine
import pandas as pd
import json
data = json.loads("""${JSON.stringify({
var_id_map: variationIdMap,
var_names: variations.map((v) => v.name),
weights: variations.map((v) => v.weight),
ignore_nulls: !!metric.ignoreNulls,
inverse: !!metric.inverse,
max_dimensions:
dimension?.substring(0, 8) === "pre:date" ? 9999 : MAX_DIMENSIONS,
rows,
}).replace(/\\/g, "\\\\")}""", strict=False)
var_id_map = data['var_id_map']
var_names = data['var_names']
ignore_nulls = data['ignore_nulls']
inverse = data['inverse']
weights = data['weights']
max_dimensions = data['max_dimensions']
rows = pd.DataFrame(data['rows'])
unknown_var_ids = detect_unknown_variations(
rows=rows,
var_id_map=var_id_map
)
${
dimension === "pre:datedaily" ? `rows = diff_for_daily_time_series(rows)` : ``
}
df = get_metric_df(
rows=rows,
var_id_map=var_id_map,
var_names=var_names,
)
reduced = reduce_dimensionality(
df=df,
max=max_dimensions
)
result = analyze_metric_df(
df=reduced,
weights=weights,
inverse=inverse,
engine=${
statsEngine === "frequentist"
? "StatsEngine.FREQUENTIST"
: "StatsEngine.BAYESIAN"
},
engine_config=${
statsEngine === "frequentist" && sequentialTestingEnabled
? `{'sequential': True, 'sequential_tuning_parameter': ${sequentialTestingTuningParameter}}`
: "{}"
}
)
print(json.dumps({
'unknownVariations': list(unknown_var_ids),
'dimensions': format_results(result)
}, allow_nan=False))`);
const result = await promisify(PythonShell.runString)(
`
from gbstats.gbstats import (
diff_for_daily_time_series,
detect_unknown_variations,
analyze_metric_df,
get_metric_df,
reduce_dimensionality,
format_results
)
from gbstats.shared.constants import StatsEngine
import pandas as pd
import json
data = json.loads("""${JSON.stringify({
var_id_map: variationIdMap,
var_names: variations.map((v) => v.name),
weights: variations.map((v) => v.weight),
ignore_nulls: !!metric.ignoreNulls,
inverse: !!metric.inverse,
max_dimensions:
dimension?.substring(0, 8) === "pre:date" ? 9999 : MAX_DIMENSIONS,
rows,
}).replace(/\\/g, "\\\\")}""", strict=False)
var_id_map = data['var_id_map']
var_names = data['var_names']
ignore_nulls = data['ignore_nulls']
inverse = data['inverse']
weights = data['weights']
max_dimensions = data['max_dimensions']
rows = pd.DataFrame(data['rows'])
unknown_var_ids = detect_unknown_variations(
rows=rows,
var_id_map=var_id_map
)
${
dimension === "pre:datedaily" ? `rows = diff_for_daily_time_series(rows)` : ``
}
df = get_metric_df(
rows=rows,
var_id_map=var_id_map,
var_names=var_names,
)
reduced = reduce_dimensionality(
df=df,
max=max_dimensions
)
result = analyze_metric_df(
df=reduced,
weights=weights,
inverse=inverse,
engine=${
statsEngine === "frequentist"
? "StatsEngine.FREQUENTIST"
: "StatsEngine.BAYESIAN"
},
engine_config=${
statsEngine === "frequentist" && sequentialTestingEnabled
? `{'sequential': True, 'sequential_tuning_parameter': ${sequentialTestingTuningParameter}}`
: "{}"
}
)
print(json.dumps({
'unknownVariations': list(unknown_var_ids),
'dimensions': format_results(result)
}, allow_nan=False))`,
{}
);
let parsed: ExperimentMetricAnalysis;
try {
parsed = JSON.parse(result?.[0]);
// Add multiple exposures
parsed.multipleExposures =
rows.filter((r) => r.variation === "__multiple__")?.[0]?.users || 0;
} catch (e) {
logger.error(e, "Failed to run stats model: " + result);
throw e;
}
return parsed;
}
export async function analyzeExperimentResults({
queryData,
analysisSettings,
snapshotSettings,
variationNames,
metricMap,
}: {
queryData: QueryMap;
analysisSettings: ExperimentSnapshotAnalysisSettings;
snapshotSettings: ExperimentSnapshotSettings;
variationNames?: string[];
metricMap: Map<string, MetricInterface>;
}): Promise<ExperimentReportResults> {
const metricRows: {
metric: string;
rows: ExperimentMetricQueryResponse;
}[] = [];
let unknownVariations: string[] = [];
let multipleExposures = 0;
// Everything done in a single query (Mixpanel, Google Analytics)
// Need to convert to the same format as SQL rows
if (queryData.has("results")) {
const results = queryData.get("results");
if (!results) throw new Error("Empty experiment results");
const data = results.result as ExperimentResults;
unknownVariations = data.unknownVariations;
const byMetric: { [key: string]: ExperimentMetricQueryResponse } = {};
data.dimensions.forEach((row) => {
row.variations.forEach((v) => {
Object.keys(v.metrics).forEach((metric) => {
const stats = v.metrics[metric];
byMetric[metric] = byMetric[metric] || [];
byMetric[metric].push({
dimension: row.dimension,
variation:
snapshotSettings.variations[v.variation]?.id || v.variation + "",
users: stats.count,
count: stats.count,
statistic_type: "mean", // no ratio in mixpanel or GA
main_metric_type: stats.metric_type,
main_sum: stats.main_sum,
main_sum_squares: stats.main_sum_squares,
});
});
});
});
Object.keys(byMetric).forEach((metric) => {
metricRows.push({
metric,
rows: byMetric[metric],
});
});
}
// One query for each metric, can just use the rows directly from the query
else {
queryData.forEach((query, key) => {
const metric = metricMap.get(key);
if (!metric) return;
metricRows.push({
metric: key,
rows: query.result as ExperimentMetricQueryResponse,
});
});
}
const dimensionMap: Map<string, ExperimentReportResultDimension> = new Map();
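// Run the per-metric analyses in chunks of 3 to bound how many Python
// processes are spawned concurrently.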
await promiseAllChunks(
metricRows.map((data) => {
const metric = metricMap.get(data.metric);
return async () => {
if (!metric) return;
const result = await analyzeExperimentMetric(
snapshotSettings.variations.map((v, i) => ({
...v,
name: variationNames?.[i] || v.id,
})),
metric,
data.rows,
analysisSettings.dimensions[0],
analysisSettings.statsEngine,
analysisSettings.sequentialTesting,
analysisSettings.sequentialTestingTuningParameter
);
unknownVariations = unknownVariations.concat(result.unknownVariations);
multipleExposures = Math.max(
multipleExposures,
result.multipleExposures
);
result.dimensions.forEach((row) => {
const dim = dimensionMap.get(row.dimension) || {
name: row.dimension,
srm: 1,
variations: [],
};
row.variations.forEach((v, i) => {
const data = dim.variations[i] || {
users: v.users,
metrics: {},
};
data.users = Math.max(data.users, v.users);
data.metrics[metric.id] = {
...v,
buckets: [],
};
dim.variations[i] = data;
});
dimensionMap.set(row.dimension, dim);
});
};
}),
3
);
const dimensions = Array.from(dimensionMap.values());
if (!dimensions.length) {
dimensions.push({
name: "All",
srm: 1,
variations: [],
});
} else {
dimensions.forEach((dimension) => {
// Calculate SRM
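// (checkSrm returns a chi-squared p-value comparing observed user counts
// per variation against the configured traffic weights; low values flag a
// sample ratio mismatch)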
dimension.srm = checkSrm(
dimension.variations.map((v) => v.users),
snapshotSettings.variations.map((v) => v.weight)
);
});
}
return {
multipleExposures,
unknownVariations: Array.from(new Set(unknownVariations)),
dimensions,
};
}
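Tying the log to the code above: when the spawned Python process writes nothing to stdout (for example, gbstats raising before the final print), python-shell resolves with a null results array, so result?.[0] is undefined and JSON.parse throws exactly the error from the log. A defensive sketch — runStatsScript is a hypothetical helper, not part of this file:

import { promisify } from "util";
import { PythonShell } from "python-shell";

// Hypothetical guard (sketch): fail loudly when the Python process produces
// no stdout instead of letting JSON.parse trip over undefined.
async function runStatsScript(script: string): Promise<string> {
  const results = await promisify(PythonShell.runString)(script, {});
  if (!results || !results.length) {
    throw new Error("Python stats script produced no output");
  }
  return results[0];
}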
cuddly-fireman-12522
07/20/2023, 11:49 AM
melodic-ability-82567
07/20/2023, 11:54 AM
white-fireman-22476
07/20/2023, 12:16 PM
"use strict";
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.analyzeExperimentResults = exports.analyzeExperimentMetric = exports.MAX_DIMENSIONS = void 0;
const util_1 = require("util");
const python_shell_1 = require("python-shell");
const constants_1 = require("shared/constants");
const promise_1 = require("../util/promise");
const stats_1 = require("../util/stats");
const logger_1 = require("../util/logger");
exports.MAX_DIMENSIONS = 20;
function analyzeExperimentMetric(variations, metric, rows, dimension = null, statsEngine = constants_1.DEFAULT_STATS_ENGINE, sequentialTestingEnabled = false, sequentialTestingTuningParameter = constants_1.DEFAULT_SEQUENTIAL_TESTING_TUNING_PARAMETER) {
var _a, _b;
return __awaiter(this, void 0, void 0, function* () {
if (!rows || !rows.length) {
return {
unknownVariations: [],
multipleExposures: 0,
dimensions: [],
};
}
const variationIdMap = {};
variations.forEach((v, i) => {
variationIdMap[v.id] = i;
});
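// Debug patch (compiled output): logs the generated Python script before
// handing it to PythonShell below.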
logger_1.logger.error("GRAB THE FOLLOWING AND SEND TO JAMES");
logger_1.logger.error(`
from gbstats.gbstats import (
diff_for_daily_time_series,
detect_unknown_variations,
analyze_metric_df,
get_metric_df,
reduce_dimensionality,
format_results
)
from gbstats.shared.constants import StatsEngine
import pandas as pd
import json
data = json.loads("""${JSON.stringify({
var_id_map: variationIdMap,
var_names: variations.map((v) => v.name),
weights: variations.map((v) => v.weight),
ignore_nulls: !!metric.ignoreNulls,
inverse: !!metric.inverse,
max_dimensions: (dimension === null || dimension === void 0 ? void 0 : dimension.substring(0, 8)) === "pre:date" ? 9999 : exports.MAX_DIMENSIONS,
rows,
}).replace(/\\/g, "\\\\")}""", strict=False)
var_id_map = data['var_id_map']
var_names = data['var_names']
ignore_nulls = data['ignore_nulls']
inverse = data['inverse']
weights = data['weights']
max_dimensions = data['max_dimensions']
rows = pd.DataFrame(data['rows'])
unknown_var_ids = detect_unknown_variations(
rows=rows,
var_id_map=var_id_map
)
${dimension === "pre:datedaily" ? `rows = diff_for_daily_time_series(rows)` : ``}
df = get_metric_df(
rows=rows,
var_id_map=var_id_map,
var_names=var_names,
)
reduced = reduce_dimensionality(
df=df,
max=max_dimensions
)
result = analyze_metric_df(
df=reduced,
weights=weights,
inverse=inverse,
engine=${statsEngine === "frequentist"
? "StatsEngine.FREQUENTIST"
: "StatsEngine.BAYESIAN"},
engine_config=${statsEngine === "frequentist" && sequentialTestingEnabled
? `{'sequential': True, 'sequential_tuning_parameter': ${sequentialTestingTuningParameter}}`
: "{}"}
)
print(json.dumps({
'unknownVariations': list(unknown_var_ids),
'dimensions': format_results(result)
}, allow_nan=False))`);
const result = yield (0, util_1.promisify)(python_shell_1.PythonShell.runString)(`
from gbstats.gbstats import (
diff_for_daily_time_series,
detect_unknown_variations,
analyze_metric_df,
get_metric_df,
reduce_dimensionality,
format_results
)
from gbstats.shared.constants import StatsEngine
import pandas as pd
import json
data = json.loads("""${JSON.stringify({
var_id_map: variationIdMap,
var_names: variations.map((v) => v.name),
weights: variations.map((v) => v.weight),
ignore_nulls: !!metric.ignoreNulls,
inverse: !!metric.inverse,
max_dimensions: (dimension === null || dimension === void 0 ? void 0 : dimension.substring(0, 8)) === "pre:date" ? 9999 : exports.MAX_DIMENSIONS,
rows,
}).replace(/\\/g, "\\\\")}""", strict=False)
var_id_map = data['var_id_map']
var_names = data['var_names']
ignore_nulls = data['ignore_nulls']
inverse = data['inverse']
weights = data['weights']
max_dimensions = data['max_dimensions']
rows = pd.DataFrame(data['rows'])
unknown_var_ids = detect_unknown_variations(
rows=rows,
var_id_map=var_id_map
)
${dimension === "pre:datedaily" ? `rows = diff_for_daily_time_series(rows)` : ``}
df = get_metric_df(
rows=rows,
var_id_map=var_id_map,
var_names=var_names,
)
reduced = reduce_dimensionality(
df=df,
max=max_dimensions
)
result = analyze_metric_df(
df=reduced,
weights=weights,
inverse=inverse,
engine=${statsEngine === "frequentist"
? "StatsEngine.FREQUENTIST"
: "StatsEngine.BAYESIAN"},
engine_config=${statsEngine === "frequentist" && sequentialTestingEnabled
? `{'sequential': True, 'sequential_tuning_parameter': ${sequentialTestingTuningParameter}}`
: "{}"}
)
print(json.dumps({
'unknownVariations': list(unknown_var_ids),
'dimensions': format_results(result)
}, allow_nan=False))`, {});
let parsed;
try {
parsed = JSON.parse(result === null || result === void 0 ? void 0 : result[0]);
// Add multiple exposures
parsed.multipleExposures =
((_b = (_a = rows.filter((r) => r.variation === "__multiple__")) === null || _a === void 0 ? void 0 : _a[0]) === null || _b === void 0 ? void 0 : _b.users) || 0;
}
catch (e) {
logger_1.logger.error(e, "Failed to run stats model: " + result);
throw e;
}
return parsed;
});
}
exports.analyzeExperimentMetric = analyzeExperimentMetric;
function analyzeExperimentResults({ queryData, analysisSettings, snapshotSettings, variationNames, metricMap, }) {
return __awaiter(this, void 0, void 0, function* () {
const metricRows = [];
let unknownVariations = [];
let multipleExposures = 0;
// Everything done in a single query (Mixpanel, Google Analytics)
// Need to convert to the same format as SQL rows
if (queryData.has("results")) {
const results = queryData.get("results");
if (!results)
throw new Error("Empty experiment results");
const data = results.result;
unknownVariations = data.unknownVariations;
const byMetric = {};
data.dimensions.forEach((row) => {
row.variations.forEach((v) => {
Object.keys(v.metrics).forEach((metric) => {
var _a;
const stats = v.metrics[metric];
byMetric[metric] = byMetric[metric] || [];
byMetric[metric].push({
dimension: row.dimension,
variation: ((_a = snapshotSettings.variations[v.variation]) === null || _a === void 0 ? void 0 : _a.id) || v.variation + "",
users: stats.count,
count: stats.count,
statistic_type: "mean",
main_metric_type: stats.metric_type,
main_sum: stats.main_sum,
main_sum_squares: stats.main_sum_squares,
});
});
});
});
Object.keys(byMetric).forEach((metric) => {
metricRows.push({
metric,
rows: byMetric[metric],
});
});
}
// One query for each metric, can just use the rows directly from the query
else {
queryData.forEach((query, key) => {
const metric = metricMap.get(key);
if (!metric)
return;
metricRows.push({
metric: key,
rows: query.result,
});
});
}
const dimensionMap = new Map();
yield (0, promise_1.promiseAllChunks)(metricRows.map((data) => {
const metric = metricMap.get(data.metric);
return () => __awaiter(this, void 0, void 0, function* () {
if (!metric)
return;
const result = yield analyzeExperimentMetric(snapshotSettings.variations.map((v, i) => (Object.assign(Object.assign({}, v), { name: (variationNames === null || variationNames === void 0 ? void 0 : variationNames[i]) || v.id }))), metric, data.rows, analysisSettings.dimensions[0], analysisSettings.statsEngine, analysisSettings.sequentialTesting, analysisSettings.sequentialTestingTuningParameter);
unknownVariations = unknownVariations.concat(result.unknownVariations);
multipleExposures = Math.max(multipleExposures, result.multipleExposures);
result.dimensions.forEach((row) => {
const dim = dimensionMap.get(row.dimension) || {
name: row.dimension,
srm: 1,
variations: [],
};
row.variations.forEach((v, i) => {
const data = dim.variations[i] || {
users: v.users,
metrics: {},
};
data.users = Math.max(data.users, v.users);
data.metrics[metric.id] = Object.assign(Object.assign({}, v), { buckets: [] });
dim.variations[i] = data;
});
dimensionMap.set(row.dimension, dim);
});
});
}), 3);
const dimensions = Array.from(dimensionMap.values());
if (!dimensions.length) {
dimensions.push({
name: "All",
srm: 1,
variations: [],
});
}
else {
dimensions.forEach((dimension) => {
// Calculate SRM
dimension.srm = (0, stats_1.checkSrm)(dimension.variations.map((v) => v.users), snapshotSettings.variations.map((v) => v.weight));
});
}
return {
multipleExposures,
unknownVariations: Array.from(new Set(unknownVariations)),
dimensions,
};
});
}
exports.analyzeExperimentResults = analyzeExperimentResults;
//# sourceMappingURL=stats.js.map