"use strict";

var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.createCategoryQuery = exports.createCategorizationRequestParams = exports.createCategorizationQuery = exports.categorizeDocuments = void 0;
var _moment = _interopRequireDefault(require("moment"));
var _calculateAuto = require("@kbn/calculate-auto");
var _mlRandomSamplerUtils = require("@kbn/ml-random-sampler-utils");
var _zod = require("@kbn/zod");
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

// Format string handed to moment when parsing the start and end timestamps of
// the categorization time range.
// NOTE(review): the single quotes around T and Z look like Java-style literal
// quoting, whereas moment documents square brackets for escaping literals —
// confirm that the incoming timestamps are parsed as intended.
const isoTimestampFormat = "YYYY-MM-DD'T'HH:mm:ss.SSS'Z'";
// A category is treated as rare when the number of its histogram buckets that
// contain documents is below this fraction of the total number of buckets.
const rarityThreshold = 0.2;
/**
 * Runs a randomly sampled categorization search and maps the returned
 * category buckets into log categories.
 *
 * @param {object} params
 * @param params.esClient - client whose `search` method executes the request
 * @param params.index - index to search
 * @param params.endTimestamp - upper bound of the time range
 * @param params.startTimestamp - lower bound of the time range
 * @param params.timeField - name of the timestamp field
 * @param params.messageField - name of the text field that gets categorized
 * @param params.samplingProbability - probability handed to the random sampler
 * @param params.ignoredCategoryTerms - category terms excluded by the query
 * @param params.documentFilters - additional filter clauses (default: none)
 * @param params.minDocsPerCategory - minimum number of documents per category
 * @param params.maxCategoriesCount - number of categories requested, which is
 *   also the count at which the limit is reported as reached (default: 1000)
 * @returns {Promise<{categories: Array, hasReachedLimit: boolean}>}
 * @throws {Error} when the response has no aggregations or no `categories`
 *   aggregation
 */
const categorizeDocuments = async ({
  esClient,
  index,
  endTimestamp,
  startTimestamp,
  timeField,
  messageField,
  samplingProbability,
  ignoredCategoryTerms,
  documentFilters = [],
  minDocsPerCategory,
  maxCategoriesCount = 1000
}) => {
  // the sampler is always created with the same constant seed
  const randomSampler = (0, _mlRandomSamplerUtils.createRandomSamplerWrapper)({
    probability: samplingProbability,
    seed: 1
  });
  const requestParams = createCategorizationRequestParams({
    index,
    timeField,
    messageField,
    startTimestamp,
    endTimestamp,
    randomSampler,
    additionalFilters: documentFilters,
    ignoredCategoryTerms,
    minDocsPerCategory,
    maxCategoriesCount
  });
  const rawResponse = await esClient.search(requestParams);
  if (rawResponse.aggregations == null) {
    throw new Error('No aggregations found in large categories response');
  }
  const logCategoriesAggResult = randomSampler.unwrap(rawResponse.aggregations);
  if (!('categories' in logCategoriesAggResult)) {
    throw new Error('No categorization aggregation found in large categories response');
  }
  const logCategories = logCategoriesAggResult.categories.buckets.map(mapCategoryBucket);
  return {
    categories: logCategories,
    // compared against the same value that was sent as the requested size, so
    // the two can never drift apart
    hasReachedLimit: logCategories.length >= maxCategoriesCount
  };
};
exports.categorizeDocuments = categorizeDocuments;
/**
 * Validates a raw category bucket against the category bucket schema and
 * converts the parsed result into a log category.
 *
 * @param bucket - raw bucket taken from the categorization aggregation
 * @returns the log category with its change, document count, histogram and terms
 */
const mapCategoryBucket = bucket => {
  const toLogCategory = parsedBucket => {
    const change = mapChangePoint(parsedBucket);
    const {
      doc_count: documentCount,
      histogram,
      key: terms
    } = parsedBucket;
    return {
      change,
      documentCount,
      histogram,
      terms
    };
  };
  return esCategoryBucketSchema.transform(toLogCategory).parse(bucket);
};
/**
 * Translates a parsed change point into the change description attached to a
 * log category.
 *
 * A stationary category is reported as `rare` when its histogram qualifies as
 * rare, and as `none` otherwise. Change types that are not handled explicitly
 * (including `non_stationary`) are reported as `other`.
 *
 * @param {object} parsedBucket
 * @param parsedBucket.change - parsed change point with a `type` discriminator
 * @param parsedBucket.histogram - parsed histogram buckets of the category
 * @returns the change description
 */
const mapChangePoint = ({
  change,
  histogram
}) => {
  const {
    type
  } = change;
  if (type === 'stationary') {
    if (!isRareInHistogram(histogram)) {
      return {
        type: 'none'
      };
    }
    // prefer the first bucket that contains documents and fall back to the
    // start of the histogram if there is none
    const firstPopulatedBucket = findFirstNonZeroBucket(histogram);
    const firstSeenAt = firstPopulatedBucket === null || firstPopulatedBucket === undefined ? undefined : firstPopulatedBucket.timestamp;
    return {
      type: 'rare',
      timestamp: firstSeenAt !== null && firstSeenAt !== undefined ? firstSeenAt : histogram[0].timestamp
    };
  }
  if (type === 'dip' || type === 'spike') {
    return {
      type,
      timestamp: change.bucket.key
    };
  }
  if (type === 'step_change') {
    return {
      type: 'step',
      timestamp: change.bucket.key
    };
  }
  if (type === 'distribution_change') {
    return {
      type: 'distribution',
      timestamp: change.bucket.key
    };
  }
  if (type === 'trend_change') {
    return {
      type: 'trend',
      timestamp: change.bucket.key,
      correlationCoefficient: change.details.r_value
    };
  }
  if (type === 'unknown') {
    return {
      type: 'unknown',
      rawChange: change.rawChange
    };
  }
  // 'non_stationary' and anything unrecognized
  return {
    type: 'other'
  };
};

/**
 * The official types are lacking the change_point aggregation, so the shapes
 * of its result are described by the schemas below.
 */
// Bucket that a change point refers to: a datetime string key and a document
// count.
const esChangePointBucketSchema = _zod.z.object({
  key: _zod.z.string().datetime(),
  doc_count: _zod.z.number()
});
// Details attached to a change point type; only a numeric p_value is required.
const esChangePointDetailsSchema = _zod.z.object({
  p_value: _zod.z.number()
});
// Details that additionally carry a numeric r_value (surfaced as the
// correlation coefficient of trend changes).
const esChangePointCorrelationSchema = esChangePointDetailsSchema.extend({
  r_value: _zod.z.number()
});
/**
 * Builds the schema of a change point variant that is reported together with
 * the bucket it refers to. The response nests the details under a key named
 * after the change type; the transform flattens that into
 * `{ type, bucket, details }`.
 *
 * @param {string} typeKey - name of the change type as used in the response
 * @param detailsSchema - schema of the details nested under that key
 */
const createBucketChangePointSchema = (typeKey, detailsSchema) => _zod.z.object({
  bucket: esChangePointBucketSchema,
  type: _zod.z.object({
    [typeKey]: detailsSchema
  })
}).transform(({
  bucket,
  type
}) => ({
  type: typeKey,
  bucket,
  details: type[typeKey]
}));

/**
 * Schema of a change point result, normalized to an object with a string
 * `type` discriminator.
 *
 * The variants are tried in order, so the catch-all `unknown` variant has to
 * stay last: it accepts any object-valued `type`.
 */
const esChangePointSchema = _zod.z.union([
  createBucketChangePointSchema('dip', esChangePointDetailsSchema),
  createBucketChangePointSchema('spike', esChangePointDetailsSchema),
  createBucketChangePointSchema('step_change', esChangePointDetailsSchema),
  createBucketChangePointSchema('trend_change', esChangePointCorrelationSchema),
  createBucketChangePointSchema('distribution_change', esChangePointDetailsSchema),
  _zod.z.object({
    type: _zod.z.object({
      non_stationary: esChangePointCorrelationSchema.extend({
        trend: _zod.z.enum(['increasing', 'decreasing'])
      })
    })
  }).transform(({
    type: {
      non_stationary: details
    }
  }) => ({
    type: 'non_stationary',
    details
  })),
  _zod.z.object({
    type: _zod.z.object({
      stationary: _zod.z.object({})
    })
  }).transform(() => ({
    type: 'stationary'
  })),
  // Object schemas drop unrecognized keys by default, which would reduce every
  // unknown change to `{"type":{}}`. Passing the keys through keeps the
  // original payload in `rawChange`.
  _zod.z.object({
    type: _zod.z.object({}).passthrough()
  }).passthrough().transform(value => ({
    type: 'unknown',
    rawChange: JSON.stringify(value)
  }))
]);
// Schema of a date histogram result. Each bucket is reduced to its timestamp
// (taken from key_as_string) and its document count, and the surrounding
// object is unwrapped so that parsing yields the array of buckets directly.
const esHistogramSchema = _zod.z.object({
  buckets: _zod.z.array(_zod.z.object({
    key_as_string: _zod.z.string(),
    doc_count: _zod.z.number()
  }).transform(({
    key_as_string: timestamp,
    doc_count: documentCount
  }) => ({
    timestamp,
    documentCount
  })))
}).transform(parsedHistogram => parsedHistogram.buckets);
// Schema of a single category bucket: the category key, its document count,
// and the results of the nested `change` and `histogram` sub-aggregations.
const esCategoryBucketSchema = _zod.z.object({
  key: _zod.z.string(),
  doc_count: _zod.z.number(),
  change: esChangePointSchema,
  histogram: esHistogramSchema
});
/**
 * Tells whether a category is rare: the number of histogram buckets that
 * contain documents is below the rarity threshold's share of all buckets.
 *
 * @param {Array<{documentCount: number}>} histogram
 * @returns {boolean}
 */
const isRareInHistogram = histogram => {
  const populatedBucketCount = histogram.reduce((count, bucket) => bucket.documentCount > 0 ? count + 1 : count, 0);
  const rareBelowCount = histogram.length * rarityThreshold;
  return populatedBucketCount < rareBelowCount;
};
/**
 * Returns the first histogram bucket that contains at least one document, or
 * `undefined` when no bucket does.
 *
 * @param {Array<{documentCount: number}>} histogram
 */
const findFirstNonZeroBucket = histogram => {
  for (const bucket of histogram) {
    if (bucket.documentCount > 0) {
      return bucket;
    }
  }
  return undefined;
};
/**
 * Builds the search request that categorizes the messages of the documents in
 * the given time range and computes, per category, a date histogram and a
 * change point over that histogram.
 *
 * @param {object} params
 * @param params.index - index to search
 * @param params.timeField - name of the timestamp field; used for the range
 *   filter and for both date histograms
 * @param params.messageField - name of the text field that gets categorized
 * @param params.startTimestamp - lower bound of the time range
 * @param params.endTimestamp - upper bound of the time range
 * @param params.randomSampler - sampler whose `wrap` method wraps the aggregations
 * @param params.minDocsPerCategory - when greater than 0, sent as the minimum
 *   document count of a category (default: 0)
 * @param params.additionalFilters - additional filter clauses (default: none)
 * @param params.ignoredCategoryTerms - category terms to exclude (default: none)
 * @param params.maxCategoriesCount - number of categories to request (default: 1000)
 * @returns the search request parameters
 */
const createCategorizationRequestParams = ({
  index,
  timeField,
  messageField,
  startTimestamp,
  endTimestamp,
  randomSampler,
  minDocsPerCategory = 0,
  additionalFilters = [],
  ignoredCategoryTerms = [],
  maxCategoriesCount = 1000
}) => {
  const startMoment = (0, _moment.default)(startTimestamp, isoTimestampFormat);
  const endMoment = (0, _moment.default)(endTimestamp, isoTimestampFormat);
  const fixedIntervalDuration = _calculateAuto.calculateAuto.atLeast(24, _moment.default.duration(endMoment.diff(startMoment)));
  const calculatedMinutes = fixedIntervalDuration === null || fixedIntervalDuration === undefined ? undefined : fixedIntervalDuration.asMinutes();
  const wholeMinutes = Math.ceil(calculatedMinutes !== null && calculatedMinutes !== undefined ? calculatedMinutes : 1);
  // clamp to one minute so that an empty or inverted time range cannot
  // produce a zero or negative interval
  const fixedIntervalSize = `${Math.max(1, wholeMinutes)}m`;

  // both histograms share the field, interval and bounds so that their
  // buckets line up
  const createDateHistogram = () => ({
    date_histogram: {
      field: timeField,
      fixed_interval: fixedIntervalSize,
      extended_bounds: {
        min: startTimestamp,
        max: endTimestamp
      }
    }
  });
  return {
    index,
    size: 0,
    track_total_hits: false,
    query: createCategorizationQuery({
      messageField,
      timeField,
      startTimestamp,
      endTimestamp,
      additionalFilters,
      ignoredCategoryTerms
    }),
    aggs: randomSampler.wrap({
      histogram: createDateHistogram(),
      categories: {
        categorize_text: {
          field: messageField,
          size: maxCategoriesCount,
          categorization_analyzer: {
            tokenizer: 'standard'
          },
          ...(minDocsPerCategory > 0 ? {
            min_doc_count: minDocsPerCategory
          } : {})
        },
        aggs: {
          histogram: createDateHistogram(),
          change: {
            // @ts-expect-error the official types don't support the change_point aggregation
            change_point: {
              buckets_path: 'histogram>_count'
            }
          }
        }
      }
    })
  };
};
exports.createCategorizationRequestParams = createCategorizationRequestParams;
/**
 * Creates a factory for match queries on the given message field. The returned
 * function builds a query that requires all of the given category terms to
 * match, without fuzziness and without generated synonym phrase queries.
 *
 * @param {string} messageField - name of the field to match against
 * @returns a function from category terms to a match query
 */
const createCategoryQuery = messageField => categoryTerms => {
  const matchOptions = {
    query: categoryTerms,
    operator: 'AND',
    fuzziness: 0,
    auto_generate_synonyms_phrase_query: false
  };
  return {
    match: {
      [messageField]: matchOptions
    }
  };
};
exports.createCategoryQuery = createCategoryQuery;
/**
 * Builds the bool query that selects the documents to categorize: documents
 * that have the message field, fall into the time range, pass the additional
 * filters and do not match any of the ignored category terms.
 *
 * @param {object} params
 * @param params.messageField - name of the field that must exist
 * @param params.timeField - name of the timestamp field the range applies to
 * @param params.startTimestamp - inclusive lower bound of the range
 * @param params.endTimestamp - inclusive upper bound of the range
 * @param params.additionalFilters - filter clauses appended after the built-in
 *   ones (default: none)
 * @param params.ignoredCategoryTerms - category terms to exclude (default: none)
 * @returns the bool query
 */
const createCategorizationQuery = ({
  messageField,
  timeField,
  startTimestamp,
  endTimestamp,
  additionalFilters = [],
  ignoredCategoryTerms = []
}) => {
  const hasMessageField = {
    exists: {
      field: messageField
    }
  };
  const isWithinTimeRange = {
    range: {
      [timeField]: {
        gte: startTimestamp,
        lte: endTimestamp,
        format: 'strict_date_time'
      }
    }
  };
  const filter = [hasMessageField, isWithinTimeRange, ...additionalFilters];
  const excludedCategories = ignoredCategoryTerms.map(createCategoryQuery(messageField));
  return {
    bool: {
      filter,
      must_not: excludedCategories
    }
  };
};
exports.createCategorizationQuery = createCategorizationQuery;