"use strict";

var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.KnowledgeBaseService = exports.KnowledgeBaseEntryOperationType = void 0;
var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
var _elasticsearch = require("@elastic/elasticsearch");
var _boom = require("@hapi/boom");
var _pLimit = _interopRequireDefault(require("p-limit"));
var _pRetry = _interopRequireDefault(require("p-retry"));
var _lodash = require("lodash");
var _gptTokenizer = require("gpt-tokenizer");
var _ = require("..");
var _types = require("../../../common/types");
var _get_access_query = require("../util/get_access_query");
var _get_category_query = require("../util/get_category_query");
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

/**
 * Tells whether `error` is an Elasticsearch `ResponseError` whose body reports one of the
 * error types this file treats as "resource missing or not ready":
 * `resource_not_found_exception` or `status_exception`.
 *
 * The body is inspected defensively: a response error without a JSON body (missing body,
 * plain-text body, or a body without an `error` object) yields `false` instead of raising a
 * `TypeError` that would mask the original failure.
 *
 * @param {unknown} error - Any thrown value.
 * @returns {boolean} `true` only for a `ResponseError` carrying one of the two error types.
 */
function isAlreadyExistsError(error) {
  if (!(error instanceof _elasticsearch.errors.ResponseError)) {
    return false;
  }
  const body = error.body;
  const errorType = body && body.error ? body.error.type : undefined;
  return errorType === 'resource_not_found_exception' || errorType === 'status_exception';
}
/**
 * Always throws: builds a Boom `serverUnavailable` error with the fixed message
 * "Knowledge base is not ready yet" and attaches `body` as its data.
 *
 * @param {unknown} body - Extra data handed to Boom as the error's data argument.
 * @throws The Boom error created by `serverUnavailable`.
 */
function throwKnowledgeBaseNotReady(body) {
  const { serverUnavailable } = _boom;
  const notReadyError = serverUnavailable(`Knowledge base is not ready yet`, body);
  throw notReadyError;
}
/**
 * Kinds of operation that can be applied to a knowledge base entry.
 * `Index` maps to the string 'index' and `Delete` to the string 'delete'.
 */
let KnowledgeBaseEntryOperationType = exports.KnowledgeBaseEntryOperationType = {
  Index: 'index',
  Delete: 'delete'
};
/**
 * Manages the assistant knowledge base stored in Elasticsearch: installs and deploys the model
 * returned by `dependencies.getModelId()`, reports its deployment status, indexes and deletes
 * entries, and recalls entries that match free-text queries.
 *
 * Operations submitted through `queue()` before the knowledge base has been seen ready are
 * buffered in `_queue` and flushed by `processQueue()`.
 */
class KnowledgeBaseService {
  /**
   * @param {object} dependencies - Collaborators used by the service. The code reads
   *   `esClient`, `logger`, `getModelId`, `taskManagerStart` and `resources`
   *   (`resources.aliases.kb`, `resources.pipelines.kb`).
   */
  constructor(dependencies) {
    // Flipped to true by processQueue() the first time the knowledge base is found ready.
    (0, _defineProperty2.default)(this, "hasSetup", false);
    // Operations received by queue() while hasSetup was still false.
    (0, _defineProperty2.default)(this, "_queue", []);

    /**
     * Installs the model if needed, starts its deployment and waits until at least one node
     * reports the deployment as started, then schedules the queue task.
     *
     * @returns {Promise<void>}
     * @throws When the model is still not usable after all retries, or when starting the
     *   deployment fails with an error that `isAlreadyExistsError` does not recognise.
     */
    (0, _defineProperty2.default)(this, "setup", async () => {
      const elserModelId = await this.dependencies.getModelId();
      // factor 1 keeps the delay between attempts constant.
      const retryOptions = {
        factor: 1,
        minTimeout: 10000,
        retries: 12
      };
      const installModel = async () => {
        this.dependencies.logger.info('Installing ELSER model');
        await this.dependencies.esClient.ml.putTrainedModel({
          model_id: elserModelId,
          input: {
            field_names: ['text_field']
          },
          // @ts-expect-error
          wait_for_completion: true
        }, {
          requestTimeout: '20m'
        });
        this.dependencies.logger.info('Finished installing ELSER model');
      };
      const getIsModelInstalled = async () => {
        const getResponse = await this.dependencies.esClient.ml.getTrainedModels({
          model_id: elserModelId,
          include: 'definition_status'
        });
        const modelConfig = getResponse.trained_model_configs[0];
        this.dependencies.logger.debug('Model definition status:\n' + JSON.stringify(modelConfig));
        return Boolean(modelConfig && modelConfig.fully_defined);
      };
      await (0, _pRetry.default)(async () => {
        let isModelInstalled = false;
        try {
          isModelInstalled = await getIsModelInstalled();
        } catch (error) {
          if (isAlreadyExistsError(error)) {
            // The lookup reported the model as missing: install it, then check again.
            await installModel();
            isModelInstalled = await getIsModelInstalled();
          } else {
            // Best effort: keep retrying, but leave a trace instead of dropping the error.
            this.dependencies.logger.debug('Error checking whether the model is installed');
            this.dependencies.logger.debug(error);
          }
        }
        if (!isModelInstalled) {
          throwKnowledgeBaseNotReady({
            message: 'Model is not fully defined'
          });
        }
      }, retryOptions);
      try {
        await this.dependencies.esClient.ml.startTrainedModelDeployment({
          model_id: elserModelId,
          wait_for: 'fully_allocated'
        });
      } catch (error) {
        this.dependencies.logger.debug('Error starting model deployment');
        this.dependencies.logger.debug(error);
        if (!isAlreadyExistsError(error)) {
          throw error;
        }
      }
      await (0, _pRetry.default)(async () => {
        const response = await this.dependencies.esClient.ml.getTrainedModelsStats({
          model_id: elserModelId
        });
        // Stats without deployment information simply mean "not ready yet". They must not
        // raise a TypeError here: p-retry does not retry on (non-network) TypeErrors, so the
        // whole setup would fail immediately instead of polling.
        const isReady = response.trained_model_stats.some(stats => {
          const deploymentStats = stats.deployment_stats;
          const nodes = deploymentStats === null || deploymentStats === undefined ? undefined : deploymentStats.nodes;
          return Array.isArray(nodes) && nodes.some(node => Boolean(node.routing_state) && node.routing_state.routing_state === 'started');
        });
        if (isReady) {
          return;
        }
        this.dependencies.logger.debug('Model is not allocated yet');
        this.dependencies.logger.debug(JSON.stringify(response));
        throw (0, _boom.gatewayTimeout)();
      }, retryOptions);
      this.dependencies.logger.info('Model is ready');
      this.ensureTaskScheduled();
    });

    /**
     * Reports the deployment status of the model.
     *
     * @returns {Promise<object>} `{ ready, deployment_state, allocation_state, model_name }`
     *   on success; `{ error, ready: false, model_name }` when the stats cannot be read.
     *   `ready` is true only for deployment state 'started' with allocation state
     *   'fully_allocated'.
     */
    (0, _defineProperty2.default)(this, "status", async () => {
      const elserModelId = await this.dependencies.getModelId();
      try {
        const modelStats = await this.dependencies.esClient.ml.getTrainedModelsStats({
          model_id: elserModelId
        });
        const elserModelStats = modelStats.trained_model_stats[0];
        const deploymentStats = elserModelStats.deployment_stats;
        const hasDeploymentStats = deploymentStats !== null && deploymentStats !== undefined;
        const deploymentState = hasDeploymentStats ? deploymentStats.state : undefined;
        const allocationState = hasDeploymentStats ? deploymentStats.allocation_status.state : undefined;
        return {
          ready: deploymentState === 'started' && allocationState === 'fully_allocated',
          deployment_state: deploymentState,
          allocation_state: allocationState,
          model_name: elserModelId
        };
      } catch (error) {
        return {
          error: error instanceof _elasticsearch.errors.ResponseError ? error.body.error : String(error),
          ready: false,
          model_name: elserModelId
        };
      }
    });

    /**
     * Recalls entries for the given queries from both the knowledge base index and the
     * connector (`search-*`) indices, merged and ordered by descending score.
     *
     * At most 20 entries are considered. Entries are added until the accumulated token count
     * reaches 4000; the entry that crosses that budget is still included.
     *
     * @param {object} params
     * @param {object} params.user - Passed to the access query.
     * @param {string[]} params.queries - Free-text queries.
     * @param {unknown} params.categories - Passed to the category query.
     * @param {string} params.namespace - Passed to the access query.
     * @param {object} params.asCurrentUser - Client used for the connector lookup.
     * @returns {Promise<{entries: object[]}>}
     * @throws A "knowledge base not ready" error when the knowledge base search fails with an
     *   error recognised by `isAlreadyExistsError`; other knowledge base errors are rethrown.
     *   Connector lookup errors are logged and treated as "no documents".
     */
    (0, _defineProperty2.default)(this, "recall", async ({
      user,
      queries,
      categories,
      namespace,
      asCurrentUser
    }) => {
      this.dependencies.logger.debug(`Recalling entries from KB for queries: "${queries}"`);
      const modelId = await this.dependencies.getModelId();
      const [documentsFromKb, documentsFromConnectors] = await Promise.all([this.recallFromKnowledgeBase({
        user,
        queries,
        categories,
        namespace,
        modelId
      }).catch(error => {
        if (isAlreadyExistsError(error)) {
          throwKnowledgeBaseNotReady(error.body);
        }
        throw error;
      }), this.recallFromConnectors({
        asCurrentUser,
        queries,
        modelId
      }).catch(error => {
        this.dependencies.logger.debug('Error getting data from search indices');
        this.dependencies.logger.debug(error);
        return [];
      })]);
      const sortedEntries = (0, _lodash.orderBy)(documentsFromKb.concat(documentsFromConnectors), 'score', 'desc').slice(0, 20);
      const MAX_TOKENS = 4000;
      let tokenCount = 0;
      const returnedEntries = [];
      for (const entry of sortedEntries) {
        // The entry is pushed before the budget check, so the one that crosses it is kept.
        returnedEntries.push(entry);
        tokenCount += (0, _gptTokenizer.encode)(entry.text).length;
        if (tokenCount >= MAX_TOKENS) {
          break;
        }
      }
      const droppedEntries = sortedEntries.length - returnedEntries.length;
      if (droppedEntries > 0) {
        this.dependencies.logger.info(`Dropped ${droppedEntries} entries because of token limit`);
      }
      return {
        entries: returnedEntries
      };
    });

    /**
     * Loads up to 500 entries whose `labels.category.keyword` is 'instruction' and that pass
     * the access query for `user` / `namespace`.
     *
     * @param {string} namespace
     * @param {object} user
     * @returns {Promise<Array<{doc_id: string, text: string}>>} Missing fields default to ''.
     *   Any failure is logged and results in an empty array.
     */
    (0, _defineProperty2.default)(this, "getInstructions", async (namespace, user) => {
      try {
        const response = await this.dependencies.esClient.search({
          index: this.dependencies.resources.aliases.kb,
          query: {
            bool: {
              must: [{
                term: {
                  'labels.category.keyword': {
                    value: 'instruction'
                  }
                }
              }],
              filter: (0, _get_access_query.getAccessQuery)({
                user,
                namespace
              })
            }
          },
          size: 500,
          _source: ['doc_id', 'text']
        });
        return response.hits.hits.map(hit => {
          const source = hit._source;
          const hasSource = source !== null && source !== undefined;
          const docId = hasSource ? source.doc_id : undefined;
          const text = hasSource ? source.text : undefined;
          return {
            doc_id: docId !== null && docId !== undefined ? docId : '',
            text: text !== null && text !== undefined ? text : ''
          };
        });
      } catch (error) {
        this.dependencies.logger.error('Failed to load instructions from knowledge base');
        this.dependencies.logger.error(error);
        return [];
      }
    });

    /**
     * Lists up to 500 knowledge base entries, optionally restricted to those whose `doc_id`
     * starts with `query`, sorted by `sortBy` in `sortDirection` order.
     *
     * @param {object} params
     * @param {string} [params.query] - `doc_id` prefix; when falsy no query is sent.
     * @param {string} params.sortBy - Field to sort on (stringified as-is).
     * @param {string} params.sortDirection - Sort order handed to Elasticsearch.
     * @returns {Promise<{entries: object[]}>} Each entry is the stored source plus `score`,
     *   `id`, and a `role` that defaults to `KnowledgeBaseEntryRole.UserEntry`.
     * @throws A "knowledge base not ready" error for errors recognised by
     *   `isAlreadyExistsError`; anything else is rethrown.
     */
    (0, _defineProperty2.default)(this, "getEntries", async ({
      query,
      sortBy,
      sortDirection
    }) => {
      try {
        const response = await this.dependencies.esClient.search({
          index: this.dependencies.resources.aliases.kb,
          ...(query ? {
            query: {
              wildcard: {
                doc_id: {
                  value: `${query}*`
                }
              }
            }
          } : {}),
          sort: [{
            [String(sortBy)]: {
              order: sortDirection
            }
          }],
          size: 500,
          _source: {
            includes: ['doc_id', 'text', 'is_correction', 'labels', 'confidence', 'public', '@timestamp', 'role']
          }
        });
        return {
          entries: response.hits.hits.map(hit => {
            const storedRole = hit._source.role;
            return {
              ...hit._source,
              role: storedRole !== null && storedRole !== undefined ? storedRole : _types.KnowledgeBaseEntryRole.UserEntry,
              score: hit._score,
              id: hit._id
            };
          })
        };
      } catch (error) {
        if (isAlreadyExistsError(error)) {
          throwKnowledgeBaseNotReady(error.body);
        }
        throw error;
      }
    });

    /**
     * Indexes a single entry through the knowledge base ingest pipeline.
     *
     * `@timestamp` defaults to the current time but can be overridden by the entry, whereas
     * `user` and `namespace` always come from the arguments.
     *
     * @param {object} params
     * @param {object} params.entry - Entry to store; `entry.id` becomes the document id.
     * @param {object} [params.user]
     * @param {string} [params.namespace]
     * @returns {Promise<void>}
     * @throws A "knowledge base not ready" error when Elasticsearch answers with a
     *   `status_exception`; anything else is rethrown.
     */
    (0, _defineProperty2.default)(this, "addEntry", async ({
      entry: {
        id,
        ...document
      },
      user,
      namespace
    }) => {
      try {
        await this.dependencies.esClient.index({
          index: this.dependencies.resources.aliases.kb,
          id,
          document: {
            '@timestamp': new Date().toISOString(),
            ...document,
            user,
            namespace
          },
          pipeline: this.dependencies.resources.pipelines.kb,
          refresh: false
        });
      } catch (error) {
        if (error instanceof _elasticsearch.errors.ResponseError && error.body.error.type === 'status_exception') {
          throwKnowledgeBaseNotReady(error.body);
        }
        throw error;
      }
    });

    /**
     * Applies a batch of operations with at most five running concurrently.
     *
     * @param {object} params
     * @param {object[]} params.operations - Operations accepted by `processOperation`.
     * @returns {Promise<void>} Rejects with the first operation failure.
     */
    (0, _defineProperty2.default)(this, "addEntries", async ({
      operations
    }) => {
      this.dependencies.logger.info(`Starting import of ${operations.length} entries`);
      const limiter = (0, _pLimit.default)(5);
      await Promise.all(operations.map(operation => limiter(async () => {
        await this.processOperation(operation);
      })));
      this.dependencies.logger.info(`Completed import of ${operations.length} entries`);
    });

    /**
     * Deletes the entry with the given document id and waits for the change to be refreshed.
     *
     * @param {object} params
     * @param {string} params.id - Document id to delete.
     * @returns {Promise<void>}
     * @throws A "knowledge base not ready" error for errors recognised by
     *   `isAlreadyExistsError`; anything else is rethrown.
     */
    (0, _defineProperty2.default)(this, "deleteEntry", async ({
      id
    }) => {
      try {
        await this.dependencies.esClient.delete({
          index: this.dependencies.resources.aliases.kb,
          id,
          refresh: 'wait_for'
        });
      } catch (error) {
        if (isAlreadyExistsError(error)) {
          throwKnowledgeBaseNotReady(error.body);
        }
        throw error;
      }
    });
    this.dependencies = dependencies;
    this.ensureTaskScheduled();
  }

  /**
   * Makes sure the hourly queue-indexing task exists and asks the task manager to run it soon.
   * Fire-and-forget: failures are logged, never thrown.
   */
  ensureTaskScheduled() {
    this.dependencies.taskManagerStart.ensureScheduled({
      taskType: _.INDEX_QUEUED_DOCUMENTS_TASK_TYPE,
      id: _.INDEX_QUEUED_DOCUMENTS_TASK_ID,
      state: {},
      params: {},
      schedule: {
        interval: '1h'
      }
    }).then(() => {
      this.dependencies.logger.debug('Scheduled queue task');
      return this.dependencies.taskManagerStart.runSoon(_.INDEX_QUEUED_DOCUMENTS_TASK_ID);
    }).then(() => {
      this.dependencies.logger.debug('Queue task ran');
    }).catch(err => {
      this.dependencies.logger.error(`Failed to schedule queue task`);
      this.dependencies.logger.error(err);
    });
  }

  /**
   * Applies one operation: a `Delete` operation removes every document matching the optional
   * `doc_id` and `labels` filters; any other operation indexes `operation.document`.
   *
   * @param {object} operation
   * @returns {Promise<void>}
   */
  async processOperation(operation) {
    if (operation.type === KnowledgeBaseEntryOperationType.Delete) {
      await this.dependencies.esClient.deleteByQuery({
        index: this.dependencies.resources.aliases.kb,
        query: {
          bool: {
            filter: [...(operation.doc_id ? [{
              term: {
                _id: operation.doc_id
              }
            }] : []), ...(operation.labels ? (0, _lodash.map)(operation.labels, (value, key) => {
              return {
                term: {
                  [key]: value
                }
              };
            }) : [])]
          }
        }
      });
      return;
    }
    await this.addEntry({
      entry: operation.document
    });
  }

  /**
   * Flushes the buffered operations once the knowledge base reports ready, running at most
   * five at a time. Does nothing when the buffer is empty or the knowledge base is not ready.
   *
   * Each operation is removed from the buffer right before it is processed.
   *
   * @returns {Promise<void>} Rejects with the first operation failure.
   */
  async processQueue() {
    if (!this._queue.length) {
      return;
    }
    if (!(await this.status()).ready) {
      this.dependencies.logger.debug(`Bailing on queue task: KB is not ready yet`);
      return;
    }
    this.dependencies.logger.debug(`Processing queue`);
    this.hasSetup = true;
    this.dependencies.logger.info(`Processing ${this._queue.length} queue operations`);
    const limiter = (0, _pLimit.default)(5);
    // Work on a snapshot so that mutating the live buffer does not disturb the iteration.
    const operations = this._queue.concat();
    await Promise.all(operations.map(operation => limiter(async () => {
      // Look the operation up in the live buffer: positions in the snapshot stop matching
      // the buffer as soon as an earlier operation has been removed.
      const queuedIndex = this._queue.indexOf(operation);
      if (queuedIndex !== -1) {
        this._queue.splice(queuedIndex, 1);
      }
      await this.processOperation(operation);
    })));
    this.dependencies.logger.info('Processed all queued operations');
  }

  /**
   * Submits operations. Until `processQueue()` has seen the knowledge base ready they are only
   * buffered; afterwards they are processed right away (at most five at a time) in the
   * background, with failures logged rather than thrown.
   *
   * @param {object[]} operations - Operations accepted by `processOperation`.
   * @returns {void}
   */
  queue(operations) {
    if (!operations.length) {
      return;
    }
    if (!this.hasSetup) {
      this._queue.push(...operations);
      return;
    }
    const limiter = (0, _pLimit.default)(5);
    // Process the operations that were just submitted, not the (by now drained) buffer.
    const limitedFunctions = operations.map(operation => limiter(() => this.processOperation(operation)));
    Promise.all(limitedFunctions).catch(err => {
      this.dependencies.logger.error(`Failed to process all queued operations`);
      this.dependencies.logger.error(err);
    });
  }

  /**
   * Searches the knowledge base index with one `text_expansion` clause per query on
   * `ml.tokens`, filtered by the access and category queries.
   *
   * @param {object} params
   * @param {string[]} params.queries
   * @param {unknown} params.categories
   * @param {string} params.namespace
   * @param {object} params.user
   * @param {string} params.modelId
   * @returns {Promise<object[]>} Up to 20 hits: stored `text`, `is_correction` and `labels`
   *   plus `score` and `id`.
   */
  async recallFromKnowledgeBase({
    queries,
    categories,
    namespace,
    user,
    modelId
  }) {
    const query = {
      bool: {
        should: queries.map(text => ({
          text_expansion: {
            'ml.tokens': {
              model_text: text,
              model_id: modelId
            }
          }
        })),
        filter: [...(0, _get_access_query.getAccessQuery)({
          user,
          namespace
        }), ...(0, _get_category_query.getCategoryQuery)({
          categories
        })]
      }
    };
    const response = await this.dependencies.esClient.search({
      index: [this.dependencies.resources.aliases.kb],
      query,
      size: 20,
      _source: {
        includes: ['text', 'is_correction', 'labels']
      }
    });
    return response.hits.hits.map(hit => ({
      ...hit._source,
      score: hit._score,
      id: hit._id
    }));
  }

  /**
   * Searches connector indices, as the current user, for documents matching the queries on
   * any `ml.inference.*` sparse vector field produced with `modelId`.
   *
   * @param {object} params
   * @param {string[]} params.queries
   * @param {object} params.asCurrentUser - Client used for the field caps and search requests.
   * @param {string} params.modelId
   * @returns {Promise<object[]>} Up to 20 hits with the JSON-serialised source as `text`, the
   *   hit `score` and `id`, and `is_correction: false`; empty when no vector field exists.
   */
  async recallFromConnectors({
    queries,
    asCurrentUser,
    modelId
  }) {
    const ML_INFERENCE_PREFIX = 'ml.inference.';
    // NOTE(review): field caps are read from 'search*' while the search below targets
    // 'search-*' - confirm whether the difference is intentional.
    const fieldCaps = await asCurrentUser.fieldCaps({
      index: 'search*',
      fields: `${ML_INFERENCE_PREFIX}*`,
      allow_no_indices: true,
      types: ['sparse_vector'],
      filters: '-metadata,-parent'
    });
    // Reduce each full field path to its bare name; the full paths are rebuilt below.
    const fieldsWithVectors = Object.keys(fieldCaps.fields).map(field => field.replace('_expanded.predicted_value', '').replace(ML_INFERENCE_PREFIX, ''));
    if (!fieldsWithVectors.length) {
      return [];
    }
    const esQueries = fieldsWithVectors.flatMap(field => {
      const vectorField = `${ML_INFERENCE_PREFIX}${field}_expanded.predicted_value`;
      const modelField = `${ML_INFERENCE_PREFIX}${field}_expanded.model_id`;
      return queries.map(query => {
        return {
          bool: {
            should: [{
              text_expansion: {
                [vectorField]: {
                  model_text: query,
                  model_id: modelId
                }
              }
            }],
            // Only match documents whose vectors were produced by the same model.
            filter: [{
              term: {
                [modelField]: modelId
              }
            }]
          }
        };
      });
    });
    const response = await asCurrentUser.search({
      index: 'search-*',
      query: {
        bool: {
          should: esQueries
        }
      },
      size: 20,
      _source: {
        exclude: ['_*', 'ml*']
      }
    });
    return response.hits.hits.map(hit => ({
      text: JSON.stringify(hit._source),
      score: hit._score,
      is_correction: false,
      id: hit._id
    }));
  }
}
exports.KnowledgeBaseService = KnowledgeBaseService;