"use strict";

var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.MAX_CHUNK_CHAR_COUNT = exports.Importer = exports.IMPORT_RETRIES = void 0;
var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
var _lodash = require("lodash");
var _moment = _interopRequireDefault(require("moment"));
var _i18n = require("@kbn/i18n");
var _mlIsPopulatedObject = require("@kbn/ml-is-populated-object");
var _constants = require("@kbn/file-upload-common/src/constants");
var _kibana_services = require("../kibana_services");
var _routes = require("./routes");
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

// Default number of documents per import chunk (initial value of Importer._chunkSize).
const CHUNK_SIZE = 5000;
// Chunk size used instead once a pipeline is found to contain processors
// flagged by pipelineContainsSpecialProcessors.
const REDUCED_CHUNK_SIZE = 100;
// Upper bound on the JSON.stringify character length of a chunk's documents;
// createDocumentChunks splits any chunk that exceeds it.
const MAX_CHUNK_CHAR_COUNT = exports.MAX_CHUNK_CHAR_COUNT = 1000000;
// Number of attempts Importer.import makes for a chunk while the response
// keeps reporting success === false.
const IMPORT_RETRIES = exports.IMPORT_RETRIES = 5;
// Size, in MB (multiplied by _constants.MB), of the slices Importer.read decodes at a time.
const STRING_CHUNKS_MB = 100;
// Mapping property name which, when present in the mappings, is used as the time field.
const DEFAULT_TIME_FIELD = '@timestamp';
/**
 * Base importer: decodes raw file data into documents, registers the target
 * index and ingest pipelines, and uploads the documents in chunks.
 *
 * `read` relies on a `_createDocs(text, isLastPart)` method which is not
 * defined in this class and has to be supplied by a subclass.
 */
class Importer {
  constructor() {
    // documents produced by read(), waiting to be sent by import()
    (0, _defineProperty2.default)(this, "_docArray", []);
    // number of documents requested per chunk; lowered for slow pipelines
    (0, _defineProperty2.default)(this, "_chunkSize", CHUNK_SIZE);
    (0, _defineProperty2.default)(this, "_index", void 0);
    // list of { id, pipeline } entries built by _initialize()
    (0, _defineProperty2.default)(this, "_pipelines", []);
    (0, _defineProperty2.default)(this, "_timeFieldName", void 0);
    (0, _defineProperty2.default)(this, "_initialized", false);
  }

  /** @returns {boolean} true once _initialize() has run. */
  initialized() {
    return this._initialized;
  }

  /** @returns {string|undefined} the index name passed to the last initialization. */
  getIndex() {
    return this._index;
  }

  /** @returns {string|undefined} the time field name, when the mappings contain one. */
  getTimeField() {
    return this._timeFieldName;
  }

  /**
   * Decodes `data` and appends the resulting documents to the internal doc array.
   *
   * @param {ArrayBuffer} data raw file contents (needs `byteLength` and `slice`)
   * @returns {{success: boolean}} `success: false` as soon as `_createDocs` reports a failure
   */
  read(data) {
    const decoder = new TextDecoder();
    const size = STRING_CHUNKS_MB * _constants.MB;

    // chop the data up into 100MB chunks for processing.
    // if the chop produces a partial line at the end, a character "remainder" count
    // is returned which is used to roll the next chunk back that many chars so
    // it is included in the next chunk.
    // NOTE(review): the remainder is described as a character count but is applied
    // to a byte offset; confirm this is correct for multi-byte characters.
    const parts = Math.ceil(data.byteLength / size);
    let remainder = 0;
    for (let i = 0; i < parts; i++) {
      const text = decoder.decode(data.slice(i * size - remainder, (i + 1) * size));
      const {
        success,
        docs,
        remainder: tempRemainder
      } = this._createDocs(text, i === parts - 1);
      if (!success) {
        return {
          success: false
        };
      }
      this._docArray = this._docArray.concat(docs);
      remainder = tempRemainder;
    }
    return {
      success: true
    };
  }

  /**
   * Records the index, pipelines and time field for a later import.
   *
   * Each defined pipeline has its timezone placeholder replaced in place, and
   * the chunk size is reduced when a pipeline contains slow processors.
   *
   * NOTE(review): every call appends to `_pipelines` without clearing it, so
   * initializing the same instance twice yields duplicate pipeline ids -
   * confirm whether re-initialization is expected to be supported.
   *
   * @param {string} index target index name, also used to build pipeline ids
   * @param {object} mappings index mappings; only `properties` is inspected here
   * @param {Array<object|undefined>} pipelines ingest pipelines, entries may be undefined
   */
  _initialize(index, mappings, pipelines) {
    for (let i = 0; i < pipelines.length; i++) {
      const pipeline = pipelines[i];
      if (pipeline !== undefined) {
        updatePipelineTimezone(pipeline);
        if (pipelineContainsSpecialProcessors(pipeline)) {
          // pipeline contains processors which we know are slow
          // so reduce the chunk size significantly to avoid timeouts
          this._chunkSize = REDUCED_CHUNK_SIZE;
        }
      }
      this._pipelines.push({
        id: `${index}-${i}-pipeline`,
        pipeline
      });
    }
    this._index = index;

    // if an @timestamp field has been added to the
    // mappings, use this field as the time field.
    // This relies on the field being populated by
    // the ingest pipeline on ingest
    this._timeFieldName = (0, _mlIsPopulatedObject.isPopulatedObject)(mappings.properties, [DEFAULT_TIME_FIELD]) ? DEFAULT_TIME_FIELD : undefined;
    this._initialized = true;
  }

  /**
   * Initializes this importer and calls the initialize-import route.
   *
   * @param {string} index
   * @param {object} settings
   * @param {object} mappings
   * @param {Array<object|undefined>} pipelines
   * @param {boolean} [existingIndex=false] forwarded unchanged to the route
   * @returns {Promise<*>} whatever callInitializeImportRoute resolves with
   */
  async initializeImport(index, settings, mappings, pipelines, existingIndex = false) {
    this._initialize(index, mappings, pipelines);
    return await (0, _routes.callInitializeImportRoute)({
      index,
      settings,
      mappings,
      ingestPipelines: this._pipelines,
      existingIndex
    });
  }

  /** Initializes local state only; no route is called. */
  async initializeWithoutCreate(index, mappings, pipelines) {
    this._initialize(index, mappings, pipelines);
  }

  /**
   * Sends the read documents to the import route, one chunk at a time.
   *
   * A chunk is re-sent up to IMPORT_RETRIES times while the response reports
   * `success === false`; an error thrown by the route ends the attempts for
   * that chunk immediately. The first chunk that cannot be imported stops the
   * whole import.
   *
   * @param {string} index target index; a falsy value yields an error result
   * @param {string} ingestPipelineId forwarded unchanged to the import route
   * @param {(percent: number) => void} setImportProgress progress callback (0-100)
   * @returns {Promise<{success: boolean, failures: Array, docCount?: number, error?: *}>}
   */
  async import(index, ingestPipelineId, setImportProgress) {
    if (!index) {
      return {
        success: false,
        error: _i18n.i18n.translate('xpack.fileUpload.import.noIndexSuppliedErrorMessage', {
          defaultMessage: 'No index supplied'
        })
      };
    }
    const chunks = createDocumentChunks(this._docArray, this._chunkSize);
    let success = true;
    const failures = [];
    let error;
    // Number of documents contained in the chunks that precede the current one.
    // Chunks can be smaller than this._chunkSize when they were split by
    // character count, so the offset has to be accumulated from the real
    // chunk lengths rather than derived from the chunk index.
    let docOffset = 0;
    for (let i = 0; i < chunks.length; i++) {
      let retries = IMPORT_RETRIES;
      let resp = {
        success: false,
        failures: [],
        docCount: 0,
        index: '',
        pipelineId: ''
      };
      while (resp.success === false && retries > 0) {
        try {
          resp = await (0, _routes.callImportRoute)({
            index,
            ingestPipelineId,
            data: chunks[i]
          });
          if (retries < IMPORT_RETRIES) {
            // eslint-disable-next-line no-console
            console.log(`Retrying import ${IMPORT_RETRIES - retries}`);
          }
          retries--;
        } catch (err) {
          // a thrown error is not retried
          resp.success = false;
          resp.error = err;
          retries = 0;
        }
      }
      if (resp.success) {
        setImportProgress((i + 1) / chunks.length * 100);
      } else {
        // eslint-disable-next-line no-console
        console.error(resp);
        success = false;
        error = resp.error;
      }
      // populateFailures adds chunkSize * chunkCount to every failure item;
      // passing (1, docOffset) shifts the items by exactly docOffset documents.
      populateFailures(resp, failures, 1, docOffset);
      if (!success) {
        break;
      }
      docOffset += chunks[i].length;
    }
    const result = {
      success,
      failures,
      docCount: this._docArray.length
    };
    if (success) {
      setImportProgress(100);
    } else {
      result.error = error;
    }
    return result;
  }

  /**
   * @param {number} [count=1]
   * @returns {Array} the first `count` read documents, JSON-parsed when stored as strings
   */
  _getFirstReadDocs(count = 1) {
    const firstReadDocs = this._docArray.slice(0, count);
    return firstReadDocs.map(doc => typeof doc === 'string' ? JSON.parse(doc) : doc);
  }

  /**
   * @param {number} [count=1]
   * @returns {Array} the last `count` read documents, JSON-parsed when stored as strings
   */
  _getLastReadDocs(count = 1) {
    // slice(-0) is slice(0) and would return every document, so a
    // non-positive count has to be handled explicitly.
    const lastReadDocs = count > 0 ? this._docArray.slice(-count) : [];
    return lastReadDocs.map(doc => typeof doc === 'string' ? JSON.parse(doc) : doc);
  }

  /**
   * Posts a sample of the read documents, together with the first pipeline, to
   * the preview_index_time_range route.
   *
   * @returns {Promise<*>} the route's response
   * @throws {Error} when the importer is not initialized or has no first pipeline
   */
  async previewIndexTimeRange() {
    const ingestPipeline = this._pipelines[0];
    const hasPipeline = ingestPipeline !== undefined && ingestPipeline !== null && ingestPipeline.pipeline !== undefined;
    if (this._initialized === false || !hasPipeline) {
      throw new Error('Import has not been initialized');
    }

    // take the first and last 10 docs from the file, to reduce the chance of getting
    // bad data or out of order data.
    const firstDocs = this._getFirstReadDocs(10);
    const lastDocs = this._getLastReadDocs(10);
    const body = JSON.stringify({
      docs: firstDocs.concat(lastDocs),
      pipeline: ingestPipeline.pipeline,
      timeField: this._timeFieldName
    });
    return await (0, _kibana_services.getHttp)().fetch({
      path: `/internal/file_upload/preview_index_time_range`,
      method: 'POST',
      version: '1',
      body
    });
  }

  /**
   * Requests removal of every registered pipeline that has a definition.
   *
   * @returns {Promise<*>} an empty array when there is nothing to delete,
   *   otherwise the remove_pipelines route's response
   */
  async deletePipelines() {
    const ids = this._pipelines.filter(p => p.pipeline !== undefined).map(p => p.id);
    if (ids.length === 0) {
      return [];
    }
    return await (0, _kibana_services.getHttp)().fetch({
      path: `/internal/file_upload/remove_pipelines/${ids.join(',')}`,
      method: 'DELETE',
      version: '1'
    });
  }
}
exports.Importer = Importer;
/**
 * Appends the failures reported in an import response to `failures`.
 *
 * Each failure's `item` is shifted, in place, by `chunkSize * chunkCount`
 * so that it is no longer relative to its own chunk,
 * e.g. item 3 in chunk 2 is actually item 20003.
 *
 * @param {{failures?: Array<{item: number}>}} error import response
 * @param {Array} failures accumulator, mutated
 * @param {number} chunkCount
 * @param {number} chunkSize
 */
function populateFailures(error, failures, chunkCount, chunkSize) {
  const reported = error.failures;
  if (!reported || !reported.length) {
    return;
  }
  const shift = chunkSize * chunkCount;
  for (const entry of reported) {
    entry.item = entry.item + shift;
  }
  failures.push(...reported);
}

// The file structure endpoint sets the timezone to be {{ event.timezone }}
// as that's the variable Filebeat would send the client timezone in.
// In this data import function the UI is effectively performing the role of Filebeat,
// i.e. doing basic parsing, processing and conversion to JSON before forwarding to the ingest pipeline.
// But it's not sending every single field that Filebeat would add, so the ingest pipeline
// cannot look for an event.timezone variable in each input record.
// Therefore we need to replace {{ event.timezone }} with the actual browser timezone.
//
// The pipeline is modified in place. Every date processor that still carries the
// placeholder is updated, not just the first one. A missing pipeline, a missing or
// non-array processors list, and null processor entries are ignored.
function updatePipelineTimezone(ingestPipeline) {
  if (ingestPipeline === undefined || ingestPipeline === null || !Array.isArray(ingestPipeline.processors)) {
    return;
  }
  // resolved lazily so the timezone is only looked up when a placeholder is found
  let browserTimezone;
  for (const processor of ingestPipeline.processors) {
    const date = processor === undefined || processor === null ? undefined : processor.date;
    if (date !== undefined && date !== null && date.timezone === '{{ event.timezone }}') {
      if (browserTimezone === undefined) {
        browserTimezone = _moment.default.tz.guess();
      }
      date.timezone = browserTimezone;
    }
  }
}
/**
 * Splits the documents into chunks for import.
 *
 * Documents are first grouped into chunks of `chunkSize`. Any chunk whose
 * JSON-serialized length exceeds MAX_CHUNK_CHAR_COUNT is split further.
 *
 * @param {Array} docArray documents to split
 * @param {number} chunkSize documents per chunk; 0 returns everything as one chunk
 * @returns {Array<Array>} the chunks, in document order
 */
function createDocumentChunks(docArray, chunkSize) {
  if (chunkSize === 0) {
    return [docArray];
  }
  const chunks = [];

  // chop docArray into chunks, then check that the total character length
  // of each chunk is within MAX_CHUNK_CHAR_COUNT.
  // if the length is too long, split the chunk into smaller chunks
  // based on how much larger it is than MAX_CHUNK_CHAR_COUNT
  // note, each document is a different size, so dividing by charCountOfDocs
  // only produces an average chunk size that should be smaller than the max length
  for (const docs of (0, _lodash.chunk)(docArray, chunkSize)) {
    const charCountOfDocs = JSON.stringify(docs).length;
    if (charCountOfDocs <= MAX_CHUNK_CHAR_COUNT) {
      chunks.push(docs);
      continue;
    }
    // calculate new chunk size which should produce a chunk
    // whose length is on average around MAX_CHUNK_CHAR_COUNT.
    // The size is clamped to at least 1: when the documents average more than
    // MAX_CHUNK_CHAR_COUNT characters the division floors to 0, and chunking
    // by 0 returns no chunks at all, which would silently drop the documents.
    const adjustedChunkSize = Math.max(1, Math.floor(MAX_CHUNK_CHAR_COUNT / charCountOfDocs * docs.length));
    chunks.push(...(0, _lodash.chunk)(docs, adjustedChunkSize));
  }
  return chunks;
}
/**
 * Reports whether any key at any depth of the pipeline is named
 * 'inference' or 'enrich'.
 *
 * @param {object} pipeline ingest pipeline definition
 * @returns {boolean} true when at least one such key is present
 */
function pipelineContainsSpecialProcessors(pipeline) {
  const slowProcessorNames = ['inference', 'enrich'];

  // walk the pipeline iteratively, recording every key that is encountered
  const seenKeys = new Set();
  const pending = [pipeline];
  while (pending.length > 0) {
    const current = pending.pop();
    for (const [name, value] of Object.entries(current)) {
      seenKeys.add(name);
      if ((0, _mlIsPopulatedObject.isPopulatedObject)(value)) {
        pending.push(value);
      }
    }
  }
  return slowProcessorNames.some(name => seenKeys.has(name));
}