"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.RAGApplicationBuilder = void 0;
const base_loader_1 = require("./interfaces/base-loader");
const base_model_1 = require("./interfaces/base-model");
const rag_tools_1 = require("./rag-tools");
/**
 * Fluent builder / facade that wires together an embedding model, a vector database,
 * an optional LLM model and an optional store, and exposes helpers to load documents
 * into the vector database and to retrieve them again.
 *
 * All `set*` methods return `this` so that calls can be chained; `build()` must be
 * awaited before the loading / retrieval helpers are used.
 */
class RAGApplicationBuilder {
    constructor() {
        this.loaders = [];
        this.temperature = 0.1;
        this.searchResultCount = 30;
        this.model = null;
        this.systemMessage = `You are a helpful human like chat bot. Use relevant provided context and chat history to answer the query at the end. Answer in full.
        If you don't know the answer, just say that you don't know, don't try to make up an answer.

        Do not use words like context or training data when responding. You can say you do not have all the information but do not indicate that you are not a reliable source.`;
        this.storeConversationsToDefaultThread = true;
        this.embeddingRelevanceCutOff = 0;
    }
    /**
     * Initializes every configured component and returns this builder.
     *
     * Order of operations: the store (possibly undefined) is handed to `BaseLoader.setCache`,
     * the system message is cleaned, the embedding model is initialized, the default
     * temperature is pushed to `BaseModel`, the LLM model (if any) is initialized and built,
     * the vector database is initialized with the embedding dimensions, and finally the
     * store (if any) is initialized.
     * @returns {Promise<this>} This instance, once all components have been initialized.
     * @throws {Error} If no embedding model or no vector database has been set.
     */
    async build() {
        if (!this.getEmbeddingModel())
            throw new Error("Embedding model must be set!");
        // Fail fast: without this guard a missing database only surfaces as a TypeError
        // after the embedding model and the LLM have already been initialized.
        if (!this.getVectorDatabase())
            throw new Error("Vector database must be set!");
        base_loader_1.BaseLoader.setCache(this.store);
        this.systemMessage = (0, rag_tools_1.cleanString)(this.getSystemMessage());
        await this.embeddingModel.init();
        base_model_1.BaseModel.setDefaultTemperature(this.getTemperature());
        if (this.model) {
            await this.model.init();
            await this.model.build(this.configObj, this.tenantId, this.embeddingModel, this.vectorDatabase);
        }
        this.vectorDatabaseDimension = await this.embeddingModel.getDimensions();
        await this.vectorDatabase.init({ dimensions: this.vectorDatabaseDimension });
        if (this.store) {
            await this.store.init();
        }
        return this;
    }
    /**
     * @returns The dimension count obtained from the embedding model during `build()`;
     * `undefined` before `build()` has completed.
     */
    getVectorDatabaseDimension() {
        return this.vectorDatabaseDimension;
    }
    /**
     * Embeds the query, runs a similarity search against the vector database and returns
     * the best matching chunks.
     *
     * The search asks for `searchResultCount + 10` candidates so that some remain after
     * results at or below the relevance cut-off are dropped. The survivors are sorted by
     * descending score and truncated to `searchResultCount`.
     * @param {string} cleanQuery - The already cleaned query text to embed.
     * @param limitsPerDoc - Forwarded unchanged to `vectorDatabase.similaritySearch`.
     * @param type - Forwarded unchanged to `vectorDatabase.similaritySearch`.
     * @returns {Promise<Array>} At most `searchResultCount` results with `score` strictly
     * greater than the relevance cut-off, best first.
     */
    async getEmbeddings(cleanQuery, limitsPerDoc, type) {
        const queryEmbedded = await this.embeddingModel.embedQuery(cleanQuery);
        const unfilteredResultSet = await this.vectorDatabase.similaritySearch(queryEmbedded, this.searchResultCount + 10, limitsPerDoc, type);
        this.debug(`Query resulted in ${unfilteredResultSet.length} chunks before filteration...`);
        return unfilteredResultSet
            .filter((result) => result.score > this.embeddingRelevanceCutOff)
            .sort((a, b) => b.score - a.score)
            .slice(0, this.searchResultCount);
    }
    /**
     * Delegates to `vectorDatabase.getEmbeddingsUniqueIDs`.
     * @param type - Forwarded unchanged to the vector database.
     * @param returnType - Forwarded unchanged to the vector database.
     * @returns Whatever the vector database returns for this request.
     */
    async getEmbeddingsUniqueIDs(type, returnType) {
        return this.vectorDatabase.getEmbeddingsUniqueIDs(type, returnType);
    }
    /**
     * Retrieves the chunks relevant to a query without calling an LLM.
     *
     * The query is cleaned, looked up with type `"doc"`, and the results are de-duplicated
     * by `pageContent`. When several results share the same content, the entry keeps the
     * position of the first occurrence but holds the last occurrence's item.
     * @param {string} query - The raw query text.
     * @param limitsPerDoc - Forwarded unchanged to `getEmbeddings`.
     * @returns {Promise<Array>} Results with unique `pageContent`.
     */
    async search(query, limitsPerDoc) {
        const cleanQuery = (0, rag_tools_1.cleanString)(query);
        const rawContext = await this.getEmbeddings(cleanQuery, limitsPerDoc, "doc");
        const uniqueByContent = new Map(rawContext.map((item) => [item.pageContent, item]));
        return [...uniqueByContent.values()];
    }
    /**
     * Forwards a query to the configured LLM model.
     * @param context - Forwarded unchanged to `model.query`.
     * @param callback - Forwarded unchanged to `model.query`.
     * @returns Whatever `model.query` returns.
     * @throws {Error} If no model has been set.
     */
    async query(context, callback) {
        if (!this.model) {
            throw new Error("LLM Not set; query method not available");
        }
        return this.model.query(context, callback);
    }
    /**
     * Forwards a chat request to the configured LLM model. All arguments are passed
     * through to `model.ask` unchanged and in the same order.
     * @returns Whatever `model.ask` returns.
     * @throws {Error} If no model has been set.
     */
    async ask(lang, sessionId, message, messages, chatHistory, callback, wsStatus) {
        if (!this.model) {
            throw new Error("LLM Not set; ask method not available");
        }
        return this.model.ask(lang, sessionId, message, messages, chatHistory, callback, wsStatus);
    }
    /**
     * Sets the vector database used to store and search embeddings.
     * @param vectorDatabase - The vector database instance.
     * @returns {this} This instance, for chaining.
     */
    setVectorDatabase(vectorDatabase) {
        this.vectorDatabase = vectorDatabase;
        return this;
    }
    /**
     * Sets the configuration object that `build()` passes to `model.build`.
     * @returns {this} This instance, for chaining.
     */
    setConfigObj(obj) {
        this.configObj = obj;
        return this;
    }
    /**
     * Sets the tenant id that `build()` passes to `model.build`.
     * @returns {this} This instance, for chaining.
     */
    setTenantId(tenantId) {
        this.tenantId = tenantId;
        return this;
    }
    /**
     * Sets the embedding model used to embed queries and document chunks.
     * @returns {this} This instance, for chaining.
     */
    setEmbeddingModel(embeddingModel) {
        this.embeddingModel = embeddingModel;
        return this;
    }
    /**
     * Sets the LLM model used by `query` and `ask`.
     * @returns {this} This instance, for chaining.
     */
    setModel(model) {
        this.model = model;
        return this;
    }
    /**
     * Sets the store used for loader metadata; it is also handed to `BaseLoader.setCache`
     * during `build()`.
     * @returns {this} This instance, for chaining.
     */
    setStore(store) {
        this.store = store;
        return this;
    }
    /**
     * Sets the temperature. The value is only pushed to `BaseModel` when `build()` runs.
     * @param {number} temperature - The temperature to record.
     * @returns {this} This instance, for chaining.
     */
    setTemperature(temperature) {
        this.temperature = temperature;
        return this;
    }
    /**
     * Sets the system message; `build()` cleans it with `cleanString`.
     * @returns {this} This instance, for chaining.
     */
    setSystemMessage(systemMessage) {
        this.systemMessage = systemMessage;
        return this;
    }
    /**
     * Sets the score threshold used by `getEmbeddings`; only results with a score strictly
     * greater than this value are kept.
     * @returns {this} This instance, for chaining.
     */
    setEmbeddingRelevanceCutOff(embeddingRelevanceCutOff) {
        this.embeddingRelevanceCutOff = embeddingRelevanceCutOff;
        return this;
    }
    /**
     * Runs a loader: embeds its chunks, inserts them into the vector database and, when the
     * loader supports it, subscribes to its incremental updates.
     *
     * If a store is configured and already holds metadata for this loader, the loader is
     * skipped unless `forceReload` is truthy, in which case previously stored entries are
     * deleted first (only when the recorded `chunksProcessed` is greater than zero).
     * @param loader - The loader to run.
     * @param {boolean} [forceReload] - Re-run the loader even if it was run before.
     * @returns {Promise<{entriesAdded: number, uniqueId: *, loaderType: string}>}
     * `entriesAdded` is the number of new inserts (0 when skipped), `uniqueId` is the
     * loader's unique id and `loaderType` is the name of the loader's constructor.
     */
    async addLoader(loader, forceReload) {
        const uniqueId = loader.getUniqueId();
        const type = loader.getType();
        this.debug("Exploring loader", uniqueId);
        if (this.model)
            loader.injectModel(this.model);
        if (this.store && (await this.store.hasLoaderMetadata(uniqueId))) {
            if (!forceReload) {
                this.debug("Loader previously run. Skipping...", uniqueId);
                return { entriesAdded: 0, uniqueId, loaderType: loader.constructor.name };
            }
            const { chunksProcessed } = await this.store.getLoaderMetadata(uniqueId);
            this.debug(`Loader previously run but forceReload set! Deleting previous ${chunksProcessed} keys...`, uniqueId);
            // Drop the stale instance even when there is nothing to delete from the database.
            this.loaders = this.loaders.filter((x) => x.getUniqueId() !== uniqueId);
            if (chunksProcessed > 0)
                await this.deleteLoader(uniqueId);
        }
        await loader.init();
        const chunks = await loader.getChunks();
        this.debug("Chunks generator received", uniqueId);
        const { newInserts } = await this.batchLoadChunks(type, uniqueId, chunks);
        this.debug(`Add loader completed with ${newInserts} new entries for`, uniqueId);
        if (loader.canIncrementallyLoad) {
            this.debug(`Registering incremental loader`, uniqueId);
            loader.on("incrementalChunkAvailable", async (incrementalGenerator) => {
                // Nobody awaits an event listener's promise, so a failure here would
                // otherwise become an unhandled rejection. Log it instead.
                try {
                    await this.incrementalLoader(type, uniqueId, incrementalGenerator);
                }
                catch (error) {
                    this.debug(`Incremental load failed for loader`, uniqueId, error);
                }
            });
        }
        this.loaders.push(loader);
        this.debug(`Add loader ${uniqueId} wrap up done`);
        return { entriesAdded: newInserts, uniqueId, loaderType: loader.constructor.name };
    }
    /**
     * Writes a diagnostic message to the console.
     * @param {...*} msg - Values to log; passed to `console.log` as separate arguments.
     */
    debug(...msg) {
        console.log(...msg);
    }
    /**
     * Processes chunks that a loader made available after its initial load.
     * @param type - Forwarded unchanged to `batchLoadChunks`.
     * @param uniqueId - The unique id of the loader the chunks belong to.
     * @param incrementalGenerator - Async iterable yielding the new chunks.
     */
    async incrementalLoader(type, uniqueId, incrementalGenerator) {
        this.debug(`incrementalChunkAvailable for loader`, uniqueId);
        const { newInserts } = await this.batchLoadChunks(type, uniqueId, incrementalGenerator);
        this.debug(`${newInserts} new incrementalChunks processed`, uniqueId);
    }
    /**
     * Deletes a loader's entries from the vector database and forgets the loader.
     *
     * Store metadata is only removed when a store is configured and the vector database
     * reported a truthy delete result; the loader is removed from the in-memory list
     * regardless.
     * @param uniqueLoaderId - The unique id of the loader to delete.
     * @returns The result of `vectorDatabase.deleteKeys`.
     */
    async deleteLoader(uniqueLoaderId) {
        const deleteResult = await this.vectorDatabase.deleteKeys(uniqueLoaderId);
        if (this.store && deleteResult)
            await this.store.deleteLoaderMetadataAndCustomValues(uniqueLoaderId);
        this.loaders = this.loaders.filter((x) => x.getUniqueId() !== uniqueLoaderId);
        return deleteResult;
    }
    /**
     * Builds a chunk id of the form `<loaderUniqueId>_<incrementId>`.
     * @param loaderUniqueId - The unique id of the loader.
     * @param incrementId - The position of the chunk within the loader's output.
     * @returns {string} The combined chunk id.
     */
    getChunkUniqueId(loaderUniqueId, incrementId) {
        return `${loaderUniqueId}_${incrementId}`;
    }
    /**
     * Consumes an async iterable of chunks, tags each chunk with the loader id and a
     * sequential chunk id, and inserts them in batches of `DEFAULT_INSERT_BATCH_SIZE`.
     * @param type - Forwarded unchanged to `batchLoadEmbeddings`.
     * @param uniqueId - The unique id of the loader being processed.
     * @param generator - Async iterable yielding objects with `pageContent` and `metadata`.
     * @returns {Promise<{newInserts: number, formattedChunks: Array}>} `newInserts` is the
     * sum of the insert results of all batches. `formattedChunks` holds only the final,
     * partially filled batch (empty when the last chunk completed a full batch), not every
     * chunk that was processed.
     */
    async batchLoadChunks(type, uniqueId, generator) {
        let chunkIndex = 0;
        let newInserts = 0;
        let formattedChunks = [];
        for await (const chunk of generator) {
            formattedChunks.push({
                pageContent: chunk.pageContent,
                metadata: {
                    ...chunk.metadata,
                    uniqueLoaderId: uniqueId,
                    id: this.getChunkUniqueId(uniqueId, chunkIndex++),
                },
            });
            if (formattedChunks.length >= rag_tools_1.DEFAULT_INSERT_BATCH_SIZE) {
                newInserts += await this.batchLoadEmbeddings(type, uniqueId, formattedChunks);
                formattedChunks = [];
            }
        }
        // Flush whatever is left over; this is a no-op (returns 0) for an empty batch.
        newInserts += await this.batchLoadEmbeddings(type, uniqueId, formattedChunks);
        return { newInserts, formattedChunks };
    }
    /**
     * Embeds one batch of chunks and inserts the result into the vector database.
     * @param type - Forwarded unchanged to `vectorDatabase.insertChunks`.
     * @param loaderUniqueId - The unique id of the loader; used for log messages only.
     * @param {Array} formattedChunks - Chunks with `pageContent` and `metadata`.
     * @returns The result of `vectorDatabase.insertChunks`, or `0` when the batch is empty.
     */
    async batchLoadEmbeddings(type, loaderUniqueId, formattedChunks) {
        if (formattedChunks.length === 0)
            return 0;
        this.debug(`Processing batch (size ${formattedChunks.length}) for loader ${loaderUniqueId}`);
        const embeddings = await this.embedChunks(formattedChunks);
        this.debug(`Batch embeddings (size ${formattedChunks.length}) obtained for loader ${loaderUniqueId}`);
        // Embeddings are paired with chunks by position.
        const embedChunks = formattedChunks.map(({ pageContent, metadata }, index) => ({
            pageContent,
            vector: embeddings[index],
            metadata,
        }));
        this.debug(`Inserting chunks for loader ${loaderUniqueId} to vectorDatabase`);
        return this.vectorDatabase.insertChunks(type, embedChunks);
    }
    /**
     * Embeds the text content of the given chunks with the embedding model.
     * @param {Array<{pageContent: string}>} chunks - Chunks whose `pageContent` is embedded.
     * @returns The result of `embeddingModel.embedDocuments` for the chunk texts.
     */
    async embedChunks(chunks) {
        const texts = chunks.map(({ pageContent }) => pageContent);
        return this.embeddingModel.embedDocuments(texts);
    }
    /**
     * Sets how many results `getEmbeddings` returns at most per query.
     * @param {number} searchResultCount - The maximum number of results to keep.
     * @returns {this} This instance, for chaining.
     */
    setSearchResultCount(searchResultCount) {
        this.searchResultCount = searchResultCount;
        return this;
    }
    /**
     * Records whether the conversation history of queries made without a conversation id
     * should be stored in the default thread. Defaults to `true`.
     * @returns {this} This instance, for chaining.
     */
    setParamStoreConversationsToDefaultThread(storeConversationsToDefaultThread) {
        this.storeConversationsToDefaultThread = storeConversationsToDefaultThread;
        return this;
    }
    /**
     * Retrieves the loader metadata recorded in the store.
     * @returns {Promise<Array>} The result of `store.getAllLoaderMetadata()`, or an empty
     * array when no store is configured.
     */
    async getLoaders() {
        if (!this.store)
            return [];
        return this.store.getAllLoaderMetadata();
    }
    /** @returns {number} The maximum number of results returned per query. */
    getSearchResultCount() {
        return this.searchResultCount;
    }
    /** @returns The configured vector database, if any. */
    getVectorDatabase() {
        return this.vectorDatabase;
    }
    /** @returns {number} The configured temperature. */
    getTemperature() {
        return this.temperature;
    }
    /** @returns {number} The score threshold applied by `getEmbeddings`. */
    getEmbeddingRelevanceCutOff() {
        return this.embeddingRelevanceCutOff;
    }
    /** @returns {string} The current system message. */
    getSystemMessage() {
        return this.systemMessage;
    }
    /** @returns The configured store, if any. */
    getStore() {
        return this.store;
    }
    /** @returns The configured embedding model, if any. */
    getEmbeddingModel() {
        return this.embeddingModel;
    }
    /** @returns The configured LLM model, or `null` when none has been set. */
    getModel() {
        return this.model;
    }
    /** @returns {boolean} Whether conversations are stored to the default thread. */
    getParamStoreConversationsToDefaultThread() {
        return this.storeConversationsToDefaultThread;
    }
}
// Publish the class on the CommonJS exports object.
exports.RAGApplicationBuilder = RAGApplicationBuilder;
