"use strict";
// Interop helper: reuses an `__importDefault` already present on `this`,
// otherwise wraps a required module so it can be read through `.default`.
// Modules flagged with `__esModule` are returned untouched; any other value
// is placed under a "default" key of a new object.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.PptLoader = void 0;
const textsplitters_1 = require("@langchain/textsplitters");
const office_text_extractor_1 = require("office-text-extractor");
const md5_1 = __importDefault(require("md5"));
const base_loader_1 = require("../interfaces/base-loader");
const rag_tools_1 = require("../rag-tools");
/**
 * Loader that pulls the text out of a presentation file (local path or URL),
 * cleans it, splits it into chunks and yields one document per chunk.
 */
class PptLoader extends base_loader_1.BaseLoader {
    /**
     * @param {object} options
     * @param {*} options.type - Forwarded unchanged to the BaseLoader constructor.
     * @param {*} options.source - Forwarded unchanged to the BaseLoader constructor.
     * @param {string} options.filePathOrUrl - Path or URL of the presentation to load.
     * @param {number} [options.chunkOverlap] - Falls back to 0 when null/undefined.
     * @param {number} [options.chunkSize] - Falls back to 1000 when null/undefined.
     */
    constructor({ type, source, filePathOrUrl, chunkOverlap, chunkSize }) {
        // Third BaseLoader argument: "ppt_" followed by the MD5 hash of the path/URL.
        const loaderId = `ppt_${(0, md5_1.default)(filePathOrUrl)}`;
        super(type, source, loaderId, chunkSize ?? 1000, chunkOverlap ?? 0);
        this.filePathOrUrl = filePathOrUrl;
        // Coerce the validator's result to a strict boolean.
        this.isUrl = Boolean((0, rag_tools_1.isValidURL)(filePathOrUrl));
    }
    /**
     * Extracts the document text, cleans it, and yields it chunk by chunk.
     *
     * Reads `this.chunkSize` and `this.chunkOverlap`, which are not assigned in
     * this class (presumably set by BaseLoader from the constructor arguments
     * -- confirm against the base class).
     *
     * @yields {{pageContent: string, metadata: {type: string, source: string}}}
     */
    async *getUnfilteredChunks() {
        const { chunkSize, chunkOverlap } = this;
        const splitter = new textsplitters_1.RecursiveCharacterTextSplitter({ chunkSize, chunkOverlap });
        const textExtractor = (0, office_text_extractor_1.getTextExtractor)();
        // The extractor is told whether the input is a remote URL or a local file.
        const extractionRequest = { input: this.filePathOrUrl, type: "file" };
        if (this.isUrl) {
            extractionRequest.type = "url";
        }
        const rawText = await textExtractor.extractText(extractionRequest);
        const cleanedText = (0, rag_tools_1.cleanString)(rawText);
        const pieces = await splitter.splitText(cleanedText);
        for (const pageContent of pieces) {
            const metadata = { type: "PptLoader", source: this.filePathOrUrl };
            yield { pageContent, metadata };
        }
    }
}
exports.PptLoader = PptLoader;
