import FilterRule from "./filter-rule";
import {SearchResult} from "../components/webviewer";


/**
 * Decides whether a search result should be discarded, based on the text that surrounds it on the page.
 *
 * One {@link FilterRule} is registered per category ('date', 'patient_id', 'age'). A result is filtered as soon as
 * any registered rule rejects it, given a fixed-size window of text before/after the match and the start of the page.
 */
class SmartFilterManager {
    /** Rules keyed by category name. */
    private readonly ruleMap: Map<string, FilterRule>
    /** Search patterns keyed by their label, as passed to the constructor. */
    private readonly patternMap: Map<string, RegExp>
    /**
     * The number of characters before the match that we'll search for forbidden words. This is the value we use on
     * the desktop app and it seems to be working fine.
     */
    public static BEFORE_WINDOW_LENGTH = 25;
    /** The number of characters after the match that we'll search for forbidden words. */
    public static AFTER_WINDOW_LENGTH = 25;
    /** The number of characters to check when we're looking for a page's title. */
    public static START_OF_PAGE_LENGTH = 50;

    /**
     * Stores the given patterns by label and builds the hard-coded rules for dates, patient IDs and ages.
     *
     * @param patterns search patterns; only `label` and `regex` are used here, `type` is ignored.
     */
    constructor(patterns: { label: string, type: string, regex: RegExp }[]) {
        this.patternMap = new Map()
        for (const { label, regex } of patterns) {
            this.patternMap.set(label, regex);
        }
        this.ruleMap = new Map()

        const dateRule = new FilterRule()
        dateRule.wordsForbiddenBefore = ["study date", "data extraction", "study report", "dataset creation", "run by",
            "issue date", "date of the report", "run date", "extract date", "generation", "reporting period",
            "data cutoff", "protocol", "amend", "study initiation", "signoff", "administrative update", "crf status",
            "approved on", "final on", "collection date", "appendix", "section",
            "appendices", "published on"]
        //Filters out citations like (Smith 2016)
        dateRule.patternForbiddenBefore = /[\(\[]\w+,?\s+$/i
        dateRule.wordsForbiddenInPageTitle= ["references", "resume"]

        const patientIdRule = new FilterRule()
        //NOTE(review): "reference ID" and "NDA" are the only entries containing upper-case letters; whether they can
        //match depends on how FilterRule handles case - confirm.
        patientIdRule.wordsForbiddenBefore = ["eudract", "births for", "during", "number of", "protocol", "kit #", "kit no", "container",
            "artifact id", "drug id no", "report #", "of subjects", "of patients", "per subject", "project no", "reference ID", "NDA"]
        patientIdRule.wordsForbiddenAfter = [" mg ", " mgs ",  "g/", "/ mm3"]
        //This is to filter out IDs in "reference sections". The exact meaning is unclear; these patterns are copied
        //over from the desktop app.
        patientIdRule.patternForbiddenBefore = /\d{2,3}\)?:$/i
        patientIdRule.patternForbiddenAfter = /(^(?:\(\d{4}\))?\.)/i

        const ageRule = new FilterRule()
        //NOTE(review): the last entry is the six literal characters backslash-u-2-2-6-4, not the "≤" sign (which is
        //already listed). It looks like a double-escaped leftover; kept as-is to preserve behavior - confirm and remove.
        ageRule.wordsForbiddenBefore = ["<",">",">=","<=","more than ","less than ","at least ","at most","≤","≥","\\u2264"]
        ageRule.wordsForbiddenAfter = ["age group", "years experience"]

        this.ruleMap.set('date', dateRule)
        this.ruleMap.set('patient_id', patientIdRule)
        this.ruleMap.set('age', ageRule)
    }

    /**
     * Runs every registered rule against the search result and its surrounding text.
     *
     * @param searchResult the result to evaluate.
     * @param pageText the full text of the page the result was found on.
     * @returns true if at least one rule says the result should be filtered out.
     */
    public shouldFilter(searchResult: SearchResult, pageText: string): boolean {
        const textBefore = this.getTextBefore(searchResult, pageText)
        const textAfter = this.getTextAfter(searchResult, pageText)
        const startOfPageText = this.getStartOfPage(pageText)
        let shouldFilter = false;
        //TODO because categories are broken in PDFTron's Core code, we'll apply every rule to every search result for now.
        //Once that code is fixed, we should only apply the rules for the corresponding category.
        //Every rule is evaluated (no early exit) so the number of FilterRule.shouldFilter calls stays unchanged.
        this.ruleMap.forEach((rule) => {
            if (rule.shouldFilter(searchResult.resultStr, textBefore, textAfter, startOfPageText)) {
                shouldFilter = true
            }
        });
        return shouldFilter
    }

    /**
     * Currently unused.
     *
     * Runs the pattern against the ambient string of the search result (the match with its surrounding words), so
     * that the pattern can properly use lookbehinds or lookaheads. The pattern's match must start inside the result
     * itself, i.e. at an index in [resultStrStart, resultStrEnd), to make sure we're not matching words before or
     * after the result.
     *
     * @param searchResult the result, with the offsets of the result string inside the ambient string.
     * @param pattern the pattern to execute against the ambient string.
     * @returns true if the pattern matches and the match starts within the result string.
     */
    public patternMatchesResult(searchResult: {resultStr: string, ambientStr: string, resultStrStart: number, resultStrEnd: number}, pattern: RegExp): boolean {
        const match = pattern.exec(searchResult.ambientStr);
        if (match === null) {
            return false
        }
        //The lower bound is inclusive: a match beginning exactly where the result begins is the normal case.
        return match.index >= searchResult.resultStrStart && match.index < searchResult.resultStrEnd
    }

    /**
     * Returns up to BEFORE_WINDOW_LENGTH characters that immediately precede the result.
     *
     * The ambient string is used when it already provides a full window; otherwise the window is taken from the
     * page text. If the ambient string cannot be located in the page text, the (short) ambient prefix is returned.
     *
     * @param searchResult the result, with the offsets of the result string inside the ambient string.
     * @param pageText the full text of the page the result was found on.
     * @returns the text directly before the result.
     */
    public getTextBefore(searchResult: SearchResult, pageText: string): string {
        const windowLength = SmartFilterManager.BEFORE_WINDOW_LENGTH
        const ambientBefore = searchResult.ambientStr.substring(0, searchResult.resultStrStart)
        //If the before window is already long enough, no more work to do. This seems to not be the case but PDFTron
        //might change something in the future.
        if (ambientBefore.length >= windowLength) {
            return ambientBefore.substring(ambientBefore.length - windowLength) //cut off the start if we need to
        }

        //If it's too short, we'll use the page text to extend it.
        const ambientOffset = SmartFilterManager.findAmbientOffset(searchResult, pageText)
        if (ambientOffset === -1) {
            return ambientBefore
        }
        const endIndex = ambientOffset + searchResult.resultStrStart
        return pageText.substring(Math.max(0, endIndex - windowLength), endIndex)
    }

    /**
     * Returns up to AFTER_WINDOW_LENGTH characters that immediately follow the result.
     *
     * The ambient string is used when it already provides a full window; otherwise the window is taken from the
     * page text. If the ambient string cannot be located in the page text, the (short) ambient suffix is returned.
     *
     * @param searchResult the result, with the offsets of the result string inside the ambient string.
     * @param pageText the full text of the page the result was found on.
     * @returns the text directly after the result.
     */
    public getTextAfter(searchResult: SearchResult, pageText: string): string {
        const windowLength = SmartFilterManager.AFTER_WINDOW_LENGTH
        const ambientAfter = searchResult.ambientStr.substring(searchResult.resultStrEnd)
        if (ambientAfter.length >= windowLength) {
            return ambientAfter.substring(0, windowLength) //cut off the end if we need to
        }

        const ambientOffset = SmartFilterManager.findAmbientOffset(searchResult, pageText)
        if (ambientOffset === -1) {
            return ambientAfter
        }
        //resultStrEnd is relative to the ambient string, so it has to be shifted by the ambient string's position
        //on the page before it can be used as an index into the page text.
        const startIndex = ambientOffset + searchResult.resultStrEnd
        return pageText.substring(startIndex, Math.min(pageText.length, startIndex + windowLength))
    }

    /**
     * Returns the first START_OF_PAGE_LENGTH characters of the page, where we look for the page's title.
     *
     * @param pageText the full text of the page.
     */
    public getStartOfPage(pageText: string): string {
        return pageText.substring(0, SmartFilterManager.START_OF_PAGE_LENGTH)
    }

    /**
     * Finds the index of the first occurrence of the result's ambient string in the page text, logging when it
     * cannot be found.
     *
     * Only the first occurrence is considered, so this picks the wrong position if the ambient string occurs more
     * than once on a page. The ambient string is long enough that this is not expected to happen.
     *
     * @returns the index of the ambient string in the page text, or -1 if it is not present.
     */
    private static findAmbientOffset(searchResult: SearchResult, pageText: string): number {
        const offset = pageText.indexOf(searchResult.ambientStr)
        if (offset === -1) {
            //There's a chance we can't find the ambient string in the page text if the order of words on the page text is different.
            console.log(`couldn't find ambient string in page text: ${searchResult.ambientStr}`)
        }
        return offset
    }
}

export default SmartFilterManager
