/*
 * Character-class fragments (the text that goes between `[` and `]`) for the
 * alphabets this module recognises.
 */
const ARABIC_UNICODE_RANGE = '\u0621-\u064A\u0660-\u0669';

/** Han ideographs located in the Basic Multilingual Plane. */
const CHINESE_UNICODE_RANGE_BMP =
  '\u4E00-\u9FCC\u3400-\u4DB5\uFA0E\uFA0F\uFA11\uFA13\uFA14\uFA1F\uFA21\uFA23\uFA24\uFA27-\uFA29';

/**
 * Han ideographs located in the supplementary planes, written as code points.
 *
 * These are the same ranges that used to be spelled as surrogate pairs
 * (`\uD840\uDC00`-`\uD86E\uDC1D`). Expressing them as code points and compiling the
 * regexes with the `u` flag guarantees a surrogate pair is always matched as a
 * whole, so unrelated astral characters (emojis, ...) are never split in half.
 */
const CHINESE_UNICODE_RANGE_SUPPLEMENTARY = '\u{20000}-\u{2A6D6}\u{2A700}-\u{2B734}\u{2B740}-\u{2B81D}';

/** Character-class content for every Chinese character. Requires the `u` flag. */
const AND_CHINESE_UNICODE = `${CHINESE_UNICODE_RANGE_BMP}${CHINESE_UNICODE_RANGE_SUPPLEMENTARY}`;
/** Stand-alone pattern matching exactly one Chinese character. Requires the `u` flag. */
const XOR_CHINESE_UNICODE = `[${AND_CHINESE_UNICODE}]`;

const INDIAN_UNICODE_RANGE = '\u0900-\u097F';
const JAPANESE_UNICODE_RANGE = '\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\uFF00-\uFF9F\u4E00-\u9FAF\u3400-\u4DBF';
const KOREAN_UNICODE_RANGE = '\u1100-\u11FF\u3130-\u318F\uA960-\uA97F\uAC00-\uD7AF\uD7B0-\uD7FF';
// NOTE(review): only lowercase а-я is covered; capital Cyrillic letters and ё are
// treated as "other" characters. Confirm whether that is intended.
const RUSSIAN_UNICODE_RANGE = '\u0430-\u044F';
/** High surrogate shared by the Mathematical Alphanumeric Symbols block (U+1D400-U+1D7FF). */
const UNICODE_SLICE_ERROR = '\uD835';

const SPECIAL_CHARACTERS = [
  { char: '&', entity: '&amp;' },
  { char: '<', entity: '&lt;' },
  { char: '>', entity: '&gt;' },
  { char: "'", entity: '&apos;' },
  { char: `"`, entity: '&quot;' },
];

/** Matches an `&` that does not already start something shaped like a named entity (`&name;`). */
const REGEX_UNESCAPED_AMPERSAND = /&(?![a-zA-Z0-9]+;)/g;

/** First code point of MATHEMATICAL SANS-SERIF BOLD CAPITAL A..Z (𝗔..𝗭). */
const BOLD_CAPITAL_A = 0x1d5d4;
/** First code point of MATHEMATICAL SANS-SERIF BOLD SMALL A..Z (𝗮..𝘇). */
const BOLD_SMALL_A = 0x1d5ee;
const LATIN_ALPHABET_LENGTH = 26;

/**
 * Cuts already-escaped text with `slice(0, maxLength)` and trims it, without leaving
 * a broken tail behind.
 *
 * @param text - The escaped text.
 * @param maxLength - The end index handed to `String.prototype.slice`.
 *
 * @returns The truncated, trimmed text.
 */
function truncateEscapedText(text: string, maxLength: number): string {
  let truncated = text.slice(0, maxLength);

  if (truncated.length < text.length) {
    // An entity contains a single '&', so only the last '&' can belong to an entity
    // that crosses the cut. Drop it entirely rather than emitting e.g. "&l".
    const entityStart = truncated.lastIndexOf('&');
    const cutsEntity =
      entityStart !== -1 &&
      SPECIAL_CHARACTERS.some(
        ({ entity }) => text.startsWith(entity, entityStart) && entityStart + entity.length > truncated.length,
      );
    if (cutsEntity) {
      truncated = truncated.slice(0, entityStart);
    }

    // A high surrogate at the very end is half of a pair: drop it.
    const lastUnit = truncated.charCodeAt(truncated.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      truncated = truncated.slice(0, -1);
    }
  }

  return truncated.trim();
}

/**
 * Static helpers for escaping, truncating and filtering text.
 */
export class TextUtils {
  /**
   * Matches emojis (`Extended_Pictographic`).
   * See developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Unicode_character_class_escape
   *
   * Because of the global flag (g) the RegExp object remembers the `lastIndex` of the previous match,
   * so consecutive `test()` calls on the same instance give intermittent results
   * (https://dev.to/dvddpl/why-is-my-regex-working-intermittently-4f4g).
   * Use it with `String.prototype.match` / `replace` instead of `test`.
   */
  public static REGEX_IS_EMOJI = /\p{Extended_Pictographic}/gu;

  /** Matches a non-negative decimal number, optionally with `,` thousands separators (also matches ''). */
  public static REGEX_ONLY_REAL = /^(\d+|\d{1,3}(,\d{3})*)?(\.\d+)?$/;

  /** Matches text that looks like it contains an HTML tag. Global: avoid `test()` on it (see above). */
  public static REGEX_SAFE_STRING = new RegExp('<\\/?[a-z][\\s\\S]*>', 'g');

  /** Matches `#RGB` and `#RRGGBB` hexadecimal colors. */
  public static REGEX_HEX_COLOR = /^#([\dA-Fa-f]{6}|[\dA-Fa-f]{3})$/;

  public static REGEX_ARABIC_CHARACTERS = new RegExp(`[${ARABIC_UNICODE_RANGE}]`, 'g');
  // The `u` flag is required: the pattern contains supplementary-plane code points.
  public static REGEX_CHINESE_CHARACTERS = new RegExp(`${XOR_CHINESE_UNICODE}`, 'gu');
  public static REGEX_INDIAN_CHARACTERS = new RegExp(`[${INDIAN_UNICODE_RANGE}]`, 'g');
  public static REGEX_JAPANESE_CHARACTERS = new RegExp(`[${JAPANESE_UNICODE_RANGE}]`, 'g');
  public static REGEX_KOREAN_CHARACTERS = new RegExp(`[${KOREAN_UNICODE_RANGE}]`, 'g');
  public static REGEX_RUSSIAN_CHARACTERS = new RegExp(`[${RUSSIAN_UNICODE_RANGE}]`, 'g');

  /** Matches the `\uD835` code unit. Deliberately NOT unicode-aware so it also sees half pairs. */
  public static REGEX_UNICODE_SLICE_ERROR = new RegExp(`[${UNICODE_SLICE_ERROR}]`);

  /**
   * Matches every character that does NOT belong to one of the supported alphabets.
   * Compiled with the `u` flag so astral characters are matched (and removed) as a whole.
   */
  // eslint-disable-next-line no-misleading-character-class
  public static EXCLUDE_OTHER_ALPHABETS_CHARACTERS = new RegExp(
    `[^${ARABIC_UNICODE_RANGE}${AND_CHINESE_UNICODE}${INDIAN_UNICODE_RANGE}${JAPANESE_UNICODE_RANGE}${KOREAN_UNICODE_RANGE}${RUSSIAN_UNICODE_RANGE}]`,
    'gu',
  );

  /**
   * Transforms a string to a safe string by escaping certain unsafe chars by their corresponding HTML entity,
   * then truncates the escaped result to `maxLength` and trims it.
   *
   * The truncation never leaves half of a surrogate pair or half of a generated entity at the end.
   * If the truncated part contains "bold" unicode letters, they are first replaced by plain letters.
   *
   * @param unsafeText - The string to transform.
   * @param maxLength - The max length of the (escaped) string.
   * @param excludedChars - The characters to exclude from escaping.
   *
   * @returns The safe string.
   *
   * @example
   * safeText('<script>alert("Hello, World!")</script>', 70) => '&lt;script&gt;alert(&quot;Hello, World!&quot;)&lt;/script&gt;'
   * safeText('<b>Hello, World!</b>', 70, ['<', '>']) => '<b>Hello, World!</b>'
   */
  public static safeText(unsafeText: string | number, maxLength = 30, excludedChars: string[] = []): string {
    let escapedText = `${unsafeText}`;

    // '&' is the first entry of SPECIAL_CHARACTERS, so the entities produced by the
    // following replacements are never escaped twice.
    for (const { char, entity } of SPECIAL_CHARACTERS) {
      if (excludedChars.includes(char)) {
        continue;
      }

      escapedText =
        char === '&'
          ? // Only replace '&' when it is not followed by alphanumerics and a semicolon.
            escapedText.replace(REGEX_UNESCAPED_AMPERSAND, entity)
          : escapedText.split(char).join(entity);
    }

    if (TextUtils.REGEX_UNICODE_SLICE_ERROR.test(escapedText.slice(0, maxLength))) {
      // If more cases arise, the necessary logic will have to be added.
      escapedText = TextUtils.removeBoldCharacters(escapedText);
    }

    return truncateEscapedText(escapedText, maxLength);
  }

  /**
   * Checks if the input text contains any of the HTML entities produced by `safeText`
   * (`&amp;`, `&lt;`, `&gt;`, `&apos;`, `&quot;`).
   *
   * Note that it looks for the escaped entities, not for the raw characters.
   *
   * @param text - The value to be checked.
   *
   * @returns `true` if any of the entities is found in the text; otherwise, `false`.
   */
  public static containsSpecialHTMLEntities(text: string | number): boolean {
    const haystack = `${text}`;
    return SPECIAL_CHARACTERS.some(({ entity }) => haystack.includes(entity));
  }

  /**
   * Determines if a string has emojis or not.
   *
   * @param text - The string to check.
   *
   * @returns `true` in case the string has at least one emoji, `false` otherwise.
   */
  public static hasEmojis(text: string): boolean {
    // `match` (not `test`) on purpose: REGEX_IS_EMOJI is global and therefore stateful.
    //eslint-disable-next-line unicorn/prefer-regexp-test
    return text.match(TextUtils.REGEX_IS_EMOJI) !== null;
  }

  /**
   * Removes emojis from a string.
   *
   * @param text - The string to remove the emojis from.
   *
   * @returns The string without emojis.
   */
  public static removeEmojis(text: string): string {
    return text.replace(TextUtils.REGEX_IS_EMOJI, '');
  }

  /**
   * Reverts some HTML entities: `&quot;` and `&#039;` become a single quote (')
   * and, afterwards, `&amp;` becomes `&`.
   *
   * NOTE(review): `&quot;` is reverted to a single quote rather than a double quote, and
   * `&apos;`, `&lt;`, `&gt;` are left untouched. Behavior kept as is; confirm it is intended.
   *
   * @param text - The string to revert.
   *
   * @returns The string with the entities above reverted.
   */
  public static revertHtmlEntities(text: string): string {
    return text.replace(/(&quot;)|(&#039;)/g, "'").replace(/&amp;/g, '&');
  }

  /**
   * Truncates a string to the given length and adds an ellipsis if necessary.
   *
   * @param text - The string to truncate.
   * @param maxLength - The maximum length of the string (ellipsis not included).
   * @param ellipsis - The ellipsis to add to the truncated string. An empty value falls back to '...'.
   *
   * @example
   * ellipsis('12345',4) // 1234...
   * ellipsis('12345',4,'') // 1234...
   * ellipsis('12345',4,'....') // 1234....
   *
   * @returns The truncated string, or the original one when it already fits.
   */
  public static ellipsis(text: string, maxLength: number, ellipsis = '...'): string {
    if (text.length <= maxLength) {
      return text;
    }

    const suffix = ellipsis.length > 0 ? ellipsis : '...';
    return `${text.slice(0, maxLength)}${suffix}`;
  }

  /**
   * Removes every character that belongs to one of the supported non-latin alphabets
   * (Arabic, Chinese, Devanagari, Japanese, Korean and lowercase Russian).
   *
   * Despite its name, this has nothing to do with the UTF encoding of the text.
   *
   * @param searchedText - The string to filter.
   *
   * @returns The string without the characters of those alphabets.
   */
  public static removeNonUTFCharacters(searchedText: string): string {
    return searchedText
      .replace(TextUtils.REGEX_ARABIC_CHARACTERS, '')
      .replace(TextUtils.REGEX_CHINESE_CHARACTERS, '')
      .replace(TextUtils.REGEX_INDIAN_CHARACTERS, '')
      .replace(TextUtils.REGEX_JAPANESE_CHARACTERS, '')
      .replace(TextUtils.REGEX_KOREAN_CHARACTERS, '')
      .replace(TextUtils.REGEX_RUSSIAN_CHARACTERS, '');
  }

  /**
   * Keeps only the characters that belong to one of the supported non-latin alphabets
   * (Arabic, Chinese, Devanagari, Japanese, Korean and lowercase Russian) and removes
   * everything else (latin letters, digits, spaces, emojis, ...).
   *
   * @param searchedText - The string to filter.
   *
   * @returns The string with only the characters of those alphabets.
   */
  public static removeUTFCharacters(searchedText: string): string {
    return searchedText.replace(TextUtils.EXCLUDE_OTHER_ALPHABETS_CHARACTERS, '');
  }

  /**
   * Remove special "bold" Unicode characters from a given string.
   *
   * Replaces the Mathematical Sans-Serif Bold letters (𝗔-𝗭, 𝗮-𝘇) with their regular
   * A-Z / a-z equivalents. Every other character is returned untouched.
   *
   * @param text - The string containing special Unicode characters.
   *
   * @returns A new string with special "bold" Unicode characters replaced.
   */
  public static removeBoldCharacters(text: string): string {
    let output = '';

    // for..of walks the string by code points, so each bold letter arrives as one `char`.
    for (const char of text) {
      const codePoint = char.codePointAt(0) ?? 0;
      const capitalOffset = codePoint - BOLD_CAPITAL_A;
      const smallOffset = codePoint - BOLD_SMALL_A;

      if (capitalOffset >= 0 && capitalOffset < LATIN_ALPHABET_LENGTH) {
        output += String.fromCharCode('A'.charCodeAt(0) + capitalOffset);
      } else if (smallOffset >= 0 && smallOffset < LATIN_ALPHABET_LENGTH) {
        output += String.fromCharCode('a'.charCodeAt(0) + smallOffset);
      } else {
        output += char;
      }
    }

    return output;
  }
}
