import { saveAs } from 'file-saver';

/**
 * Lookup table from HTML entity text (including the leading `&` and the
 * trailing `;`) to the replacement string.
 *
 * The typographic quote entities (`&ldquo;`, `&rdquo;`, `&lsquo;`, `&rsquo;`)
 * are mapped to plain ASCII quotes rather than to their curly counterparts.
 */
const entityMap = {
  // Markup-significant characters
  '&amp;': '&',
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&#39;': "'",
  '&nbsp;': ' ',
  // Legal, currency and math symbols
  '&copy;': '©',
  '&reg;': '®',
  '&cent;': '¢',
  '&pound;': '£',
  '&yen;': '¥',
  '&euro;': '€',
  '&deg;': '°',
  '&plusmn;': '±',
  '&sect;': '§',
  '&times;': '×',
  '&divide;': '÷',
  // Typography
  '&mdash;': '—',
  '&ndash;': '–',
  '&hellip;': '…',
  '&trade;': '™',
  '&bull;': '•',
  '&ldquo;': '"',
  '&rdquo;': '"',
  '&lsquo;': "'",
  '&rsquo;': "'",
  '&laquo;': '«',
  '&raquo;': '»',
  '&acute;': '´',
  '&uml;': '¨',
};

/**
 * Replaces HTML character references in `content` with the characters they
 * stand for.
 *
 * Supported forms:
 *  - decimal references such as `&#65;`
 *  - hexadecimal references such as `&#x41;` / `&#X41;`
 *  - the named entities listed in `entityMap` (exact, case-sensitive match)
 *
 * Anything else that merely looks like an entity is left untouched.
 *
 * @param {string} content - Text that may contain HTML entities.
 * @returns {string} The text with recognised entities decoded. Falsy input is
 *   returned unchanged.
 */
export const replaceHtmlEntities = (content) => {
  if (!content) return content;

  return content.replace(/&(?:#\d+|#x[\da-f]+|[a-z]+);/gi, (entity) => {
    if (entity.startsWith('&#')) {
      // Strip the leading "&#" and the trailing ";".
      const reference = entity.slice(2, -1);
      const isHex = reference[0] === 'x' || reference[0] === 'X';
      const codePoint = isHex
        ? Number.parseInt(reference.slice(1), 16)
        : Number.parseInt(reference, 10);

      // String.fromCodePoint throws a RangeError above U+10FFFF, so such
      // references are treated like any other unknown entity. fromCodePoint
      // (unlike fromCharCode) also yields correct surrogate pairs for
      // astral characters such as emoji.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    }

    if (entity in entityMap) {
      return entityMap[entity];
    }

    return entity; // Unknown named entity: leave it as is
  });
};

/**
 * Truncates `content` at the first blank line (two consecutive `\n`
 * characters).
 *
 * The first of the two newlines is kept, so a truncated result ends with a
 * single `\n`. Text without a blank line is returned unchanged.
 *
 * @param {string} content - Text to truncate.
 * @returns {string} The first paragraph, or the input itself when it is falsy
 *   or contains no blank line.
 */
export const removeAfterFirstParagraph = (content) => {
  if (!content) {
    return content;
  }

  const blankLineAt = content.indexOf('\n\n');
  if (blankLineAt === -1) {
    return content;
  }

  // "+ 1" retains the newline that terminates the first paragraph.
  return content.slice(0, blankLineAt + 1);
};

/**
 * Removes (or replaces) every whole-word, case-insensitive occurrence of the
 * given keywords.
 *
 * Keywords are matched literally; regex metacharacters are escaped. A word
 * boundary is only required on a side of the keyword that starts/ends with a
 * word character, so keywords such as `c++` or `.net` can match too.
 * Empty and non-string keywords are ignored.
 *
 * @param {string} content - Text to process.
 * @param {string[]} keywords - Keywords to remove.
 * @param {{replaceWith?: string}} [options] - `replaceWith` is the replacement
 *   text (defaults to the empty string). Because the keywords form capture
 *   group 1, `$1` in `replaceWith` refers to the matched keyword.
 * @returns {string} The processed text. `content` is returned unchanged when
 *   it is falsy or when there is no usable keyword.
 */
export const removeUserDefinedKeywords = (content, keywords, options = {}) => {
  if (!content || !keywords || keywords.length === 0) {
    return content;
  }

  const replaceWith = (options && options.replaceWith) || '';

  const patterns = keywords
    // An empty alternative would match at every word boundary.
    .filter((keyword) => typeof keyword === 'string' && keyword.length > 0)
    .map((keyword) => {
      const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      // "\b" next to a non-word character demands a word character on the
      // other side, which would prevent e.g. "c++ " from ever matching.
      const leadingBoundary = /^\w/.test(keyword) ? '\\b' : '';
      const trailingBoundary = /\w$/.test(keyword) ? '\\b' : '';
      return `${leadingBoundary}${escaped}${trailingBoundary}`;
    });

  if (patterns.length === 0) {
    return content;
  }

  const regex = new RegExp(`(${patterns.join('|')})`, 'gi');
  return content.replace(regex, replaceWith);
};

/**
 * Upper-cases the first character of the text and of every chunk that follows
 * a sentence terminator (`.`, `!` or `?`) plus whitespace.
 *
 * @param {string} content - Text to process.
 * @returns {string} The text with sentence starts capitalised. Falsy input is
 *   returned unchanged.
 */
export const capitalizeNewSentences = (content) => {
  if (!content) {
    return content;
  }

  // The capture group keeps the delimiters in the result, so the array
  // alternates: sentence, delimiter, sentence, delimiter, ...
  const pieces = content.split(/([.!?]\s+)/);

  let rebuilt = '';
  for (let index = 0; index < pieces.length; index += 1) {
    const piece = pieces[index];
    const isDelimiter = index % 2 !== 0;
    if (isDelimiter) {
      rebuilt += piece;
    } else {
      rebuilt += piece.charAt(0).toUpperCase() + piece.slice(1);
    }
  }
  return rebuilt;
};

/**
 * Strips leading whitespace and punctuation from the start of every line.
 *
 * The stripped characters are whitespace plus: , . ! ? : ; - _ ( ) [ ] { } " '
 *
 * @param {string} content - Text to process; lines are separated by `\n`.
 * @returns {string} The text with each line's leading symbols removed. Falsy
 *   input is returned unchanged.
 */
export const removeSymbolsAndSpacesFromStart = (content) => {
  if (!content) return content;

  // Written as a literal with "-" and "]" escaped. Interpolating the raw
  // symbol list into a character class turned ";-_" into a range (which
  // covers A-Z) and let the bare "]" close the class early.
  const leadingSymbols = /^[\s,.!?:;\-_()[\]{}"']+/;

  return content
    .split('\n')
    .map((line) => line.replace(leadingSymbols, ''))
    .join('\n');
};

/**
 * Tidies spacing around the punctuation marks `, ; . ! ?`.
 *
 *  1. A single space directly before a mark is removed ("word ," -> "word,").
 *  2. A space is inserted after a mark that is immediately followed by a
 *     non-space character ("a,b" -> "a, b"), except when that would break:
 *       - runs of punctuation such as "..." or "?!"
 *       - a closing quote or bracket right after the mark (`."`, `!)`)
 *       - numbers written with "." or "," between digits ("3.14", "1,000")
 *
 * @param {string} content - Text to process.
 * @returns {string} The text with punctuation spacing fixed. Falsy input is
 *   returned unchanged.
 */
export const fixMisplacedPunctuation = (content) => {
  if (!content) return content;

  const withoutLeadingSpaces = content.replace(/ ([,;.!?])/g, '$1');

  return withoutLeadingSpaces.replace(/[,;.!?](?=\S)/g, (mark, offset, text) => {
    const previous = text.charAt(offset - 1);
    const next = text.charAt(offset + 1);

    // Keep punctuation clusters and closing quotes/brackets attached.
    if (/[,;.!?'")\]}]/.test(next)) {
      return mark;
    }

    // Keep decimal points and thousands separators intact.
    if ((mark === '.' || mark === ',') && /\d/.test(previous) && /\d/.test(next)) {
      return mark;
    }

    return `${mark} `;
  });
};

// Lower-case English stop words used by removeStopWords, which lower-cases
// each space-separated token before looking it up here.
// Contractions are spelled with a straight ASCII apostrophe ('), so a token
// written with a typographic apostrophe (’) does not match an entry.
const stopWords = new Set([
  "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "aren't", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by",
  "can't", "cannot", "could", "couldn't", "did", "didn't", "do", "does", "doesn't", "doing", "don't", "down", "during", "each", "few", "for", "from", "further", "had", "hadn't", "has", "hasn't", "have",
  "haven't", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is",
  "isn't", "it", "it's", "its", "itself", "let's", "me", "more", "most", "mustn't", "my", "myself", "no", "nor", "not", "of", "off", "on", "once", "only", "or", "other", "ought", "our", "ours",
  "ourselves", "out", "over", "own", "same", "shan't", "she", "she'd", "she'll", "she's", "should", "shouldn't", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them",
  "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", "wasn't", "we",
  "we'd", "we'll", "we're", "we've", "were", "weren't", "what", "what's", "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "won't", "would",
  "wouldn't", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves"
]);

/**
 * Drops every space-separated token whose lower-cased form is in `stopWords`.
 *
 * Tokens are split on the single space character only, so a stop word that is
 * attached to punctuation or to a newline/tab is kept.
 *
 * @param {string} content - Text to filter.
 * @returns {string} The remaining tokens joined with single spaces. Falsy
 *   input is returned unchanged.
 */
export const removeStopWords = (content) => {
  if (!content) {
    return content;
  }

  const keptTokens = [];
  for (const token of content.split(' ')) {
    if (stopWords.has(token.toLowerCase())) {
      continue;
    }
    keptTokens.push(token);
  }
  return keptTokens.join(' ');
};

/**
 * Lower-cases the text and deletes every character that is not an ASCII
 * letter, an ASCII digit or whitespace.
 *
 * @param {string} content - Text to normalise.
 * @returns {string} The normalised text. Falsy input is returned unchanged.
 */
export const normalizeText = (content) => {
  if (!content) {
    return content;
  }

  const lowerCased = content.toLowerCase();
  // Whitespace (including newlines) survives; punctuation and any
  // non-ASCII letters do not.
  const withoutSpecialCharacters = lowerCased.replace(/[^a-z0-9\s]/g, '');
  return withoutSpecialCharacters;
};

/**
 * Truncates `content` after its first sentence.
 *
 * A sentence ends at the first `.`, `!` or `?` that is followed by a
 * whitespace character; the terminator is kept, the whitespace is not.
 *
 * @param {string} content - Text to truncate.
 * @returns {string} The first sentence, or the input itself when it is falsy
 *   or contains no such terminator.
 */
export const removeAfterFirstSentence = (content) => {
  if (!content) {
    return content;
  }

  const terminatorAt = content.search(/[.!?]\s/);
  if (terminatorAt === -1) {
    return content;
  }

  // "+ 1" keeps the punctuation mark itself.
  return content.slice(0, terminatorAt + 1);
};

/**
 * Reduces text to a plain, mostly-ASCII form.
 *
 * Steps, in order:
 *  1. NFKD normalisation. This splits accented letters into base letter +
 *     combining mark and also folds compatibility characters, e.g. full-width
 *     letters/digits become their ASCII forms and "…" becomes "...".
 *  2. Combining diacritical marks (U+0300–U+036F) are removed.
 *  3. Hyphen/dash variants (U+2010–U+2015) become "-".
 *  4. Typographic single quotes (U+2018–U+201B) become "'" and typographic
 *     double quotes (U+201C–U+201F) become '"'.
 *  5. Every run of whitespace (including newlines) collapses to one space and
 *     the result is trimmed.
 *
 * @param {string} content - Text to normalise.
 * @returns {string} The normalised text. Falsy input is returned unchanged.
 */
export const normalizeUnicode = (content) => {
  if (!content) return content;

  return content
    .normalize('NFKD')
    .replace(/[\u0300-\u036F]/g, '')
    .replace(/[\u2010-\u2015]/g, '-')
    // Single and double quotes are kept distinct so that apostrophes
    // ("don’t") do not turn into double quotes.
    .replace(/[\u2018-\u201B]/g, "'")
    .replace(/[\u201C-\u201F]/g, '"')
    .replace(/\s+/g, ' ')
    .trim();
};

/**
 * Runs the enabled clean-up steps over `content`, in a fixed order, and then
 * the optional user-supplied script.
 *
 * Order of the steps (each runs only when its option is truthy):
 * normalizeUnicode, keepFirstSentence, normalizeText, addPrefix, addSuffix,
 * replaceHtmlEntities, keepFirstParagraph, capitalizeSentences,
 * fixPunctuation, removeStopWords, batchRemoveKeywords,
 * removeSymbolsAndSpacesFromStart, normalizeUnicode (second pass),
 * customScript.
 *
 * NOTE(review): earlier steps affect what later ones can see (for example
 * normalizeText strips "&" and ";" before replaceHtmlEntities runs) — confirm
 * this ordering is intended.
 *
 * @param {string} content - Text to process.
 * @param {object} [options] - Feature flags and their parameters
 *   (`prefixText`, `suffixText`, `keywords`, `replaceWith`, `customScript`).
 *   When omitted, no step is applied.
 * @returns {string} The processed text. Falsy `content` is returned
 *   unchanged. If the custom script is malformed or throws, the error is
 *   logged and the text produced by the preceding steps is returned.
 */
export const processFileContent = (content, options) => {
  if (!content) return content;

  const opts = options || {};

  if (opts.normalizeUnicode) {
    content = normalizeUnicode(content);
  }
  if (opts.keepFirstSentence) {
    content = removeAfterFirstSentence(content);
  }
  if (opts.normalizeText) {
    content = normalizeText(content);
  }
  if (opts.addPrefix && opts.prefixText) {
    content = `${opts.prefixText} ${content}`;
  }
  if (opts.addSuffix && opts.suffixText) {
    content = `${content} ${opts.suffixText}`;
  }
  if (opts.replaceHtmlEntities) {
    content = replaceHtmlEntities(content);
  }
  if (opts.keepFirstParagraph) {
    content = removeAfterFirstParagraph(content);
  }
  if (opts.capitalizeSentences) {
    content = capitalizeNewSentences(content);
  }
  if (opts.fixPunctuation) {
    content = fixMisplacedPunctuation(content);
  }
  if (opts.removeStopWords) {
    content = removeStopWords(content);
  }
  if (opts.batchRemoveKeywords && opts.keywords && opts.keywords.length > 0) {
    content = removeUserDefinedKeywords(content, opts.keywords, opts);
  }
  if (opts.removeSymbolsAndSpacesFromStart) {
    content = removeSymbolsAndSpacesFromStart(content);
  }
  // Second pass: tidies whatever the steps above introduced.
  if (opts.normalizeUnicode) {
    content = normalizeUnicode(content);
  }

  // Execute custom script if provided. The script must wrap a function
  // expression in <start>…<end>; that function receives the text and its
  // return value becomes the new content.
  if (opts.customScript) {
    try {
      const scriptMatch = opts.customScript.match(/<start>([\s\S]*?)<end>/);
      if (!scriptMatch) {
        throw new Error('Custom script format is incorrect.');
      }
      const scriptContent = scriptMatch[1].trim();
      // SECURITY: `new Function` executes arbitrary code with the page's
      // privileges. Only ever feed it scripts typed by the current user,
      // never text that originates from files or the network.
      const customFunction = new Function('content', `
          if (typeof content !== 'string') {
            throw new Error('Content must be a string');
          }
          return (${scriptContent})(content);
        `);
      content = customFunction(content);
    } catch (error) {
      // The alert points the user at the console, so the error must
      // actually be logged there.
      console.error('Error executing custom script:', error);
      // `alert` only exists in browsers; without the guard the catch block
      // itself would throw elsewhere.
      if (typeof alert === 'function') {
        alert('Error executing custom script. Please check the console for more details.');
      }
    }
  }

  return content;
};

/**
 * Applies `processFileContent` to the `content` of every file.
 *
 * NOTE(review): despite its name, this function neither builds nor saves a
 * CSV file. A CSV string and Blob used to be assembled here but were
 * discarded without being used, so that dead work was removed (it also threw
 * for files whose `content` was not a string). If a download is intended,
 * the caller — or a follow-up change — still has to create and save it.
 *
 * @param {Array<object>} files - File descriptors; each is expected to carry
 *   a `content` property.
 * @param {object} options - Options forwarded to `processFileContent`.
 * @returns {Promise<Array<object>>} Shallow copies of the input objects with
 *   `content` replaced by the processed text. The inputs are not mutated.
 */
export const convertToCSV = async (files, options) => {
  return files.map((file) => ({
    ...file,
    content: processFileContent(file.content, options),
  }));
};
