import Tesseract from 'tesseract.js'
import {
  processImageForReceiver,
  processImageForSender,
  changeImageToGrey
} from './imageProcessing'
import { checkAndSeparateDates, truncateConversation, allDateRegex } from './messagesProcessing'

import * as pdfjsLib from 'pdfjs-dist'
// Point pdf.js at a worker script hosted on cdnjs. The path embeds
// `pdfjsLib.version`, so the worker requested always matches the pdfjs-dist
// build imported above. The URL is protocol-relative (it starts with `//`),
// so it inherits the scheme of the page that loads this module.
pdfjsLib.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjsLib.version}/pdf.worker.min.mjs`

/**
 * Cut each message's text at the first date-like match.
 *
 * The patterns in `allDateRegex` are tried in order. The first pattern that
 * matches decides the cut: everything from the first occurrence of the
 * matched string onward is dropped and the remainder is trimmed.
 *
 * @param {Array<{text: string}>} messagesReceiver - Messages to clean; the
 *   input array and its objects are not mutated.
 * @returns {Array<object>} A new array. Truncated messages are shallow copies
 *   with an updated `text`; messages without any match are returned as the
 *   same object reference.
 */
const checkAndRemoveExtra = (messagesReceiver) => {
  const stripFromFirstDate = (entry) => {
    const fullText = entry.text

    for (const pattern of allDateRegex) {
      const found = fullText.match(pattern)
      if (!found) continue

      // Locate the matched string inside the text; everything after it goes.
      const cutAt = fullText.indexOf(found[0])
      if (cutAt === -1) continue

      return { ...entry, text: fullText.substring(0, cutAt).trim() }
    }

    // No pattern matched: hand the message back untouched.
    return entry
  }

  return messagesReceiver.map((entry) => stripFromFirstDate(entry))
}

/**
 * Group OCR words into messages based on vertical gaps.
 *
 * Words are consumed in the order given. A word starts a new message when its
 * `baseline.y0` is more than 70 units below the `baseline.y0` of the word
 * immediately before it; otherwise it is appended, space-separated, to the
 * message currently being built.
 *
 * @param {Array<{text: string, baseline: {x0: number, y0: number, x1: number, y1: number}}>} words
 *   OCR words (presumably in reading order and pixel units — confirm against
 *   the OCR output).
 * @returns {Array<{text: string, x: number, y: number, width: number, height: number}>}
 *   One entry per message. `x`/`y` come from the baseline start of the
 *   message's first word; `width`/`height` grow to reach the furthest
 *   `baseline.x1`/`baseline.y1` seen. Returns `[]` for missing or empty input.
 */
const getMessagesFromImage = (words) => {
  if (!words || words.length === 0) return []

  // Build a fresh message box seeded from a single word.
  const startMessage = ({ text, baseline }) => ({
    text,
    x: baseline.x0,
    y: baseline.y0,
    width: baseline.x1 - baseline.x0,
    height: baseline.y1 - baseline.y0
  })

  const grouped = [startMessage(words[0])]

  for (let i = 1; i < words.length; i += 1) {
    const word = words[i]
    const gapFromPrevious = word.baseline.y0 - words[i - 1].baseline.y0

    if (gapFromPrevious > 70) {
      // Large vertical jump: close the open message by starting the next one.
      grouped.push(startMessage(word))
      continue
    }

    // Same message: extend the text and stretch the box if the word reaches further.
    const open = grouped[grouped.length - 1]
    open.text += ' ' + word.text
    open.width = Math.max(open.width, word.baseline.x1 - open.x)
    open.height = Math.max(open.height, word.baseline.y1 - open.y)
  }

  return grouped
}

/**
 * OCR the two pre-processed screenshots and merge their messages into a
 * single page structure shaped like the PDF extraction output.
 *
 * Steps: update the loading message, recognise the receiver image and then
 * the sender image in French (`'fra'`), group words into messages, keep
 * receiver messages with `x < 180` and sender messages with `x >= 180`,
 * clean dates, sort by `y`, drop delivery/read status lines, truncate the
 * conversation, and wrap everything in a one-page result.
 *
 * @param {Function} setLoadingDisplay - State setter called with an updater
 *   function that merges a new `message` into the previous state.
 * @param {*} processConversation - Not used by this function.
 * @param {*} receiverImage - Image passed to `Tesseract.recognize` for the
 *   receiver side.
 * @param {*} senderImage - Image passed to `Tesseract.recognize` for the
 *   sender side; its OCR result also supplies the viewport size.
 * @returns {Promise<Array<{pageNumber: number, viewport: object, textItems: Array<object>}>>}
 *   A single-element array describing page 1.
 */
const extractTextWithTesseract = async (setLoadingDisplay, processConversation, receiverImage, senderImage) => {
  console.log('Using OCR to find text on the processed image and the original image...')

  setLoadingDisplay((previous) => ({
    ...previous,
    message: 'Extraction des messages depuis la capture d\'écran...'
  }))

  // One OCR pass per side, receiver first.
  const receiverOcr = await Tesseract.recognize(receiverImage, 'fra')
  const senderOcr = await Tesseract.recognize(senderImage, 'fra')

  const receiverCandidates = getMessagesFromImage(receiverOcr.data.words)
  const senderCandidates = getMessagesFromImage(senderOcr.data.words)

  // Keep only the messages that start on the expected side of x = 180.
  const receiverSide = receiverCandidates.filter(({ x }) => x < 180)
  const senderSide = senderCandidates.filter(({ x }) => x >= 180)

  // First date clean-up, handled differently per side.
  const receiverClean = checkAndRemoveExtra(receiverSide)
  const senderClean = checkAndSeparateDates(senderSide)

  // Merge both sides and restore top-to-bottom order.
  const ordered = [...senderClean, ...receiverClean]
  ordered.sort((first, second) => first.y - second.y)

  // Delivery / read receipts are not part of the conversation.
  const statusPrefixes = ['distribu', 'delivered', 'lu le', 'read on']
  const isStatusLine = ({ text }) => {
    const normalized = text.trim().toLowerCase()
    return statusPrefixes.some((prefix) => normalized.startsWith(prefix))
  }
  const withoutStatusLines = ordered.filter((message) => !isStatusLine(message))

  // Filter messages to keep only those before a "+ " message
  const conversation = truncateConversation(withoutStatusLines)

  // Final data (matching the shape produced by the PDF extraction).
  const textItems = conversation.map(({ text, x, y, width, height }) => ({
    str: text,
    transform: [1, 0, 0, 1, x, y],
    width,
    height
  }))

  return [{
    pageNumber: 1,
    viewport: { width: senderOcr.data.width, height: senderOcr.data.height },
    textItems
  }]
}

/**
 * Pre-process an uploaded screenshot and run the OCR extraction on it.
 *
 * The original image is processed once for the receiver side and once for the
 * sender side; the sender variant is additionally converted to grey before
 * both are handed to `extractTextWithTesseract`.
 *
 * `extractTextWithTesseract` expects
 * `(setLoadingDisplay, processConversation, receiverImage, senderImage)`.
 * Passing only the two images shifts them into the first two slots, so the
 * receiver image gets invoked as the loading-state setter. The arguments are
 * therefore forwarded explicitly in that order.
 *
 * @param {*} originalImageData - Image data accepted by the image-processing
 *   helpers.
 * @param {Function} [setLoadingDisplay] - Loading-state setter forwarded to
 *   the extraction step; defaults to a no-op so existing single-argument
 *   callers keep working.
 * @param {*} [processConversation] - Forwarded unchanged to the extraction
 *   step.
 * @returns {Promise<*>} Whatever `extractTextWithTesseract` resolves to (the
 *   extracted page data).
 */
const handleImageUploadWithTesseract = async (
  originalImageData,
  setLoadingDisplay = () => {},
  processConversation
) => {
  const receiverImage = await processImageForReceiver(originalImageData)
  const senderImage = await processImageForSender(originalImageData)
  const greySenderImage = await changeImageToGrey(senderImage)

  // Return the result so callers can actually use the extracted messages.
  return extractTextWithTesseract(
    setLoadingDisplay,
    processConversation,
    receiverImage,
    greySenderImage
  )
}

export default handleImageUploadWithTesseract