import { atom, useAtomValue } from 'jotai'
import { throttle } from 'lodash'
import { useEffect } from 'react'
import { useLiveSessionContext } from 'sierra-client/components/liveV2/contexts/live-session-data'
import { useVideoCallService } from 'sierra-client/components/liveV2/live-context'
import { useIsDebugMode } from 'sierra-client/hooks/use-is-debug-mode'
import { logger } from 'sierra-client/logger/logger'
import { useSelector } from 'sierra-client/state/hooks'
import {
  selectAudioState,
  selectCallModeState,
  selectCurrentMicrophoneId,
} from 'sierra-client/state/live/selectors'
import { z } from 'zod'

// Zod enum of the selectable transcription model types. The schema and its
// inferred string-literal union intentionally share the same name.
export const ModelType = z.enum(['fast', 'accurate'])
export type ModelType = z.infer<typeof ModelType>

// Codec identifiers passed to the transcription websocket as the `codec` query
// parameter. Within this file only 'opus' and 'mp4' are ever selected.
const Codec = z.enum(['opus', 'wav', 'mp4'])
type Codec = z.infer<typeof Codec>

// Language detection modes, passed to the transcription websocket as the
// `languageBehavior` query parameter.
export const LanguageBehavior = z.enum(['automatic single language', 'automatic multiple languages'])
export type LanguageBehavior = z.infer<typeof LanguageBehavior>

// User-tunable part of the transcription configuration.
type TranscriptionSettings = {
  modelType: ModelType
  languageBehavior: LanguageBehavior
}

// Jotai atom holding the current transcription settings. Defaults to the
// 'accurate' model with 'automatic multiple languages'.
export const transcriptionsSettingsAtom = atom<TranscriptionSettings>({
  modelType: 'accurate',
  languageBehavior: 'automatic multiple languages',
})

// NOTE(review): presumably the maximum audio duration (in milliseconds) that the
// Gladia transcription backend accepts — the unit is inferred from the division
// by AUDIO_TIME_SLICE_MS below; confirm against the backend limits.
const MAX_GLADIA_AUDIO_DURATION = 120_000
// Timeslice handed to `MediaRecorder.start()`: one audio chunk is emitted per slice.
const AUDIO_TIME_SLICE_MS = 500
// Upper bound on the number of buffered chunks written to the socket in one
// flush: 120_000 / 500 - 40 = 200.
// NOTE(review): the reason for the margin of 40 chunks is not visible here.
const MAX_MESSAGES_PER_FLUSH = Math.floor(MAX_GLADIA_AUDIO_DURATION / AUDIO_TIME_SLICE_MS) - 40

// `logger.warn` wrapped in lodash `throttle` with a 5000 ms wait, so repeated
// warnings do not flood the log.
const throttledWarning = throttle((message: string) => logger.warn(message), 5000)

// WebSocket close codes that are treated as expected and therefore not reported
// as a warning: 1000 (normal closure), 1001 (going away), 1005 (no status
// received) and 1006 (abnormal closure).
// The list previously contained 10001, which lies outside the valid close-code
// range (1000-4999) and looks like a typo for 1001.
const safeSocketCloseCodes = [1000, 1001, 1005, 1006]
/**
 * WebSocket client for the transcription service that queues audio chunks and
 * re-opens the connection whenever `flush()` finds it closed.
 *
 * Usage: call `start()` once, queue chunks with `send()`, call `flush()`
 * periodically to write queued chunks to the socket, and call `stop()` to shut
 * the connection down.
 */
class ReconnectingWebSocket {
  constructor(
    // NOTE(review): `debug` is stored but not read anywhere in this class.
    // The query-string values below are interpolated without escaping.
    private options: {
      token: string
      modelType: ModelType
      languageBehavior: LanguageBehavior
      codec: Codec
      debug: boolean
    }
  ) {}
  private socket: WebSocket | null = null
  // Chunks queued by `send()` that have not been written to the socket yet (oldest first).
  private messageBuffer: Blob[] = []
  // Number of `start()` calls since a socket last opened successfully.
  private retryCount = 0
  // While true, `flush()` does nothing; set after a full batch has been sent.
  private pause = false
  // Handle of the pending "resume sending" timeout, if any.
  private resumeTimer: ReturnType<typeof setTimeout> | undefined = undefined

  /**
   * Closes any existing socket and opens a new one. Also used by `flush()` to
   * reconnect after the socket has closed.
   */
  start(): void {
    this.socket?.close()
    const isDevMode = process.env.NODE_ENV === 'development'
    const args = `?token=${this.options.token}&modelType=${this.options.modelType}&languageBehavior=${this.options.languageBehavior}&codec=${this.options.codec}`
    const socket = new WebSocket(
      isDevMode
        ? `ws://localhost:1236/websockets${args}`
        : `wss://${window.location.host}/x-transcription/websockets${args}`
    )

    this.retryCount++
    if (this.retryCount > 10) {
      logger.warn(`[transcription-controller] Retried > 10 times`)
    }
    socket.onopen = event => {
      // A successful open resets the retry counter.
      this.retryCount = 0
      logger.debug(`[transcription-controller] Starting websocket`, { event })
    }
    socket.onclose = event => {
      if (!safeSocketCloseCodes.includes(event.code)) {
        throttledWarning(
          `[transcription-controller] Transcription socket closed unexpectedly - code = ${event.code}`
        )
      }
    }
    socket.onerror = event => {
      logger.error('[transcription-controller] Transcription socket error', { error: event })
    }
    socket.onmessage = event => {
      // We only send transcription back over the socket when debugging
      try {
        const message = JSON.parse(event.data)
        // Guard against primitive JSON values: `'type' in <primitive>` would throw.
        const isFinal = typeof message === 'object' && message !== null && message.type === 'final'
        if (isFinal) {
          console.info(`[transcription-worker] [${message.language}, ${message.confidence}] `, {
            transcription: message.transcription,
          })
        } else {
          // Log the raw payload; interpolating the parsed object would print "[object Object]".
          console.info(`[transcription-worker] ${event.data}`)
        }
      } catch {
        console.info(`[transcription-worker] Error while parsing`)
      }
    }

    this.socket = socket
  }

  /**
   * Sends what is still buffered (best effort) and closes the socket.
   * Unlike `flush()`, this never opens a new connection.
   */
  stop(): void {
    // Respect an active pause, and only write to a socket that is actually open.
    if (!this.pause && this.socket?.readyState === WebSocket.OPEN) {
      this.sendBatch()
    }

    // Make sure no resume timer fires after shutdown.
    if (this.resumeTimer !== undefined) {
      clearTimeout(this.resumeTimer)
      this.resumeTimer = undefined
    }
    this.pause = false

    this.socket?.close()
    this.socket = null
  }

  /** Queues a chunk; it is written to the socket by a later `flush()`. */
  send(message: Blob): void {
    this.messageBuffer.push(message)
  }

  /**
   * Writes queued chunks to the socket, or reconnects if the socket is closed.
   *
   * At most MAX_MESSAGES_PER_FLUSH chunks are sent per call. If chunks remain
   * after a full batch, sending is paused for one second before the next batch.
   * Does nothing while paused, while the socket is connecting or closing, or
   * when there is no socket.
   */
  flush(): void {
    if (this.pause) {
      return
    }

    const readyState = this.socket?.readyState

    if (readyState === WebSocket.CLOSED) {
      logger.debug(`[transcription-worker] Transcription socket is closed, trying to reconnect`)
      this.start()
      return
    }

    if (readyState !== WebSocket.OPEN) {
      return
    }

    const sentWithoutError = this.sendBatch()

    // Chunks left over after an error-free batch means the batch limit was hit.
    // Pause *after* sending so the backlog keeps draining one batch per second;
    // pausing before sending would never shrink the buffer.
    if (sentWithoutError && this.messageBuffer.length > 0) {
      this.pause = true
      logger.debug(`[transcription-worker] Transcription sending is paused, too many messages in buffer`)
      this.resumeTimer = setTimeout(() => {
        this.resumeTimer = undefined
        logger.debug(`[transcription-worker] Transcription sending is resumed`)
        this.pause = false
        this.flush()
      }, 1000)
    }
  }

  /**
   * Writes up to MAX_MESSAGES_PER_FLUSH queued chunks to the socket, oldest first.
   *
   * @returns `false` if a send threw (the failed chunk is put back at the front
   *   of the queue so it is retried later), `true` otherwise.
   */
  private sendBatch(): boolean {
    let count = 0
    while (this.messageBuffer.length > 0 && count < MAX_MESSAGES_PER_FLUSH) {
      count++
      const message = this.messageBuffer.shift()
      if (message === undefined) {
        continue
      }
      try {
        this.socket?.send(message)
      } catch (error) {
        this.messageBuffer.unshift(message)
        logger.error(`[transcription-controller] Could not send message to socket`, {
          message,
          error,
        })
        return false
      }
    }
    return true
  }
}

// Candidate MediaRecorder MIME types in order of preference; the first one the
// browser reports as supported is used for recording.
const recordingMimeTypes = ['audio/webm; codecs=opus', 'audio/mp4'] as const

/**
 * Records `track` with a MediaRecorder and streams the encoded chunks to the
 * transcription websocket.
 *
 * @param track - Audio track to record.
 * @param token - Sent to the websocket as the `token` query parameter.
 * @param settings - Model type and language behavior forwarded to the websocket.
 * @param debug - Forwarded to the websocket options.
 * @returns A handle whose `stop()` ends recording and closes the socket, or
 *   `undefined` when none of `recordingMimeTypes` is supported by MediaRecorder.
 */
const startTranscribe = (
  track: MediaStreamTrack,
  token: string,
  settings: TranscriptionSettings,
  debug: boolean
): { stop: () => void } | undefined => {
  const audioStream = new MediaStream([track])

  const mimeType = recordingMimeTypes.find(codec => MediaRecorder.isTypeSupported(codec))
  if (!mimeType) {
    logger.debug(`[transcription-controller] Recording codecs "${recordingMimeTypes}" are not supported`)
    return
  }

  logger.debug(`[transcription-controller] Recording codec "${mimeType}" is supported, using MediaRecorder`)

  const socket = new ReconnectingWebSocket({
    token,
    codec: mimeType === 'audio/webm; codecs=opus' ? 'opus' : 'mp4',
    debug,
    ...settings,
  })
  const recorder = new MediaRecorder(audioStream, {
    audioBitsPerSecond: 18_000,
    mimeType,
  })

  recorder.ondataavailable = event => {
    socket.send(event.data)
  }

  // Start the recorder and socket before scheduling the flush loop: if either
  // call throws, no timer has been created that would otherwise never be cleared.
  recorder.start(AUDIO_TIME_SLICE_MS)
  socket.start()

  // Flush on a timer rather than requestAnimationFrame: animation-frame callbacks
  // are paused while the tab is in the background, which would stop audio from
  // being sent whenever the user switches tabs during a call.
  const flushIntervalMs = 100
  const flushTimer = setInterval(() => {
    socket.flush()
  }, flushIntervalMs)

  return {
    stop: () => {
      clearInterval(flushTimer)
      // The recorder may already be inactive (e.g. the track ended); calling
      // stop() in that state can throw and would skip closing the socket.
      if (recorder.state !== 'inactive') {
        recorder.stop()
      }
      socket.stop()
    },
  }
}

/**
 * Headless component that runs transcription while local audio is on, a
 * transcription token is available and the user is in the main channel.
 * Transcription is stopped and restarted whenever one of the effect's
 * dependencies changes, and stopped when the component unmounts.
 */
const _TranscriptionController = (): null => {
  const videoCallService = useVideoCallService()
  const audioState = useSelector(selectAudioState)
  const transcriptionToken = useSelector(state => state.live.transcriptionToken)

  const settings = useAtomValue(transcriptionsSettingsAtom)
  const microphone = useSelector(selectCurrentMicrophoneId)
  const callMode = useSelector(selectCallModeState)
  const isInMainChannel = callMode === 'in-main-channel'
  const debug = useIsDebugMode()

  useEffect(() => {
    // Preconditions: nothing is started (and nothing needs cleanup) unless all hold.
    if (audioState !== 'on') return
    if (transcriptionToken === undefined) return
    if (!isInMainChannel) return

    const localAudioTrack = videoCallService?.tracks.localAudioTrack
    if (!localAudioTrack) return
    if (microphone === undefined) return

    let transcription: { stop: () => void } | undefined = undefined
    try {
      const mediaTrack = localAudioTrack.getMediaStreamTrack()
      transcription = startTranscribe(mediaTrack, transcriptionToken, settings, debug)
    } catch (error) {
      logger.error(`[transcription-controller] Error while starting transcription`, { error })
    }

    // Cleanup is registered even when starting failed; it is then a no-op.
    return () => {
      transcription?.stop()
    }
  }, [audioState, transcriptionToken, videoCallService, settings, microphone, isInMainChannel, debug])

  return null
}

/**
 * Mounts the transcription controller only when the current live session has
 * `transcribeSession` enabled; renders nothing otherwise.
 */
export const TranscriptionController = (): React.ReactNode => {
  const { data } = useLiveSessionContext()

  return data.transcribeSession ? <_TranscriptionController /> : null
}
