package service

import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.launch
import java.io.BufferedReader
import java.io.File
import java.io.InputStreamReader
import java.util.concurrent.ConcurrentHashMap

/**
 * Starts and tracks external AI server processes, at most one per port.
 *
 * [startServer] returns immediately; the process is spawned and its output is
 * consumed on a background coroutine ([Dispatchers.IO]). All tracked processes
 * are destroyed by [stopAll], which is also registered as a JVM shutdown hook.
 */
object LlamaServerManager {
    // Running server processes, keyed by the port they were started for.
    private val processes = ConcurrentHashMap<Int, Process>()

    // Ports whose launch was requested but whose process is not registered in
    // `processes` yet. Closes the gap between the duplicate check in startServer
    // and the asynchronous registration inside the coroutine.
    private val startingPorts = ConcurrentHashMap.newKeySet<Int>()

    private val scope = CoroutineScope(Dispatchers.IO + SupervisorJob())

    init {
        Runtime.getRuntime().addShutdownHook(Thread {
            stopAll()
        })
    }

    /**
     * Launches the server binary for [port] unless one is already running or starting.
     *
     * The call is a no-op when [modelPath] is blank or when a server for [port] is
     * already tracked. Failures to start are reported on stdout, not thrown.
     *
     * @param binPath path to the server executable; it is marked executable before launch.
     * @param modelPath path to the model file passed via `-m`.
     * @param port port passed via `--port`; also the key under which the process is tracked.
     * @param nGpuLayers NOTE(review): currently not applied. The value passed to `-ngl`
     *   is chosen per platform (see [platformTuning]); the original code shadowed this
     *   parameter with that platform value. Kept for source compatibility — confirm
     *   whether callers expect it to take effect.
     */
    fun startServer(binPath: String, modelPath: String, port: Int, nGpuLayers: Int = 99) {
        if (modelPath.isBlank()) return

        val (gpuLayers, threads) = platformTuning()
        val command = listOf(
            binPath,
            "-m", modelPath,
            "--port", port.toString(),
            // Port 8081 gets a small context window; every other port gets 8192.
            // Presumably 8081 is the embedding-only server — TODO confirm.
            "-c", if (port == 8081) "512" else "8192",
            "-ngl", gpuLayers.toString(),
            "-t", threads.toString(),
            "--embedding",
        )

        // Reserve first, then check the registry: a concurrent launch either still
        // holds the reservation (add fails) or has already registered its process
        // (containsKey is true), so the same port is never started twice.
        if (!startingPorts.add(port)) return
        if (processes.containsKey(port)) {
            startingPorts.remove(port)
            return
        }

        scope.launch {
            runServer(command, binPath, modelPath, port)
        }
    }

    /**
     * Destroys every tracked server process and forgets it.
     *
     * Each entry is removed before it is destroyed, so a process registered
     * concurrently is either stopped here or remains tracked — never silently dropped.
     */
    fun stopAll() {
        processes.keys.toList().forEach { port ->
            processes.remove(port)?.let { process ->
                process.destroy()
                println("🛑 AI 서버 종료 (Port: $port)")
            }
        }
    }

    /**
     * Picks the `-ngl` (GPU layers) and `-t` (threads) values for the current machine.
     *
     * @return a pair of (GPU layers, thread count).
     */
    private fun platformTuning(): Pair<Int, Int> {
        val os = System.getProperty("os.name").lowercase()
        val arch = System.getProperty("os.arch").lowercase()
        return when {
            // Apple-silicon Mac (tuned for an M3): unified memory lets all layers be
            // offloaded (99); 8 threads, aimed at the performance cores.
            os.contains("mac") && (arch.contains("arm64") || arch.contains("aarch64")) -> 99 to 8
            // Windows NUC: the Core Ultra 7 has many cores, so the thread count is raised
            // (GPU layer count may need adjusting when OpenVINO is used).
            // NUC 15 Pro (Core Ultra 7 155H) is 16 cores / 22 threads; 12-14 recommended.
            os.contains("win") -> 40 to 12
            // Everything else; tuned for a 2017 Intel Mac: CPU-only because of the
            // 16 GB RAM limit, 4 threads since it is most likely a quad-core model.
            else -> 0 to 4
        }
    }

    /**
     * Spawns the server process, registers it under [port], and consumes its merged
     * stdout/stderr until the stream ends. Blocks the calling thread for the lifetime
     * of the process, so it must run on an I/O dispatcher.
     */
    private fun runServer(command: List<String>, binPath: String, modelPath: String, port: Int) {
        var process: Process? = null
        try {
            File(binPath).setExecutable(true)

            val started = ProcessBuilder(command)
                .redirectErrorStream(true)
                .start()
            process = started
            processes[port] = started
            // Registered: the reservation is no longer needed.
            startingPorts.remove(port)
            println("✅ AI 서버 시작 시도 (Port: $port, Model: ${File(modelPath).name})")

            // useLines closes the reader (and the underlying stream) when done.
            BufferedReader(InputStreamReader(started.inputStream)).useLines { lines ->
                for (line in lines) {
                    // Log output (for debugging):
                    // println("[Server $port] $line")
                    if (line.contains("server is listening")) {
                        println("🚀 AI 서버 준비 완료 (Port: $port)")
                        // Once more than one server is tracked, activate RagService.
                        if (processes.size > 1) {
                            println("[Cache] ${processes.size}")
                            RagService.active()
                        }
                    }
                }
            }
        } catch (e: Exception) {
            println("❌ AI 서버 실행 실패 (Port: $port): ${e.message}")
            // Do not leave a half-started child process running untracked.
            process?.destroy()
        } finally {
            val finished = process
            if (finished == null) {
                // Never got as far as registering: release the reservation.
                startingPorts.remove(port)
            } else {
                // The process has exited or failed; drop the entry so the port can be
                // started again. Conditional remove leaves a newer process untouched.
                processes.remove(port, finished)
            }
        }
    }
}