// atrade/src/main/kotlin/service/LlamaServerManager.kt

package service

import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.delay
import kotlinx.coroutines.launch
import network.RagService
import util.HardwareDetector
import java.io.BufferedReader
import java.io.File
import java.io.InputStreamReader
import java.util.concurrent.ConcurrentHashMap

/**
 * Launches, monitors and stops local `llama-server` processes, at most one per TCP port.
 *
 * When the object is first initialised it force-kills leftover `llama-server` processes by name
 * and registers a JVM shutdown hook that calls [stopAll].
 */
object LlamaServerManager {
    /** Port whose readiness sets `AutoTradingManager.llmAnalyser`. */
    private const val ANALYSER_PORT = 8080

    /** Port whose readiness sets `AutoTradingManager.llmNews`; it also gets a smaller context. */
    private const val NEWS_PORT = 8081

    /** Hoisted so the column splitter is not recompiled for every parsed line. */
    private val WHITESPACE = Regex("\\s+")

    // Server processes started by this manager, keyed by the port they were started on.
    private val processes = ConcurrentHashMap<Int, Process>()
    private val scope = CoroutineScope(Dispatchers.IO + SupervisorJob())

    init {
        killZombieProcesses()

        Runtime.getRuntime().addShutdownHook(Thread {
            stopAll()
        })
    }

    /** Returns true when the `os.name` system property identifies a Windows host. */
    private fun isWindows(): Boolean =
        System.getProperty("os.name").orEmpty().lowercase().contains("win")

    /**
     * Best-effort cleanup: force-kills every process named `llama-server` using the OS tooling
     * (`taskkill` on Windows, `pkill` elsewhere).
     *
     * A failure to run the tool is logged and otherwise ignored; it never propagates.
     */
    private fun killZombieProcesses() {
        val windows = isWindows()
        val command = if (windows) {
            // /F: force, /T: also terminate child processes.
            listOf("cmd", "/c", "taskkill /F /IM llama-server.exe /T")
        } else {
            // NOTE(review): `pkill -f` matches the full command line, so any process whose
            // arguments contain "llama-server" is targeted - confirm this is intended.
            listOf("pkill", "-f", "llama-server")
        }

        try {
            val killer = ProcessBuilder(command)
                .redirectErrorStream(true)
                .start()
            // Drain the merged output so the child can never block on a full pipe.
            killer.inputStream.use { it.readBytes() }
            killer.waitFor()
            println(
                if (windows) {
                    "🧹 [System] 이전 llama-server 좀비 프로세스 정리 완료 (Windows)"
                } else {
                    "🧹 [System] 이전 llama-server 좀비 프로세스 정리 완료 (Mac/Linux)"
                }
            )
        } catch (e: Exception) {
            if (e is InterruptedException) Thread.currentThread().interrupt()
            // Cleanup is optional: report the problem but keep initialising.
            println("⚠️ [System] llama-server 좀비 프로세스 정리 실패: ${e.message}")
        }
    }

    /**
     * Reports whether some process is listening on [port].
     *
     * Uses `netstat -ano` on Windows and `lsof` on other systems.
     *
     * @param port TCP port to inspect.
     * @return a human-readable status line. It starts with "✅" when a listener was found,
     * "⚠️" when none was found and "❌" when the check itself failed.
     */
    fun checkPortStatus(port: Int): String {
        return try {
            val pid = if (isWindows()) {
                parseNetstatListeningPid(
                    captureOutput(listOf("cmd", "/c", "netstat -ano | findstr :$port")),
                    port
                )
            } else {
                // -t prints bare PIDs; -nP disables host and port name lookups.
                parseFirstPid(captureOutput(listOf("lsof", "-nP", "-iTCP:$port", "-sTCP:LISTEN", "-t")))
            }

            if (pid != null) {
                "✅ 포트 $port 상태: 사용 중 (PID: $pid - 정상 대기 중)"
            } else {
                "⚠️ 포트 $port 상태: 리스닝 상태가 아님 (서버 미구동 또는 차단 가능성)"
            }
        } catch (e: Exception) {
            "❌ 포트 점검 실패: ${e.message}"
        }
    }

    /** Runs [command], returns its merged stdout/stderr and reaps the child process. */
    private fun captureOutput(command: List<String>): String {
        val child = ProcessBuilder(command)
            .redirectErrorStream(true)
            .start()
        try {
            return child.inputStream.bufferedReader().use { it.readText() }
        } finally {
            if (!child.waitFor(2, java.util.concurrent.TimeUnit.SECONDS)) {
                child.destroyForcibly()
            }
        }
    }

    /**
     * Extracts the PID of the row in `netstat -ano` output whose local address ends with
     * `:[port]` and whose state is LISTENING, or null when there is no such row.
     *
     * Rows are expected as: protocol, local address, foreign address, state, PID.
     */
    private fun parseNetstatListeningPid(netstatOutput: String, port: Int): String? =
        netstatOutput.lineSequence()
            .map { it.trim().split(WHITESPACE) }
            .firstOrNull { cols ->
                cols.size >= 5 && cols[1].endsWith(":$port") && cols[3] == "LISTENING"
            }
            ?.get(4)

    /** Returns the first line of [output] that consists only of digits, or null. */
    private fun parseFirstPid(output: String): String? =
        output.lineSequence()
            .map { it.trim() }
            .firstOrNull { it.isNotEmpty() && it.all(Char::isDigit) }

    /**
     * Starts a `llama-server` process for [modelPath] on [port] in the background.
     *
     * Returns immediately without doing anything when a server is already registered for
     * [port] or when [modelPath] is blank. Thread count and GPU layer count are derived from
     * the detected hardware. The launched coroutine follows the server's output until the
     * process exits and flips the readiness flags once the server reports it is listening.
     *
     * @param binPath path of the `llama-server` executable.
     * @param modelPath path of the model file passed via `-m`.
     * @param port TCP port the server should listen on.
     */
    fun startServer(binPath: String, modelPath: String, port: Int) {
        if (processes.containsKey(port) || modelPath.isBlank()) return

        val os = System.getProperty("os.name").lowercase()
        val arch = System.getProperty("os.arch").lowercase()
        val isWin = os.contains("win")
        val isMacArm = os.contains("mac") && (arch.contains("arm64") || arch.contains("aarch64"))

        val cpuCores = Runtime.getRuntime().availableProcessors()
        val hasGpu = HardwareDetector.hasNvidiaGpu()

        // Threads: a share of the cores, clamped to a safe range. Over-allocating threads
        // raises context-switching cost and can reduce throughput.
        val ratio = if (isWin) 0.5 else 0.7
        val optimalThreads = (cpuCores * ratio).toInt().coerceIn(4, 16)

        // GPU layers: offload everything on Windows+NVIDIA or Apple Silicon, a few layers
        // otherwise, and none at all when the CPU name contains "i7".
        // Earlier hard-coded tuning, kept for reference: Apple Silicon 99 layers / 8 threads,
        // other non-Windows hosts 0 layers / 4 threads.
        val gpuAccelerated = (isWin && hasGpu) || isMacArm
        val optimalGpuLayers = when {
            HardwareDetector.getCpuName().contains("i7") -> 0
            gpuAccelerated -> 99
            else -> 4
        }

        println("🖥️ OS: $os / Arch: $arch")
        println("⚙️ 할당 스레드: $optimalThreads (Core: $cpuCores, Ratio: $ratio)")
        println("🚀 GPU 레이어: $optimalGpuLayers (NVIDIA/MacArm: ${if(optimalGpuLayers == 99) "YES" else "NO"})")

        val command = mutableListOf(
            binPath,
            "-m", modelPath,
            "--port", port.toString(),
            "-c", if (port == NEWS_PORT) "512" else "8192",
            "-ngl", optimalGpuLayers.toString(),
            "-t", optimalThreads.toString(),
            "--embedding"
        )
        if (port != NEWS_PORT) { // Only for the text-generation model.
            command.addAll(
                listOf(
                    "-b", "512", // Batch size: caps parallel token processing for stability.
                    "--threads-batch", optimalThreads.toString(),
                    "-fa", "on" // Flash Attention: less memory, steadier long-context runs.
                )
            )
        }

        scope.launch {
            var started: Process? = null
            try {
                val pb = ProcessBuilder(command)

                // Windows Vulkan build: make the DLLs next to the binary resolvable.
                if (isWin && binPath.contains("win-x64")) {
                    // absoluteFile first, so a bare file name still yields a parent directory.
                    val libraryPath = File(binPath).absoluteFile.parentFile?.absolutePath
                    if (libraryPath != null) {
                        val currentPath = System.getenv("PATH") ?: ""
                        pb.environment()["PATH"] = "$libraryPath${File.pathSeparator}$currentPath"
                        println("🔧 [Vulkan] 환경 변수 설정 완료: $libraryPath")
                    }
                }

                pb.redirectErrorStream(true)
                File(binPath).setExecutable(true)

                val process = pb.start()
                started = process
                // Register atomically; if another start won the race for this port, back off.
                if (processes.putIfAbsent(port, process) != null) {
                    started = null
                    process.destroyForcibly()
                    return@launch
                }
                println("✅ AI 서버 시작 시도 (Port: $port, Model: ${File(modelPath).name})")

                delay(3000)

                val status = checkPortStatus(port)
                println(status)

                // Mirror the status into the UI log store.
                TradingLogStore.addAnalyzer("System", "Port:$port", status, status.contains("✅"))

                // Follow the server output until the process closes its stream.
                BufferedReader(InputStreamReader(process.inputStream)).useLines { lines ->
                    for (line in lines) {
                        if (line.contains("server is listening")) {
                            onServerReady(port)
                        }
                    }
                }

                // Output ended, so the server is gone: free the port for a later restart.
                processes.remove(port, process)
            } catch (e: java.util.concurrent.CancellationException) {
                // Never swallow coroutine cancellation.
                throw e
            } catch (e: Exception) {
                println("❌ AI 서버 실행 실패 (Port: $port): ${e.message}")
                // Do not leave an untracked child behind once we stop supervising it.
                started?.let { failed ->
                    failed.destroyForcibly()
                    processes.remove(port, failed)
                }
            }
        }
    }

    /** Publishes readiness for the server on [port] once it reports that it is listening. */
    private fun onServerReady(port: Int) {
        println("🚀 AI 서버 준비 완료 (Port: $port)")
        if (port == ANALYSER_PORT) {
            AutoTradingManager.llmAnalyser = true
        }
        if (port == NEWS_PORT) {
            AutoTradingManager.llmNews = true
        }
        if (processes.size > 1) {
            println("[Cache] ${processes.size}")
            RagService.active()
        }
    }

    /**
     * Stops every registered server: a graceful `destroy()` first, then a forced kill if the
     * process has not exited within three seconds.
     *
     * Each process that stops is removed from the registry; processes that could not be
     * stopped stay registered so a later call can retry them.
     *
     * @return true when every registered process was stopped, false otherwise.
     */
    fun stopAll(): Boolean {
        var allStopped = true

        for ((port, process) in processes) {
            val stopped = try {
                stopProcess(port, process)
            } catch (e: Exception) {
                if (e is InterruptedException) Thread.currentThread().interrupt()
                println("❌ [Server $port] 종료 중 오류: ${e.message}")
                false
            }

            if (stopped) {
                processes.remove(port, process)
            } else {
                allStopped = false
            }
        }

        return allStopped
    }

    /** Stops a single [process] and logs the outcome; returns true when it is no longer alive. */
    private fun stopProcess(port: Int, process: Process): Boolean {
        process.destroy() // Step 1: ask politely.

        if (process.waitFor(3, java.util.concurrent.TimeUnit.SECONDS)) {
            println("🛑 [Server $port] 정상 종료되었습니다.")
            return true
        }

        // Step 2: force the kill. destroyForcibly() is asynchronous, so wait briefly before
        // deciding whether it worked.
        val killed = process.destroyForcibly().waitFor(2, java.util.concurrent.TimeUnit.SECONDS)
        if (killed) {
            println("⚠️ [Server $port] 응답이 없어 강제 종료되었습니다.")
        } else {
            println("❌ [Server $port] 강제 종료 명령 후에도 프로세스가 살아있습니다.")
        }
        return killed
    }
}