atrade/src/main/kotlin/service/LlamaServerManager.kt

package service

import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.launch
import java.io.BufferedReader
import java.io.File
import java.io.InputStreamReader
import java.util.concurrent.ConcurrentHashMap

/**
 * Starts and tracks external AI server processes, at most one per port.
 *
 * Each server is launched on a background coroutine ([Dispatchers.IO]) that also
 * consumes the process output and watches for the "server is listening" line.
 * A JVM shutdown hook calls [stopAll] so that tracked processes are destroyed when
 * the JVM exits normally.
 */
object LlamaServerManager {
    // Tracked server processes, keyed by the port they were started for.
    private val processes = ConcurrentHashMap<Int, Process>()
    private val scope = CoroutineScope(Dispatchers.IO + SupervisorJob())

    init {
        // Destroy every tracked child process when the JVM shuts down.
        Runtime.getRuntime().addShutdownHook(Thread {
            stopAll()
        })
    }

    /**
     * Launches the server binary for [port] unless one is already tracked for that port
     * or [modelPath] is blank (in both cases the call is a no-op).
     *
     * The call returns immediately; the process is started and monitored asynchronously.
     * Failures are reported on standard output rather than thrown to the caller.
     *
     * @param binPath path to the server executable; it is marked executable before launch.
     * @param modelPath path to the model file passed with `-m`.
     * @param port port passed with `--port`; port 8081 gets `-c 512`, every other port `-c 8192`.
     * @param nGpuLayers currently ignored: the `-ngl` value is always chosen from the detected
     *   OS/architecture. The parameter is kept so existing call sites continue to compile.
     */
    @Suppress("UNUSED_PARAMETER")
    fun startServer(binPath: String, modelPath: String, port: Int, nGpuLayers: Int = 99) {
        // Ignore the request if this port is already tracked or no model was given.
        if (processes.containsKey(port) || modelPath.isBlank()) return

        val os = System.getProperty("os.name").lowercase()
        val arch = System.getProperty("os.arch").lowercase()
        val isWin = os.contains("win")

        // Platform presets. The locals are deliberately NOT named after the `nGpuLayers`
        // parameter, which the original code shadowed.
        val (gpuLayers, threads) = when {
            os.contains("mac") && (arch.contains("arm64") || arch.contains("aarch64")) -> 99 to 8
            isWin -> 40 to 12 // NUC Core Ultra 7: about 40 GPU layers, 12 threads recommended
            else -> 0 to 4    // e.g. a 2017 Intel Mac
        }

        val command = listOf(
            binPath,
            "-m", modelPath,
            "--port", port.toString(),
            "-c", if (port == 8081) "512" else "8192",
            "-ngl", gpuLayers.toString(),
            "-t", threads.toString(),
            "--embedding",
        )

        scope.launch {
            // Set only once this coroutine owns the map entry, so cleanup never touches
            // an entry that belongs to another launch.
            var registered: Process? = null
            try {
                val pb = ProcessBuilder(command)

                // Windows Vulkan build: put the binary's folder first on PATH so its DLLs resolve.
                if (isWin && binPath.contains("win-x64")) {
                    val env = pb.environment()
                    // To pin a specific GPU (when several are present):
                    // env["GGML_VULKAN_DEVICE"] = "0"

                    // absoluteFile guarantees a parent for bare relative names; only a
                    // filesystem root has none, in which case PATH is left untouched.
                    val libraryPath = File(binPath).absoluteFile.parentFile?.absolutePath
                    if (libraryPath != null) {
                        val currentPath = System.getenv("PATH") ?: ""
                        env["PATH"] = listOf(libraryPath, currentPath)
                            .filter { it.isNotEmpty() }
                            .joinToString(File.pathSeparator)

                        println("🔧 [Vulkan] 환경 변수 설정 완료: $libraryPath")
                    }
                }

                pb.redirectErrorStream(true)
                File(binPath).setExecutable(true)

                val process = pb.start()
                if (processes.putIfAbsent(port, process) != null) {
                    // Another launch registered this port first; do not leave an
                    // untracked process behind.
                    process.destroy()
                    return@launch
                }
                registered = process
                println("✅ AI 서버 시작 시도 (Port: $port, Model: ${File(modelPath).name})")

                // Drain the merged stdout/stderr until the process closes it; `use` closes the reader.
                BufferedReader(InputStreamReader(process.inputStream)).use { reader ->
                    while (true) {
                        val line = reader.readLine() ?: break
                        // Debug logging:
                        // println("[Server $port] $line")
                        if (line.contains("server is listening")) {
                            println("🚀 AI 서버 준비 완료 (Port: $port)")
                            // Once more than one server is tracked, notify RagService.
                            if (processes.size > 1) {
                                println("[Cache] ${processes.size}")
                                RagService.active()
                            }
                        }
                    }
                }
            } catch (e: kotlin.coroutines.cancellation.CancellationException) {
                // Never swallow coroutine cancellation.
                throw e
            } catch (e: Exception) {
                println("❌ AI 서버 실행 실패 (Port: $port): ${e.message}")
            } finally {
                // The server has exited or failed: free the port so it can be started again.
                // The two-argument remove leaves a newer process on the same port untouched.
                registered?.let { processes.remove(port, it) }
            }
        }
    }

    /**
     * Destroys every tracked server process and forgets it.
     */
    fun stopAll() {
        // Remove each entry before destroying it, so a server registered concurrently
        // is never dropped from the map without being stopped.
        for (port in processes.keys.toList()) {
            val process = processes.remove(port) ?: continue
            process.destroy()
            println("🛑 AI 서버 종료 (Port: $port)")
        }
    }
}
.... 2026-01-23 17:05:09 +09:00			`package service`
. 2026-01-10 18:16:50 +09:00
.... 2026-01-23 17:05:09 +09:00			`import kotlinx.coroutines.CoroutineScope`
			`import kotlinx.coroutines.Dispatchers`
			`import kotlinx.coroutines.SupervisorJob`
			`import kotlinx.coroutines.launch`
. 2026-01-10 18:16:50 +09:00			`import java.io.BufferedReader`
.... 2026-01-23 17:05:09 +09:00			`import java.io.File`
. 2026-01-10 18:16:50 +09:00			`import java.io.InputStreamReader`
... 2026-01-21 18:30:03 +09:00			`import java.util.concurrent.ConcurrentHashMap`
. 2026-01-10 18:16:50 +09:00
			`object LlamaServerManager {`
... 2026-01-21 18:30:03 +09:00			`// 포트별로 프로세스를 관리합니다.`
			`private val processes = ConcurrentHashMap<Int, Process>()`
			`private val scope = CoroutineScope(Dispatchers.IO + SupervisorJob())`
			`init {`
			`Runtime.getRuntime().addShutdownHook(Thread {`
			`stopAll()`
			`})`
			`}`
. 2026-01-10 18:16:50 +09:00
... 2026-01-21 18:30:03 +09:00			`fun startServer(binPath: String, modelPath: String, port: Int, nGpuLayers: Int = 99) {`
			`// 이미 해당 포트에서 실행 중이거나 모델 경로가 비었으면 무시합니다.`
			`if (processes.containsKey(port) \|\| modelPath.isBlank()) return`
... 2026-02-19 16:55:59 +09:00			`val os = System.getProperty("os.name").lowercase()`
			`val arch = System.getProperty("os.arch").lowercase()`
... 2026-02-19 18:02:37 +09:00			`val isWin = os.contains("win")`
... 2026-02-19 16:55:59 +09:00			`val (nGpuLayers, threads) = when {`
... 2026-02-19 18:02:37 +09:00			`os.contains("mac") && (arch.contains("arm64") \|\| arch.contains("aarch64")) -> 99 to 8`
			`isWin -> 40 to 12 // NUC Core Ultra 7: GPU 레이어 40 내외, 스레드 12 권장`
			`else -> 0 to 4 // 인텔 맥 2017 등`
... 2026-02-19 16:55:59 +09:00			`}`
. 2026-01-10 18:16:50 +09:00
			`val command = listOf(`
			`binPath,`
			`"-m", modelPath,`
... 2026-01-21 18:30:03 +09:00			`"--port", port.toString(),`
... 2026-02-19 16:55:59 +09:00			`"-c", if (port == 8081) "512" else "8192",`
... 2026-01-21 18:30:03 +09:00			`"-ngl", nGpuLayers.toString(),`
... 2026-02-19 16:55:59 +09:00			`"-t", threads.toString(),`
			`"--embedding"`
. 2026-01-10 18:16:50 +09:00			`)`

			`scope.launch {`
			`try {`
			`val pb = ProcessBuilder(command)`
... 2026-01-21 18:30:03 +09:00
... 2026-02-19 18:02:37 +09:00			`// 2. 윈도우 Vulkan 환경 변수 설정`
			`if (isWin && binPath.contains("win-x64")) {`
			`val env = pb.environment()`
			`// 특정 GPU 선택 (내장 GPU가 여러 개일 경우)`
			`// env["GGML_VULKAN_DEVICE"] = "0"`

			`// DLL 로드 경로 강제 지정 (bin 폴더 내 dll 참조)`
			`val libraryPath = File(binPath).parentFile.absolutePath`
			`val currentPath = System.getenv("PATH") ?: ""`
			`env["PATH"] = "$libraryPath;$currentPath"`

			`println("🔧 [Vulkan] 환경 변수 설정 완료: $libraryPath")`
			`}`

... 2026-01-21 18:30:03 +09:00			`pb.redirectErrorStream(true)`
. 2026-01-10 18:16:50 +09:00			`File(binPath).setExecutable(true)`

... 2026-01-21 18:30:03 +09:00			`val process = pb.start()`
			`processes[port] = process`
			`println("✅ AI 서버 시작 시도 (Port: $port, Model: ${File(modelPath).name})")`
. 2026-01-10 18:16:50 +09:00
... 2026-01-21 18:30:03 +09:00			`val reader = BufferedReader(InputStreamReader(process.inputStream))`
. 2026-01-10 18:16:50 +09:00			`var line: String?`
			`while (reader.readLine().also { line = it } != null) {`
... 2026-01-21 18:30:03 +09:00			`// 로그 출력 (디버깅용)`
.... 2026-01-23 17:05:09 +09:00			`// println("[Server $port] $line")`
... 2026-01-21 18:30:03 +09:00			`if (line?.contains("server is listening") == true) {`
			`println("🚀 AI 서버 준비 완료 (Port: $port)")`
.... 2026-01-23 17:05:09 +09:00			`if (processes.size > 1) {`
			`println("[Cache] ${processes.size}")`
			`RagService.active()`
			`}`
. 2026-01-10 18:16:50 +09:00			`}`
			`}`
			`} catch (e: Exception) {`
... 2026-01-21 18:30:03 +09:00			`println("❌ AI 서버 실행 실패 (Port: $port): ${e.message}")`
			`processes.remove(port)`
. 2026-01-10 18:16:50 +09:00			`}`
.... 2026-01-23 17:05:09 +09:00
. 2026-01-10 18:16:50 +09:00			`}`
			`}`

... 2026-01-21 18:30:03 +09:00			`fun stopAll() {`
			`processes.forEach { (port, process) ->`
			`process.destroy()`
			`println("🛑 AI 서버 종료 (Port: $port)")`
			`}`
			`processes.clear()`
. 2026-01-10 18:16:50 +09:00			`}`
			`}`