// atrade/src/main/kotlin/service/LlamaServerManager.kt

package service

import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.launch
import java.io.BufferedReader
import java.io.File
import java.io.InputStreamReader
import java.util.concurrent.ConcurrentHashMap

/**
 * Starts and tracks external AI server processes, at most one per port.
 *
 * Each server is launched on a background coroutine ([Dispatchers.IO]) that reads the
 * process output until it ends. A JVM shutdown hook calls [stopAll] so tracked
 * processes are destroyed when the JVM exits.
 *
 * NOTE(review): the command-line flags (`-m`, `-c`, `-ngl`, `-t`, `--embedding`) and the
 * "server is listening" readiness line look like llama.cpp's server binary — confirm.
 */
object LlamaServerManager {
    // Running server processes keyed by the port they were started for.
    private val processes = ConcurrentHashMap<Int, Process>()

    // Ports whose launch was requested but whose process is not registered in `processes` yet.
    // Closes the window in which two concurrent startServer calls could both spawn a server.
    private val startingPorts: MutableSet<Int> = ConcurrentHashMap.newKeySet()

    private val scope = CoroutineScope(Dispatchers.IO + SupervisorJob())

    init {
        Runtime.getRuntime().addShutdownHook(Thread { stopAll() })
    }

    /**
     * Starts a server process for [port] unless one is already running or being started there.
     *
     * The call returns immediately; the process is spawned and monitored on a background
     * coroutine. Failures are reported on standard output, not thrown to the caller.
     *
     * @param binPath path to the server executable; it is marked executable before launch.
     * @param modelPath path to the model file; a blank value makes this call a no-op.
     * @param port port passed to the server via `--port`, also used as the tracking key.
     * @param nGpuLayers currently ignored: the `-ngl` value is always chosen from the host
     *   OS/architecture. The parameter is kept so existing callers keep compiling.
     */
    @Suppress("UNUSED_PARAMETER")
    fun startServer(binPath: String, modelPath: String, port: Int, nGpuLayers: Int = 99) {
        if (modelPath.isBlank()) return
        // Reserve the port first (atomic), then re-check the registry: a concurrent launch
        // registers its process *before* releasing the reservation, so one of the two checks
        // always catches a duplicate request.
        if (!startingPorts.add(port)) return
        if (processes.containsKey(port)) {
            startingPorts.remove(port)
            return
        }

        val command = buildCommand(binPath, modelPath, port)

        scope.launch {
            var process: Process? = null
            try {
                val builder = ProcessBuilder(command)
                // Merge stderr into stdout so a single reader sees all server output.
                builder.redirectErrorStream(true)
                File(binPath).setExecutable(true)

                val started = builder.start()
                process = started
                processes[port] = started
                startingPorts.remove(port)
                println("✅ AI 서버 시작 시도 (Port: $port, Model: ${File(modelPath).name})")

                // `use` closes the reader (and the underlying stream) however the loop ends.
                started.inputStream.bufferedReader().use { reader ->
                    for (line in reader.lineSequence()) {
                        // Uncomment to echo server output while debugging:
                        // println("[Server $port] $line")
                        if (line.contains("server is listening")) {
                            println("🚀 AI 서버 준비 완료 (Port: $port)")
                            // Activate RAG only once more than one server is tracked.
                            if (processes.size > 1) {
                                println("[Cache] ${processes.size}")
                                RagService.active()
                            }
                        }
                    }
                }
                // Output ended: wait for the actual exit before the entry is dropped below,
                // so a still-running process is never untracked.
                started.waitFor()
            } catch (e: Exception) {
                println("❌ AI 서버 실행 실패 (Port: $port): ${e.message}")
                // Do not leave an untracked server running (and holding the port).
                process?.destroy()
            } finally {
                startingPorts.remove(port)
                // Conditional remove: never evict a newer process registered for the same port.
                process?.let { processes.remove(port, it) }
            }
        }
    }

    /**
     * Destroys every tracked server process and removes it from the registry.
     */
    fun stopAll() {
        // Remove each entry before destroying it so a process registered concurrently
        // is never dropped from the registry without being destroyed.
        for (port in processes.keys.toList()) {
            val process = processes.remove(port) ?: continue
            process.destroy()
            println("🛑 AI 서버 종료 (Port: $port)")
        }
    }

    /** Builds the server command line for [port] using host-specific GPU/thread settings. */
    private fun buildCommand(binPath: String, modelPath: String, port: Int): List<String> {
        val (gpuLayers, threads) = platformTuning()
        return listOf(
            binPath,
            "-m", modelPath,
            "--port", port.toString(),
            // Port 8081 gets a much smaller `-c` value than every other port.
            // NOTE(review): presumably 8081 hosts the embedding model — confirm.
            "-c", if (port == 8081) "512" else "8192",
            "-ngl", gpuLayers.toString(),
            "-t", threads.toString(),
            "--embedding",
        )
    }

    /** Returns `(GPU layer count, thread count)` selected from the host OS and CPU architecture. */
    private fun platformTuning(): Pair<Int, Int> {
        val os = System.getProperty("os.name").lowercase()
        val arch = System.getProperty("os.arch").lowercase()
        return when {
            // Apple Silicon Mac (tuned for an M3): 99 layers to exploit unified memory,
            // 8 threads aimed at the performance cores.
            os.contains("mac") && (arch.contains("arm64") || arch.contains("aarch64")) -> 99 to 8
            // Windows NUC: NUC 15 Pro (Core Ultra 7 155H) has 16 cores / 22 threads, so
            // 12-14 threads are recommended (GPU layers may be adjusted when using OpenVINO).
            os.contains("win") -> 40 to 12
            // Everything else, tuned for a 2017 Intel Mac: CPU-only because of the 16 GB RAM
            // limit; most likely a quad-core model.
            else -> 0 to 4
        }
    }
}
.... 2026-01-23 17:05:09 +09:00			`package service`
. 2026-01-10 18:16:50 +09:00
.... 2026-01-23 17:05:09 +09:00			`import kotlinx.coroutines.CoroutineScope`
			`import kotlinx.coroutines.Dispatchers`
			`import kotlinx.coroutines.SupervisorJob`
			`import kotlinx.coroutines.launch`
. 2026-01-10 18:16:50 +09:00			`import java.io.BufferedReader`
.... 2026-01-23 17:05:09 +09:00			`import java.io.File`
. 2026-01-10 18:16:50 +09:00			`import java.io.InputStreamReader`
... 2026-01-21 18:30:03 +09:00			`import java.util.concurrent.ConcurrentHashMap`
. 2026-01-10 18:16:50 +09:00
			`object LlamaServerManager {`
... 2026-01-21 18:30:03 +09:00			`// 포트별로 프로세스를 관리합니다.`
			`private val processes = ConcurrentHashMap<Int, Process>()`
			`private val scope = CoroutineScope(Dispatchers.IO + SupervisorJob())`
			`init {`
			`Runtime.getRuntime().addShutdownHook(Thread {`
			`stopAll()`
			`})`
			`}`
. 2026-01-10 18:16:50 +09:00
... 2026-01-21 18:30:03 +09:00			`fun startServer(binPath: String, modelPath: String, port: Int, nGpuLayers: Int = 99) {`
			`// 이미 해당 포트에서 실행 중이거나 모델 경로가 비었으면 무시합니다.`
			`if (processes.containsKey(port) \|\| modelPath.isBlank()) return`
... 2026-02-19 16:55:59 +09:00			`val os = System.getProperty("os.name").lowercase()`
			`val arch = System.getProperty("os.arch").lowercase()`
			`val (nGpuLayers, threads) = when {`
			`// M3 맥: 통합 메모리 활용 최적 (99레이어, 성능코어 위주 8스레드)`
			`os.contains("mac") && (arch.contains("arm64") \|\| arch.contains("aarch64")) -> {`
			`99 to 8`
			`}`
			`// 윈도우 NUC: Core Ultra 7은 코어가 많으므로 스레드 상향 (OpenVINO 사용 시 nGpu 조정 가능)`
			`os.contains("win") -> {`
			`// NUC 15 Pro (Core Ultra 7 155H)는 16코어 22스레드이므로 12~14 권장`
			`40 to 12`
			`}`
			`// 인텔 맥 2017: 16GB 램 한계로 인해 CPU 위주 설정 권장`
			`else -> {`
			`0 to 4 // 쿼드코어 모델일 가능성이 높음`
			`}`
			`}`
. 2026-01-10 18:16:50 +09:00
			`val command = listOf(`
			`binPath,`
			`"-m", modelPath,`
... 2026-01-21 18:30:03 +09:00			`"--port", port.toString(),`
... 2026-02-19 16:55:59 +09:00			`"-c", if (port == 8081) "512" else "8192",`
... 2026-01-21 18:30:03 +09:00			`"-ngl", nGpuLayers.toString(),`
... 2026-02-19 16:55:59 +09:00			`"-t", threads.toString(),`
			`"--embedding"`
. 2026-01-10 18:16:50 +09:00			`)`

... 2026-02-19 16:55:59 +09:00
. 2026-01-10 18:16:50 +09:00			`scope.launch {`
			`try {`
			`val pb = ProcessBuilder(command)`
... 2026-01-21 18:30:03 +09:00
			`pb.redirectErrorStream(true)`
. 2026-01-10 18:16:50 +09:00			`File(binPath).setExecutable(true)`

... 2026-01-21 18:30:03 +09:00			`val process = pb.start()`
			`processes[port] = process`
			`println("✅ AI 서버 시작 시도 (Port: $port, Model: ${File(modelPath).name})")`
. 2026-01-10 18:16:50 +09:00
... 2026-01-21 18:30:03 +09:00			`val reader = BufferedReader(InputStreamReader(process.inputStream))`
. 2026-01-10 18:16:50 +09:00			`var line: String?`
			`while (reader.readLine().also { line = it } != null) {`
... 2026-01-21 18:30:03 +09:00			`// 로그 출력 (디버깅용)`
.... 2026-01-23 17:05:09 +09:00			`// println("[Server $port] $line")`
... 2026-01-21 18:30:03 +09:00			`if (line?.contains("server is listening") == true) {`
			`println("🚀 AI 서버 준비 완료 (Port: $port)")`
.... 2026-01-23 17:05:09 +09:00			`if (processes.size > 1) {`
			`println("[Cache] ${processes.size}")`
			`RagService.active()`
			`}`
. 2026-01-10 18:16:50 +09:00			`}`
			`}`
			`} catch (e: Exception) {`
... 2026-01-21 18:30:03 +09:00			`println("❌ AI 서버 실행 실패 (Port: $port): ${e.message}")`
			`processes.remove(port)`
. 2026-01-10 18:16:50 +09:00			`}`
.... 2026-01-23 17:05:09 +09:00
. 2026-01-10 18:16:50 +09:00			`}`
			`}`

... 2026-01-21 18:30:03 +09:00			`fun stopAll() {`
			`processes.forEach { (port, process) ->`
			`process.destroy()`
			`println("🛑 AI 서버 종료 (Port: $port)")`
			`}`
			`processes.clear()`
. 2026-01-10 18:16:50 +09:00			`}`
			`}`