atrade/src/main/kotlin/service/LlamaServerManager.kt

package service

import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.launch
import java.io.BufferedReader
import java.io.File
import java.io.InputStreamReader
import java.util.concurrent.ConcurrentHashMap

/**
 * Launches llama server processes and tracks them by the port they listen on.
 *
 * A JVM shutdown hook is registered when this object is initialized so that every tracked
 * process is destroyed on exit.
 */
object LlamaServerManager {
    /** Port that is started with the short (512) context window in [startServer]. */
    private const val EMBEDDING_PORT = 8081

    // Processes are tracked per port.
    private val processes = ConcurrentHashMap<Int, Process>()
    private val scope = CoroutineScope(Dispatchers.IO + SupervisorJob())

    init {
        Runtime.getRuntime().addShutdownHook(Thread {
            stopAll()
        })
    }

    /**
     * Starts a server process for [modelPath] on [port] in the background and watches its output.
     *
     * The call returns immediately; the process is started and monitored on [scope]. Nothing happens
     * when [modelPath] is blank or a process is already registered for [port]. When the process
     * output ends or monitoring fails, the process is unregistered and destroyed so the port can be
     * started again later.
     *
     * @param binPath path to the server executable; it is marked executable before launch.
     * @param modelPath path to the model file passed via `-m`.
     * @param port port passed via `--port`; also the key under which the process is tracked.
     * @param nGpuLayers value passed via `-ngl`.
     */
    fun startServer(binPath: String, modelPath: String, port: Int, nGpuLayers: Int = 99) {
        // Cheap early exit: ignore the request when the port is already taken or no model was given.
        if (modelPath.isBlank() || processes.containsKey(port)) return

        val command = listOf(
            binPath,
            "-m", modelPath,
            "--port", port.toString(),
            "-c", if (port == EMBEDDING_PORT) "512" else "8192", // A short context is enough for embeddings.
            "-ngl", nGpuLayers.toString(),
            "-t", "8", // 6-8 threads recommended given the M3 Pro's performance cores.
            "--embedding" // Enables the embedding feature.
        )

        scope.launch {
            // Set only when THIS coroutine actually started the process, so cleanup never touches
            // a process that belongs to a concurrent caller.
            var process: Process? = null
            try {
                File(binPath).setExecutable(true)

                // Start and register atomically: the early check above runs on the caller thread,
                // so two quick calls for the same port could otherwise both spawn a process.
                // If start() throws, no mapping is recorded and the exception propagates here.
                var launchedHere = false
                val registered = processes.computeIfAbsent(port) {
                    launchedHere = true
                    ProcessBuilder(command).redirectErrorStream(true).start()
                }
                if (!launchedHere) return@launch
                process = registered
                println("✅ AI 서버 시작 시도 (Port: $port, Model: ${File(modelPath).name})")

                // Drain the merged stdout/stderr until the stream ends; `use` closes the reader.
                BufferedReader(InputStreamReader(registered.inputStream)).use { reader ->
                    while (true) {
                        val line = reader.readLine() ?: break
                        // Log output (for debugging)
                        // println("[Server $port] $line")
                        if (line.contains("server is listening")) {
                            println("🚀 AI 서버 준비 완료 (Port: $port)")
                            if (processes.size > 1) {
                                println("[Cache] ${processes.size}")
                                RagService.active()
                            }
                        }
                    }
                }
            } catch (e: java.util.concurrent.CancellationException) {
                // Never swallow coroutine cancellation; cleanup still runs in `finally`.
                throw e
            } catch (e: Exception) {
                println("❌ AI 서버 실행 실패 (Port: $port): ${e.message}")
            } finally {
                // Unregister only our own process (stopAll may already have removed it) and make
                // sure it is not left running untracked with nobody draining its output.
                process?.let { started ->
                    if (processes.remove(port, started)) started.destroy()
                }
            }
        }
    }

    /**
     * Destroys every tracked server process and removes it from the registry.
     *
     * Entries are removed one at a time so a process registered while this runs is either
     * destroyed here or stays tracked; it is never dropped without being destroyed.
     */
    fun stopAll() {
        for (port in processes.keys.toList()) {
            val process = processes.remove(port) ?: continue
            process.destroy()
            println("🛑 AI 서버 종료 (Port: $port)")
        }
    }
}
.... 2026-01-23 17:05:09 +09:00			`package service`
. 2026-01-10 18:16:50 +09:00
.... 2026-01-23 17:05:09 +09:00			`import kotlinx.coroutines.CoroutineScope`
			`import kotlinx.coroutines.Dispatchers`
			`import kotlinx.coroutines.SupervisorJob`
			`import kotlinx.coroutines.launch`
. 2026-01-10 18:16:50 +09:00			`import java.io.BufferedReader`
.... 2026-01-23 17:05:09 +09:00			`import java.io.File`
. 2026-01-10 18:16:50 +09:00			`import java.io.InputStreamReader`
... 2026-01-21 18:30:03 +09:00			`import java.util.concurrent.ConcurrentHashMap`
. 2026-01-10 18:16:50 +09:00
			`object LlamaServerManager {`
... 2026-01-21 18:30:03 +09:00			`// 포트별로 프로세스를 관리합니다.`
			`private val processes = ConcurrentHashMap<Int, Process>()`
			`private val scope = CoroutineScope(Dispatchers.IO + SupervisorJob())`
			`init {`
			`Runtime.getRuntime().addShutdownHook(Thread {`
			`stopAll()`
			`})`
			`}`
. 2026-01-10 18:16:50 +09:00
... 2026-01-21 18:30:03 +09:00			`fun startServer(binPath: String, modelPath: String, port: Int, nGpuLayers: Int = 99) {`
			`// 이미 해당 포트에서 실행 중이거나 모델 경로가 비었으면 무시합니다.`
			`if (processes.containsKey(port) \|\| modelPath.isBlank()) return`
. 2026-01-10 18:16:50 +09:00
			`val command = listOf(`
			`binPath,`
			`"-m", modelPath,`
... 2026-01-21 18:30:03 +09:00			`"--port", port.toString(),`
.... 2026-01-23 17:05:09 +09:00			`"-c", if (port == 8081) "512" else "8192", // 임베딩용은 컨텍스트가 짧아도 충분합니다.`
... 2026-01-21 18:30:03 +09:00			`"-ngl", nGpuLayers.toString(),`
... 2026-02-06 17:53:17 +09:00			`"-t", "8", // M3 Pro의 성능 코어를 고려하여 6~8개 권장`
... 2026-01-21 18:30:03 +09:00			`"--embedding" // 임베딩 기능을 활성화합니다.`
. 2026-01-10 18:16:50 +09:00			`)`

			`scope.launch {`
			`try {`
			`val pb = ProcessBuilder(command)`
... 2026-01-21 18:30:03 +09:00
			`pb.redirectErrorStream(true)`
. 2026-01-10 18:16:50 +09:00			`File(binPath).setExecutable(true)`

... 2026-01-21 18:30:03 +09:00			`val process = pb.start()`
			`processes[port] = process`
			`println("✅ AI 서버 시작 시도 (Port: $port, Model: ${File(modelPath).name})")`
. 2026-01-10 18:16:50 +09:00
... 2026-01-21 18:30:03 +09:00			`val reader = BufferedReader(InputStreamReader(process.inputStream))`
. 2026-01-10 18:16:50 +09:00			`var line: String?`
			`while (reader.readLine().also { line = it } != null) {`
... 2026-01-21 18:30:03 +09:00			`// 로그 출력 (디버깅용)`
.... 2026-01-23 17:05:09 +09:00			`// println("[Server $port] $line")`
... 2026-01-21 18:30:03 +09:00			`if (line?.contains("server is listening") == true) {`
			`println("🚀 AI 서버 준비 완료 (Port: $port)")`
.... 2026-01-23 17:05:09 +09:00			`if (processes.size > 1) {`
			`println("[Cache] ${processes.size}")`
			`RagService.active()`
			`}`
. 2026-01-10 18:16:50 +09:00			`}`
			`}`
			`} catch (e: Exception) {`
... 2026-01-21 18:30:03 +09:00			`println("❌ AI 서버 실행 실패 (Port: $port): ${e.message}")`
			`processes.remove(port)`
. 2026-01-10 18:16:50 +09:00			`}`
.... 2026-01-23 17:05:09 +09:00
. 2026-01-10 18:16:50 +09:00			`}`
			`}`

... 2026-01-21 18:30:03 +09:00			`fun stopAll() {`
			`processes.forEach { (port, process) ->`
			`process.destroy()`
			`println("🛑 AI 서버 종료 (Port: $port)")`
			`}`
			`processes.clear()`
. 2026-01-10 18:16:50 +09:00			`}`
			`}`