atrade/src/main/kotlin/service/LlamaServerManager.kt

97 lines
3.7 KiB
Kotlin
Raw Normal View History

2026-01-23 17:05:09 +09:00
package service
2026-01-10 18:16:50 +09:00
2026-01-23 17:05:09 +09:00
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.launch
2026-01-10 18:16:50 +09:00
import java.io.BufferedReader
2026-01-23 17:05:09 +09:00
import java.io.File
2026-01-10 18:16:50 +09:00
import java.io.InputStreamReader
2026-01-21 18:30:03 +09:00
import java.util.concurrent.ConcurrentHashMap
2026-01-10 18:16:50 +09:00
/**
 * Starts external AI server processes and tracks them by port.
 *
 * Each server is launched on a background coroutine ([Dispatchers.IO]) that also drains the
 * process output and watches for the "server is listening" line. A JVM shutdown hook calls
 * [stopAll] so that tracked child processes are destroyed when the JVM exits.
 */
object LlamaServerManager {
    // Running server processes, keyed by the port they were started for.
    private val processes = ConcurrentHashMap<Int, Process>()
    private val scope = CoroutineScope(Dispatchers.IO + SupervisorJob())

    init {
        Runtime.getRuntime().addShutdownHook(Thread { stopAll() })
    }

    /**
     * Launches the server binary for [modelPath] on [port] unless one is already tracked for
     * that port. Returns immediately; the process is started asynchronously.
     *
     * Failures are reported on stdout and never thrown to the caller.
     *
     * @param binPath path to the server executable.
     * @param modelPath path to the model file; a blank value makes this call a no-op.
     * @param port port passed to the server via `--port`; also the key used for tracking.
     * @param nGpuLayers kept for source compatibility. NOTE(review): the value is currently
     *   ignored; `-ngl` and `-t` are always derived from the detected OS/architecture.
     *   Confirm whether callers should be able to override it.
     */
    @Suppress("UNUSED_PARAMETER")
    fun startServer(binPath: String, modelPath: String, port: Int, nGpuLayers: Int = 99) {
        // Ignore the request if a server is already tracked on this port or no model is given.
        if (processes.containsKey(port) || modelPath.isBlank()) return

        val os = System.getProperty("os.name").lowercase()
        val arch = System.getProperty("os.arch").lowercase()
        val isWin = os.contains("win")
        val (gpuLayers, threads) = platformTuning(os, arch, isWin)

        val command = listOf(
            binPath,
            "-m", modelPath,
            "--port", port.toString(),
            // Port 8081 gets a much smaller context window than any other port.
            // NOTE(review): presumably 8081 is the embedding server — confirm.
            "-c", if (port == 8081) "512" else "8192",
            "-ngl", gpuLayers.toString(),
            "-t", threads.toString(),
            "--embedding",
        )

        scope.launch {
            var process: Process? = null
            try {
                val pb = ProcessBuilder(command)
                if (isWin && binPath.contains("win-x64")) {
                    configureWindowsLibraryPath(pb, binPath)
                }
                pb.redirectErrorStream(true)
                File(binPath).setExecutable(true)

                val started = pb.start()
                process = started
                // The containsKey() check above is not atomic with this insert, so a racing
                // call may have registered a server for the same port in the meantime.
                // Keep the existing one and discard ours instead of overwriting it.
                if (processes.putIfAbsent(port, started) != null) {
                    started.destroy()
                    return@launch
                }
                println("✅ AI 서버 시작 시도 (Port: $port, Model: ${File(modelPath).name})")

                // Drain stdout+stderr until the process closes its output; useLines closes
                // the reader afterwards.
                BufferedReader(InputStreamReader(started.inputStream)).useLines { lines ->
                    for (line in lines) {
                        // Log output (for debugging):
                        // println("[Server $port] $line")
                        if ("server is listening" in line) {
                            println("🚀 AI 서버 준비 완료 (Port: $port)")
                            if (processes.size > 1) {
                                println("[Cache] ${processes.size}")
                                RagService.active()
                            }
                        }
                    }
                }
            } catch (e: Exception) {
                println("❌ AI 서버 실행 실패 (Port: $port): ${e.message}")
                // Do not leave an untracked child process behind after a failure.
                process?.destroy()
            } finally {
                // Drop the entry once the process has ended (or failed) so the port can be
                // started again. The two-argument remove only removes our own process.
                process?.let { processes.remove(port, it) }
            }
        }
    }

    /** Destroys every tracked server process and removes it from the registry. */
    fun stopAll() {
        // Remove entries one by one so a process registered concurrently is never dropped
        // from the map without being destroyed.
        for (port in processes.keys.toList()) {
            val process = processes.remove(port) ?: continue
            process.destroy()
            println("🛑 AI 서버 종료 (Port: $port)")
        }
    }

    /** Picks the (GPU layer count, thread count) pair for the detected platform. */
    private fun platformTuning(os: String, arch: String, isWin: Boolean): Pair<Int, Int> = when {
        os.contains("mac") && (arch.contains("arm64") || arch.contains("aarch64")) -> 99 to 8
        isWin -> 40 to 12 // NUC Core Ultra 7: around 40 GPU layers, 12 threads recommended
        else -> 0 to 4 // e.g. 2017 Intel Mac
    }

    /**
     * Windows Vulkan setup: prepends the directory containing the binary to PATH so that the
     * DLLs shipped next to it are found when the process loads.
     */
    private fun configureWindowsLibraryPath(pb: ProcessBuilder, binPath: String) {
        // Resolve to an absolute file first: a bare file name has no parent and would
        // otherwise yield null here.
        val libraryPath = File(binPath).absoluteFile.parentFile?.absolutePath ?: return
        val env = pb.environment()
        // Select a specific GPU (when there are several integrated GPUs):
        // env["GGML_VULKAN_DEVICE"] = "0"
        val currentPath = System.getenv("PATH") ?: ""
        env["PATH"] = if (currentPath.isEmpty()) {
            libraryPath
        } else {
            "$libraryPath${File.pathSeparator}$currentPath"
        }
        println("🔧 [Vulkan] 환경 변수 설정 완료: $libraryPath")
    }
}