This commit is contained in:
lunaticbum 2025-03-14 17:47:19 +09:00
parent 81417ff8aa
commit c69e18346a
6 changed files with 407 additions and 152 deletions

View File

@ -52,6 +52,10 @@ dependencies {
implementation("org.springframework.boot:spring-boot-starter-thymeleaf") implementation("org.springframework.boot:spring-boot-starter-thymeleaf")
implementation("nz.net.ultraq.thymeleaf:thymeleaf-layout-dialect") implementation("nz.net.ultraq.thymeleaf:thymeleaf-layout-dialect")
implementation ("org.jsoup:jsoup:1.18.1") implementation ("org.jsoup:jsoup:1.18.1")
implementation ("org.seleniumhq.selenium:selenium-java:4.10.0")
implementation ("com.drewnoakes:metadata-extractor:2.19.0") implementation ("com.drewnoakes:metadata-extractor:2.19.0")
implementation("org.springframework.boot:spring-boot-starter-security") implementation("org.springframework.boot:spring-boot-starter-security")
compileOnly("org.projectlombok:lombok") compileOnly("org.projectlombok:lombok")

View File

@ -44,7 +44,7 @@ class BumsInterceptor : HandlerInterceptor {
handler: Any, handler: Any,
@Nullable modelAndView: ModelAndView? @Nullable modelAndView: ModelAndView?
) { ) {
var skippResourcesExtension = arrayListOf(".ajax",".js",".css","/tlg/",".api").filter { request.requestURI.contains(it)}.size > 0 var skippResourcesExtension = arrayListOf(".ajax",".js",".css","/tlg/",".api","error").filter { request.requestURI.contains(it)}.size > 0
if (!skippResourcesExtension) { if (!skippResourcesExtension) {
if (request.requestURI.contains("logout") == false && !request.cookies.isNullOrEmpty() && request.cookies.filter { if (request.requestURI.contains("logout") == false && !request.cookies.isNullOrEmpty() && request.cookies.filter {
it.name.equals( it.name.equals(

View File

@ -371,7 +371,7 @@ class Telegram {
// } // }
// } // }
CoroutineScope(Dispatchers.IO).async { CoroutineScope(Dispatchers.IO).async {
lama.generateResponse(query = originalQuery?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date()))) lama.generateResponse(originalQuery?.replace("오늘","오늘(${SimpleDateFormat("yyyy-MM-dd").format(Date())})"))
} }
return "TEST" return "TEST"
} }

View File

@ -16,8 +16,8 @@ class QConfig {
} }
class QHnswConfig { class QHnswConfig {
var m: Int = 0 var m: Long = 0
var ef_construct: Int = 0 var ef_construct: Long = 0
var full_scan_threshold: Int = 0 var full_scan_threshold: Int = 0
var max_indexing_threads: Int = 0 var max_indexing_threads: Int = 0
var on_disk: Boolean = false var on_disk: Boolean = false
@ -25,20 +25,20 @@ class QHnswConfig {
class QOptimizerConfig { class QOptimizerConfig {
var deleted_threshold: Double = 0.0 var deleted_threshold: Double = 0.0
var vacuum_min_vector_number: Int = 0 var vacuum_min_vector_number: Long = 0
var default_segment_number: Int = 0 var default_segment_number: Long = 0
var max_segment_size: Any? = null var max_segment_size: Any? = null
var memmap_threshold: Any? = null var memmap_threshold: Any? = null
var indexing_threshold: Int = 0 var indexing_threshold: Long = 0
var flush_interval_sec: Int = 0 var flush_interval_sec: Long = 0
var max_optimization_threads: Any? = null var max_optimization_threads: Any? = null
} }
class QParams { class QParams {
var vectors: QVectors? = null var vectors: QVectors? = null
var shard_number: Int = 0 var shard_number: Long = 0
var replication_factor: Int = 0 var replication_factor: Long = 0
var write_consistency_factor: Int = 0 var write_consistency_factor: Long = 0
var on_disk_payload: Boolean = false var on_disk_payload: Boolean = false
} }
@ -47,9 +47,9 @@ class QPayloadSchema
class QResult { class QResult {
var status: String? = null var status: String? = null
var optimizer_status: String? = null var optimizer_status: String? = null
var indexed_vectors_count: Int = 0 var indexed_vectors_count: Long = 0
var points_count: Long = 0 var points_count: Long = 0
var segments_count: Int = 0 var segments_count: Long = 0
var config: QConfig? = null var config: QConfig? = null
var payload_schema: QPayloadSchema? = null var payload_schema: QPayloadSchema? = null
} }
@ -61,18 +61,18 @@ class QStrictModeConfig {
} }
class QVectors { class QVectors {
var size: Int = 0 var size: Long = 0
var distance: String? = null var distance: String? = null
} }
class QWalConfig { class QWalConfig {
var wal_capacity_mb: Int = 0 var wal_capacity_mb: Long = 0
var wal_segments_ahead: Int = 0 var wal_segments_ahead: Long = 0
} }
class QSearchResult { class QSearchResult {
var id: Int = 0 var id: Long = 0
var version: Int = 0 var version: Long = 0
var score: Double = 0.0 var score: Double = 0.0
} }
@ -103,7 +103,7 @@ class QContentsPayload {
} }
class QContentsResult { class QContentsResult {
var id: Int = 0 var id: Long = 0
var payload: QContentsPayload? = null var payload: QContentsPayload? = null
} }

View File

@ -3,34 +3,40 @@ package kr.lunaticbum.back.lun.service
import com.google.gson.Gson import com.google.gson.Gson
import com.google.gson.annotations.SerializedName import com.google.gson.JsonElement
import com.google.gson.JsonObject
import com.google.gson.JsonParser
import io.micrometer.observation.ObservationRegistry import io.micrometer.observation.ObservationRegistry
import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async
import kotlinx.coroutines.launch import kotlinx.coroutines.launch
import kr.lunaticbum.back.lun.configs.GlobalEnvironment import kr.lunaticbum.back.lun.configs.GlobalEnvironment
import kr.lunaticbum.back.lun.controllers.TelegramSendMsg import kr.lunaticbum.back.lun.controllers.TelegramSendMsg
import kr.lunaticbum.back.lun.model.* import kr.lunaticbum.back.lun.model.*
import kr.lunaticbum.back.lun.utils.RssFeedsParser
import org.jsoup.Jsoup import org.jsoup.Jsoup
import org.jsoup.select.Elements import org.jsoup.select.Elements
import org.openqa.selenium.By
import org.openqa.selenium.WebDriver
import org.openqa.selenium.chrome.ChromeOptions
import org.openqa.selenium.remote.RemoteWebDriver
import org.springframework.ai.embedding.EmbeddingRequest import org.springframework.ai.embedding.EmbeddingRequest
import org.springframework.ai.ollama.OllamaEmbeddingModel import org.springframework.ai.ollama.OllamaEmbeddingModel
import org.springframework.ai.ollama.api.OllamaApi import org.springframework.ai.ollama.api.OllamaApi
import org.springframework.ai.ollama.api.OllamaOptions import org.springframework.ai.ollama.api.OllamaOptions
import org.springframework.ai.ollama.management.ModelManagementOptions import org.springframework.ai.ollama.management.ModelManagementOptions
import org.springframework.beans.factory.annotation.Autowired import org.springframework.beans.factory.annotation.Autowired
import org.springframework.beans.factory.annotation.Qualifier
import org.springframework.http.MediaType import org.springframework.http.MediaType
import org.springframework.scheduling.annotation.Async import org.springframework.scheduling.annotation.Async
import org.springframework.stereotype.Service import org.springframework.stereotype.Service
import org.springframework.web.reactive.function.BodyInserters import org.springframework.web.reactive.function.BodyInserters
import org.springframework.web.reactive.function.client.WebClient import org.springframework.web.reactive.function.client.WebClient
import reactor.kotlin.core.publisher.toMono import reactor.kotlin.core.publisher.toMono
import java.net.URL
import java.net.URLEncoder
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import java.time.Duration import java.time.Duration
import java.util.* import java.util.*
import kotlin.collections.ArrayList
@Service @Service
@ -46,7 +52,7 @@ class Lama {
data class QPut(val points : ArrayList<QData>) data class QPut(val points : ArrayList<QData>)
data class QData(val id : Long, val vector : FloatArray, val payload : SearXngResult) data class QData(val id : Long, val vector : FloatArray, val payload : SearXngResult)
data class QContentsList(var ids : ArrayList<Int> = ArrayList(), var with_payload : Boolean = true, var with_vector : Boolean = false) data class QContentsList(var ids : ArrayList<Long> = ArrayList(), var with_payload : Boolean = true, var with_vector : Boolean = false)
// fun makeCollection() : String{ // fun makeCollection() : String{
// //
// class CollectionPut { // class CollectionPut {
@ -76,51 +82,51 @@ class Lama {
fun jsopFilter(url : String) : String { fun jsopFilter(url : String) : String {
val joinString = "\n#" val joinString = "\n#"
var lastElement : Elements = Elements() var lastElements : Elements = Elements()
var body = Jsoup.connect(url).timeout(30000).get().body() var body = Jsoup.connect(url).timeout(30000).get().body()
var elements : Elements? = null // var elements : Elements? = null
if (url.contains("nate.com", true)) { // if (url.contains("nate.com", true)) {
if (url.contains("view", true)) { // if (url.contains("view", true)) {
elements = body.select("[class*=articleView]") // elements = body.select("[class*=articleView]")
}else { // }else {
elements = body.select("[class*=postRankSubjectList]") // elements = body.select("[class*=postRankSubjectList]")
} // }
} else if (url.contains("newsis.com/view", true)) { // } else if (url.contains("newsis.com/view", true)) {
elements = body.select("[class*=articleView]") // elements = body.select("[class*=articleView]")
} else if (url.contains("blog.naver.com", true)) { // } else if (url.contains("blog.naver.com", true)) {
elements = body.select("[class*=se-viewer]") // elements = body.select("[class*=se-viewer]")
} else if (url.contains("bbc.com/korean/articles", true)) { // } else if (url.contains("bbc.com/korean/articles", true)) {
elements = body.select("main[role$=main]") // elements = body.select("main[role$=main]")
} else if (url.contains("chosun.com/client", true)) { // } else if (url.contains("chosun.com/client", true)) {
elements = body.select("[class*=articleBody]") // elements = body.select("[class*=articleBody]")
} else if (url.contains("nocutnews.co.kr/news", true)) { // } else if (url.contains("nocutnews.co.kr/news", true)) {
elements = body.select("[class*=container]") // elements = body.select("[class*=container]")
} else if (url.contains("hani.co.kr/arti/", true)) { // } else if (url.contains("hani.co.kr/arti/", true)) {
elements = body.select("[class*=ArticleDetail]") // elements = body.select("[class*=ArticleDetail]")
} else if (url.contains("yna.co.kr/view", true)) { // } else if (url.contains("yna.co.kr/view", true)) {
elements = body.select("[class*=container]") // elements = body.select("[class*=container]")
} else if (url.contains("newspim.com/news", true)) { // } else if (url.contains("newspim.com/news", true)) {
elements = body.select("[class*=container]") // elements = body.select("[class*=container]")
} else { // } else {
//
} // }
if (elements?.size ?: 0 > 0) { // if (elements?.size ?: 0 > 0) {
elements?.forEach { // elements?.forEach {
lastElement.add(it) // lastElements.add(it)
} // }
} // }
//
if (lastElement.size < 1) { // if (lastElements.size < 1) {
arrayOf("container","article","main","viewer","content").forEach { // arrayOf("container","article","main","viewer","content").forEach {
var result = Elements() // var result = Elements()
result.addAll(body.select("[class*=$it]")) // result.addAll(body.select("[class*=$it]"))
result.addAll(body.select("[id*=$it]")) // result.addAll(body.select("[id*=$it]"))
result.addAll(body.select(it)) // result.addAll(body.select(it))
result.forEach { if (it.text().length > 100 && it.children().size < 5) { lastElement.add(it) } } // result.forEach { if (it.text().length > 100 && it.children().size < 5) { lastElements.add(it) } }
} // }
} // }
return if (lastElement.size > 0) { return if (lastElements.size > 0) {
lastElement.eachText().joinToString(joinString) lastElements.eachText().joinToString(joinString)
} else { } else {
body.children().eachText().joinToString(joinString) body.children().eachText().joinToString(joinString)
} }
@ -140,104 +146,200 @@ class Lama {
val embedimgModelEeve ="lancard/korean-yanolja-eeve" val embedimgModelEeve ="lancard/korean-yanolja-eeve"
val embedimgModelBgeM3 = "bge-m3" val embedimgModelBgeM3 = "bge-m3"
val currentEmbedimg = embedimgModelEeve val currentEmbedimg = embedimgModelBgeM3
val llmPhi4 = "phi4:14b" val llmPhi4 = "phi4:14b"
val llmGemma3 = "gemma3:12b" val llmGemma3 = "gemma3:4b"
val llmPhi4Mini = "phi4-mini"
val llmDolphin3 = "dolphin3" val llmDolphin3 = "dolphin3"
val currentLLM = llmDolphin3 val currentLLM = llmGemma3
fun getGoogleSearch(query:String){
Jsoup.connect("https://www.google.com/search?q=".plus(query)).timeout(30000).get().select("a[href]").forEach { }
}
fun addDocuments(query : String , refinedQuery: RefinedQuery?) { val waitTime = 1000L
val topCount = 2
@Async
suspend fun addDocuments(query : String , refinedQuery: RefinedQuery?) {
var querys : ArrayList<String> = ArrayList() var querys : ArrayList<String> = ArrayList()
querys.add(query) querys.add(query)
refinedQuery?.ko_query?.let { querys.add(it) } refinedQuery?.ko_query?.let { querys.add(it) }
refinedQuery?.en_query?.let { querys.add(it) } refinedQuery?.en_query?.let { querys.add(it) }
refinedQuery?.keywords?.let { querys.add(it.joinToString { " " })} refinedQuery?.keywords?.let { querys.add(it.joinToString { " " })}
val readedUrls = ArrayList<String>() val readedUrls = ArrayList<String>()
querys.forEach { refinedQuery ->
CoroutineScope(Dispatchers.IO).launch { try {
val gSearch = "https://psn.lunaticbum.kr/search?q=${ var options : ChromeOptions = ChromeOptions();
refinedQuery?.replace( options.addArguments("--disable-popup-blocking");
"오늘", options.addArguments("--disable-default-apps");
SimpleDateFormat("yyyMMdd").format(Date()) options.addArguments("--disable-notifications");
) options.addArguments("--disable-blink-features=AutomationControlled");
}&language=ko&time_range=month&safesearch=0&categories=general&format=json" val targetUrls = hashSetOf<String>()
println("gSearch >>> ${gSearch}") RemoteWebDriver(URL("https://video.lunaticbum.kr"), options).let { driver ->
WebClient.create().get() querys.forEach { refinedQuery->
.uri(gSearch) var findCount = 0
.retrieve() try {
.bodyToMono(SearXng::class.java).timeout(Duration.ofMinutes(20L)).block()?.let { gsResult -> driver.get("https://www.google.com/search?q=$refinedQuery");
gsResult.results?.filter { it.url?.startsWith("https://") == true && it.score > 0.4 }?.forEach { Thread.sleep(waitTime)
println("in filter ${it.url}") println(driver.currentUrl)
if (readedUrls.contains(it.url) == false) { driver.findElement(By.ByTagName("Body"))?.let { webElement ->
readedUrls.add(it.url!!) Jsoup.parse(driver.pageSource).select("[href*=https]").forEach {
it.originQuery = query var href = it.attr("href")
it.refinedQuery = refinedQuery if (href?.length ?: 0 > 5 && href.startsWith("https://") && findCount < topCount && href.contains("google") == false && href.contains("youtube") == false) {
println(it.title) targetUrls.add(href)
try { println("add targetUrls $href")
jsopFilter(it.url!!).let { text -> findCount += 1
it.originHtml = text
webPageSummarize(it, text)
}
} catch (e: Exception) {
e.printStackTrace()
} }
} }
} }
}catch (e:Exception){
e.printStackTrace()
} }
}
driver.close()
driver.quit()
} }
options = ChromeOptions();
options.addArguments("--disable-popup-blocking");
options.addArguments("--disable-default-apps");
options.addArguments("--disable-notifications");
options.addArguments("--disable-blink-features=AutomationControlled");
RemoteWebDriver(URL("https://video.lunaticbum.kr"), options).let { driver ->
targetUrls.forEach { url ->
var result = SearXngResult()
if (url?.length ?: 0 > 5 && url?.startsWith("https://") == true && readedUrls.contains(url) == false) {
readedUrls.add(url!!)
result.url = url!!
result.originQuery = query
try {
driver.get(url);
Thread.sleep(waitTime)
driver.findElement(By.ByTagName("Body"))?.let { webElement ->
if(webElement.text.length > 120) {
println(driver.currentUrl)
println(webElement.text)
result.title = driver.title
result.originHtml = webElement.text
webPageSummarize(result, webElement.text)
}
}
} catch (e: Exception) {
e.printStackTrace()
}
}
}
driver.close();
driver.quit()
}
options = ChromeOptions();
options.addArguments("--disable-popup-blocking");
options.addArguments("--disable-default-apps");
options.addArguments("--disable-notifications");
options.addArguments("--disable-blink-features=AutomationControlled");
RemoteWebDriver(URL("https://video.lunaticbum.kr"), options).let { driver ->
querys.forEach { refinedQuery ->
var googleSCount = 0
RssFeedsParser().readFeed("https://news.google.com/rss/search?q=${URLEncoder.encode(query)}=ko&gl=KR&ceid=KR%3Ako/")?.messages?.forEach {
var url: String? = it.link
var result = SearXngResult()
println("url >>>> $url")
if (url?.length ?: 0 > 5 && url?.startsWith("https://") == true && readedUrls.contains(url) == false && googleSCount < topCount) {
readedUrls.add(url!!)
result.url = url!!
result.originQuery = query
result.refinedQuery = refinedQuery
result.title = it.title
println(result.title)
try {
driver.get(url);
Thread.sleep(waitTime)
println(driver.currentUrl)
driver.findElement(By.ByTagName("Body"))?.let { webElement ->
println(driver.currentUrl)
println(webElement.text)
result.title = driver.title
result.originHtml = webElement.text
webPageSummarize(result, webElement.text)
googleSCount += 1
}
} catch (e: Exception) {
e.printStackTrace()
}
}
}
}
driver.close()
driver.quit()
}
} catch (e:Exception){e.printStackTrace()}
querys.forEach { refinedQuery ->
val gSearch = "https://psn.lunaticbum.kr/search?q=${refinedQuery?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date()))}&language=ko&time_range=month&safesearch=0&categories=general&format=json"
println("gSearch >>> ${gSearch}")
WebClient.create().get()
.uri(gSearch)
.retrieve()
.bodyToMono(SearXng::class.java).timeout(Duration.ofMinutes(20L)).block()?.let { gsResult ->
gsResult.results?.filter { it.url?.startsWith("https://") == true && it.score > 0.4 }?.forEach {
println("in filter ${it.url}")
if (readedUrls.contains(it.url) == false) {
readedUrls.add(it.url!!)
it.originQuery = query
it.refinedQuery = refinedQuery
println(it.title)
try {
jsopFilter(it.url!!).let { text ->
it.originHtml = text
webPageSummarize(it, text)
}
} catch (e: Exception) {
e.printStackTrace()
}
}
}
}
println("end of search") println("end of search")
} }
} }
var format = "원문:\n'%s'\n원문의 웹 페이지 소스는 '%s'이 질문에 대해 연관 결과로 받은 내용이야. 해당 정보를 파악해서 본문 내용을 최대한 자세히 알려줘 'query:{질문},contents:{본문내용 한국어},summary:{100자 이하로 요약 한국어},keywords:[키워드],related_links:[링크],relatedness_score:{0.0~10.0}'이 형식의 결과만들어줘" var format = "context:'%s'\ncontext는 웹 페이지 문자를 가져온 것 '%s'이 질문에 대해 연관 결과로 받은 내용임. 해당 context 정리 해서 본문 내용을 최대한 자세히 알려줘\n'{query:질문 내용, contents_ko:자세한 내용 한국어 , summary_ko:요약된 내용 한국어, keywords:[키워드], related_links:[{link,description}}], relatedness_score:0.0~10.0}'\n이 형식의 결과로 만들어 "
internal fun makeSummarizeRequestMsg(it : SearXngResult) : String= format.format(it.originHtml,it.originQuery) internal fun makeSummarizeRequestMsg(it : SearXngResult) : String= format.format(it.originHtml,it.originQuery)
internal fun makeCahtReq(reqMsg:String) = OllamaApi.ChatRequest.Builder(currentLLM).stream(false).format("json").messages(listOf(OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content(reqMsg).build())).build() internal fun makeCahtReq(reqMsg:String) = OllamaApi.ChatRequest.Builder(currentLLM).stream(false).format("json").messages(reqMsg.chunked(100).map { println(it); OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content(it).build()}.toList()).build()
@Async @Async
fun webPageSummarize(it : SearXngResult , text : String) { fun webPageSummarize(it : SearXngResult , text : String) {
try { try {
infomationDic.get(it.originQuery)!!.put(it.url!!, text)
val chatClient = OllamaApi("https://lama.lunaticbum.kr") val chatClient = OllamaApi("https://lama.lunaticbum.kr")
val embeddingModel = OllamaEmbeddingModel( val embeddingModel = OllamaEmbeddingModel(chatClient, OllamaOptions.builder().build(), ObservationRegistry.create(), ModelManagementOptions.defaults())
chatClient, OllamaOptions.builder().build(), ObservationRegistry.create(), ModelManagementOptions.defaults()) val embeddingResponse = embeddingModel.call(EmbeddingRequest(text.chunked(400).toList(), OllamaOptions.builder().model(currentEmbedimg).truncate(false).build()))
println("text >>>>> ${text?.chunked(50)?.first() ?: ""}") it.originHtml = text
var dispoable = chatClient.chat(makeCahtReq(makeSummarizeRequestMsg(it))).toMono().subscribe({aiResponce -> val sdss = QPut(arrayListOf())
it.pageData = aiResponce.message.content sdss.points.add(QData(id = System.currentTimeMillis(), embeddingResponse.result.output, it))
println("summary result >>>>> ${it.pageData}") if (sdss.points.size > 0) {
val embeddingResponse = embeddingModel.call( val qUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
EmbeddingRequest( val client = WebClient.create()
listOf(aiResponce.message.content), client.put()
OllamaOptions.builder() .uri(qUrl)
.model(currentEmbedimg) .header("api-key", "blama-admin-key-gb")
.truncate(false).build() .body(BodyInserters.fromValue(Gson().toJson(sdss)))
.retrieve()
.bodyToMono(String::class.java).timeout(Duration.ofMinutes(20L)).subscribe(
{ resultString -> }, { error -> error.printStackTrace() }
) )
) }
infomationDic.put(it.url!!,aiResponce.message.content)
val sdss = QPut(arrayListOf())
sdss.points.add(QData(id = System.currentTimeMillis(),embeddingResponse.result.output,it))
if (sdss.points.size > 0) {
val qUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
val client = WebClient.create()
client.put()
.uri(qUrl)
.header("api-key", "blama-admin-key-gb")
.body(BodyInserters.fromValue(Gson().toJson(sdss)))
.retrieve()
.bodyToMono(String::class.java).timeout(Duration.ofMinutes(20L)).subscribe(
{resultString -> },{error-> error.printStackTrace()}
)
}
},{err->
err.printStackTrace()
})
}catch (e : Exception) { }catch (e : Exception) {
e.printStackTrace() e.printStackTrace()
} }
} }
@ -281,10 +383,7 @@ class Lama {
println(Gson().toJson(lists)) println(Gson().toJson(lists))
return if (lists?.result?.size ?: 0 > 0) { return if (lists?.result?.size ?: 0 > 0) {
val qContents = QContentsList() val qContents = QContentsList()
lists?.result?.forEach { lists?.result?.filter { it.score > 8.0 }?.forEach { qContents.ids.add(it.id) }
qContents.ids.add(it.id)
}
val qCUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points") val qCUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
val client2 = WebClient.create() val client2 = WebClient.create()
client2.post() client2.post()
@ -301,44 +400,44 @@ class Lama {
@Autowired @Autowired
lateinit var globalEvv : GlobalEnvironment lateinit var globalEvv : GlobalEnvironment
var infomationDic = hashMapOf<String,String>() var infomationDic = hashMapOf<String,HashMap<String,String>>()
suspend fun generateResponse(query: String?, targetId: String? = globalEvv.telegramMyId) { suspend fun generateResponse(query: String?, targetId: String? = globalEvv.telegramMyId) {
infomationDic.clear()
val chatClient = OllamaApi("https://lama.lunaticbum.kr") val chatClient = OllamaApi("https://lama.lunaticbum.kr")
val embeddingModel = OllamaEmbeddingModel( val embeddingModel = OllamaEmbeddingModel(
chatClient, OllamaOptions.builder().build(), ObservationRegistry.create(), ModelManagementOptions.defaults()) chatClient, OllamaOptions.builder().build(), ObservationRegistry.create(), ModelManagementOptions.defaults())
println("On generateResponse :: find something ${query}") println("On generateResponse :: find something ${query}")
query?.let { originalQuery -> query?.let { originalQuery ->
infomationDic.put(query!!, hashMapOf())
var embeddingResponse = embeddingModel.call(EmbeddingRequest(listOf(originalQuery), OllamaOptions.builder().model(currentEmbedimg).truncate(false).build())) var embeddingResponse = embeddingModel.call(EmbeddingRequest(listOf(originalQuery), OllamaOptions.builder().model(currentEmbedimg).truncate(false).build()))
addDocuments(originalQuery, querySummarize(originalQuery)) addDocuments(originalQuery, querySummarize(originalQuery))
println("points size ${embeddingResponse.result.output.size}") println("points size ${embeddingResponse.result.output.size}")
var context : String? = "" var context : StringBuffer = StringBuffer()
try { try {
embedQuery(embeddingResponse.result.output)?.result?.forEach { result -> embedQuery(embeddingResponse.result.output)?.result?.forEach { result ->
if (infomationDic.contains(result.payload?.url ?: "NONE") == false) { if (infomationDic.get(query!!)!!.contains(result.payload?.url ?: "NONE") == false) {
context += "\n# :".plus(if (result.payload?.pageData?.length ?: 0 > 10) { context.append("\n# :".plus(if (result.payload?.pageData?.length ?: 0 > 10) {
result.payload?.pageData result.payload?.pageData
} else { } else {
result.payload?.content result.payload?.content
}) }))
} }
} }
}catch (e:Exception){ }catch (e:Exception){
e.printStackTrace() e.printStackTrace()
} }
infomationDic.iterator().forEach { context += "\n#${it.key}:${it.value}" } infomationDic.get(query!!)!!.iterator().forEach { context.append("\n#${it.key}:${it.value}") }
val prompt : StringBuffer = StringBuffer().append("참조:\n").append(context).append("\n참조 내용을 고려 해서\n'$query'").append(query).append("\n에 {querys:[],answers:[],keywords:[],links:[]}형식으로 최대한 자세히 대답 해줘 ")
val fullUrl = "https://api.telegram.org/${globalEvv.telegramBotKey}/sendMessage"
val prompt = "참조:\n$context\n참조 내용을 고려해서\n해당 질문:${query}\n에 {질문내용:[한국어],답변내용:[한국어],전체키워드:[],참조링크:[]}형식으로 대답 해줘 ".trimIndent()
println(prompt)
val response: OllamaApi.ChatResponse = chatClient.chat(OllamaApi.ChatRequest.Builder(currentLLM).stream(false).format("json").messages( val response: OllamaApi.ChatResponse = chatClient.chat(OllamaApi.ChatRequest.Builder(currentLLM).stream(false).format("json").messages(
listOf(OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content(prompt).build()) prompt.chunked(300).map { println(it); OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content(it).build()}.toList()).build())
).build()) // println(response.message.content)
println(response.message.content)
CoroutineScope(Dispatchers.IO).launch { CoroutineScope(Dispatchers.IO).launch {
var toalmsg = "${query}의 대답이 도착했어요.\n${response.message.content}" var toalmsg = "${query}의 대답이 도착했어요.\n${response.message.content}"
val fullUrl = "https://api.telegram.org/${globalEvv.telegramBotKey}/sendMessage"
toalmsg.chunked(512).forEach { chunkedMsg -> toalmsg.chunked(512).forEach { chunkedMsg ->
println("fullUrl >>> ${fullUrl}") println("fullUrl >>> ${fullUrl}")
(targetId ?: globalEvv.telegramMyId)?.let { (targetId ?: globalEvv.telegramMyId)?.let {
@ -354,6 +453,7 @@ class Lama {
} }
} }
} }
infomationDic.remove(query!!)
} }
println("On generateResponse :: END OF Answer") println("On generateResponse :: END OF Answer")
} }

View File

@ -0,0 +1,151 @@
package kr.lunaticbum.back.lun.utils
import java.io.IOException
import java.io.InputStream
import java.net.MalformedURLException
import java.net.URL
import javax.xml.stream.XMLEventReader
import javax.xml.stream.XMLInputFactory
import javax.xml.stream.XMLStreamException
import javax.xml.stream.events.Characters
import javax.xml.stream.events.XMLEvent
/**
 * One entry of a feed.
 *
 * Every field is optional and starts out as `null`; the object is a plain
 * mutable holder that is populated field by field after construction.
 */
class FeedMessage {
    var title: String? = null
    var description: String? = null
    var link: String? = null
    var author: String? = null
    var guid: String? = null

    /** Renders all five fields in a single bracketed line; unset fields print as `null`. */
    override fun toString(): String =
        "FeedMessage [title=$title, description=$description, link=$link, author=$author, guid=$guid]"
}
/**
 * Feed-level metadata together with the entries collected for that feed.
 *
 * The six header values are fixed at construction time; [messages] starts
 * empty and is appended to by whoever builds the feed.
 */
class Feed(
    val title: String,
    val link: String,
    val description: String,
    val language: String,
    val copyright: String,
    val pubDate: String
) {
    /** Entries that belong to this feed, in the order they were added. */
    val messages: ArrayList<FeedMessage> = ArrayList()

    /** Renders only the header values; the entries in [messages] are not included. */
    override fun toString(): String =
        "Feed [copyright=$copyright, description=$description, language=$language, " +
            "link=$link, pubDate=$pubDate, title=$title]"
}
/**
 * Small StAX-based reader that turns an RSS document into a [Feed] with one
 * [FeedMessage] per `<item>` element.
 *
 * Elements are matched by local name. Elements carrying a namespace prefix
 * (for example `<atom:link/>`) are ignored so they cannot overwrite the plain
 * RSS value of the same local name.
 */
class RssFeedsParser {
    // Local names of the RSS elements this parser understands.
    val TITLE: String = "title"
    val DESCRIPTION: String = "description"
    val CHANNEL: String = "channel"
    val LANGUAGE: String = "language"
    val COPYRIGHT: String = "copyright"
    val LINK: String = "link"
    val AUTHOR: String = "author"
    val ITEM: String = "item"
    val PUB_DATE: String = "pubDate"
    val GUID: String = "guid"

    /** URL of the feed most recently passed to [readFeed]; `null` until the first call. */
    var url: URL? = null

    /**
     * Downloads and parses the feed at [feedUrl].
     *
     * Header values seen before the first `<item>` become the [Feed] header.
     * Each `<item>` becomes one [FeedMessage]; a field the item does not
     * provide is left as an empty string.
     *
     * @param feedUrl location of the RSS document.
     * @return the parsed feed, or `null` when the document contains no `<item>`.
     * @throws RuntimeException wrapping [MalformedURLException] for an invalid
     *   URL, [IOException] when the stream cannot be opened, or
     *   [XMLStreamException] when the document cannot be parsed.
     */
    fun readFeed(feedUrl: String?): Feed? {
        try {
            this.url = URL(feedUrl)
        } catch (e: MalformedURLException) {
            throw RuntimeException(e)
        }

        var feed: Feed? = null
        try {
            var isFeedHeader = true
            // Header values start as empty strings; once the first item is
            // reached the same variables hold the fields of the current item.
            var description = ""
            var title = ""
            var link = ""
            var language = ""
            var copyright = ""
            var author = ""
            var pubdate = ""
            var guid = ""

            val inputFactory = XMLInputFactory.newInstance()
            // Deliver adjacent text/CDATA as one Characters event so a value
            // is not cut off at the first chunk.
            inputFactory.setProperty(XMLInputFactory.IS_COALESCING, true)
            // The document comes from a remote server: do not process DTDs or
            // resolve external entities (XXE hardening).
            inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false)
            inputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false)

            // `use` closes the network stream even when parsing fails.
            read().use { stream ->
                val eventReader = inputFactory.createXMLEventReader(stream)
                try {
                    while (eventReader.hasNext()) {
                        val event = eventReader.nextEvent()
                        if (event.isStartElement) {
                            val name = event.asStartElement().name
                            // Skip prefixed extension elements such as <atom:link/>.
                            if (name.prefix.isNotEmpty()) continue
                            when (name.localPart) {
                                ITEM -> {
                                    if (isFeedHeader) {
                                        isFeedHeader = false
                                        feed = Feed(title, link, description, language, copyright, pubdate)
                                    }
                                    // Start every item from a clean slate so it
                                    // cannot inherit the header's or the previous
                                    // item's values.
                                    title = ""
                                    description = ""
                                    link = ""
                                    author = ""
                                    guid = ""
                                }
                                TITLE -> title = getCharacterData(event, eventReader)
                                DESCRIPTION -> description = getCharacterData(event, eventReader)
                                LINK -> link = getCharacterData(event, eventReader)
                                GUID -> guid = getCharacterData(event, eventReader)
                                LANGUAGE -> language = getCharacterData(event, eventReader)
                                AUTHOR -> author = getCharacterData(event, eventReader)
                                PUB_DATE -> pubdate = getCharacterData(event, eventReader)
                                COPYRIGHT -> copyright = getCharacterData(event, eventReader)
                            }
                        } else if (event.isEndElement) {
                            val name = event.asEndElement().name
                            // Structural comparison: the parser's string is not
                            // guaranteed to be the same instance as the constant.
                            if (name.prefix.isEmpty() && name.localPart == ITEM) {
                                val message = FeedMessage()
                                message.author = author
                                message.description = description
                                message.guid = guid
                                message.link = link
                                message.title = title
                                feed?.messages?.add(message)
                            }
                        }
                    }
                } finally {
                    // Closing the reader does not close the stream; `use` does that.
                    eventReader.close()
                }
            }
        } catch (e: XMLStreamException) {
            throw RuntimeException(e)
        }
        return feed
    }

    /**
     * Consumes the event that follows a start element and returns its text.
     *
     * @param event the start element that was just read (kept for signature
     *   compatibility; not inspected).
     * @param eventReader reader positioned right after [event].
     * @return the character data of the next event, or an empty string when
     *   the next event is not character data (for example an empty element).
     */
    @Throws(XMLStreamException::class)
    private fun getCharacterData(event: XMLEvent, eventReader: XMLEventReader): String {
        val next = eventReader.nextEvent()
        return if (next is Characters) next.asCharacters().data else ""
    }

    /**
     * Opens the stream for [url].
     *
     * @throws RuntimeException wrapping the [IOException] when the connection fails.
     */
    private fun read(): InputStream {
        val target = checkNotNull(url) { "readFeed must set url before read() is called" }
        try {
            return target.openStream()
        } catch (e: IOException) {
            throw RuntimeException(e)
        }
    }
}