From c69e18346ae0ea3c99b46ba6166bbe85dd7a9c52 Mon Sep 17 00:00:00 2001 From: lunaticbum Date: Fri, 14 Mar 2025 17:47:19 +0900 Subject: [PATCH] ... --- build.gradle.kts | 4 + .../back/lun/configs/BumsInterceptor.kt | 2 +- .../back/lun/controllers/Telegram.kt | 2 +- .../lunaticbum/back/lun/model/QCollection.kt | 34 +- .../kr/lunaticbum/back/lun/service/Lama.kt | 366 +++++++++++------- .../back/lun/utils/RssFeedsParser.kt | 151 ++++++++ 6 files changed, 407 insertions(+), 152 deletions(-) create mode 100644 src/main/kotlin/kr/lunaticbum/back/lun/utils/RssFeedsParser.kt diff --git a/build.gradle.kts b/build.gradle.kts index 076b69f..246bab6 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -52,6 +52,10 @@ dependencies { implementation("org.springframework.boot:spring-boot-starter-thymeleaf") implementation("nz.net.ultraq.thymeleaf:thymeleaf-layout-dialect") implementation ("org.jsoup:jsoup:1.18.1") + + implementation ("org.seleniumhq.selenium:selenium-java:4.10.0") + + implementation ("com.drewnoakes:metadata-extractor:2.19.0") implementation("org.springframework.boot:spring-boot-starter-security") compileOnly("org.projectlombok:lombok") diff --git a/src/main/kotlin/kr/lunaticbum/back/lun/configs/BumsInterceptor.kt b/src/main/kotlin/kr/lunaticbum/back/lun/configs/BumsInterceptor.kt index d0b33c9..c8415e0 100644 --- a/src/main/kotlin/kr/lunaticbum/back/lun/configs/BumsInterceptor.kt +++ b/src/main/kotlin/kr/lunaticbum/back/lun/configs/BumsInterceptor.kt @@ -44,7 +44,7 @@ class BumsInterceptor : HandlerInterceptor { handler: Any, @Nullable modelAndView: ModelAndView? ) { - var skippResourcesExtension = arrayListOf(".ajax",".js",".css","/tlg/",".api").filter { request.requestURI.contains(it)}.size > 0 + var skippResourcesExtension = arrayListOf(".ajax",".js",".css","/tlg/",".api","error").filter { request.requestURI.contains(it)}.size > 0 if (!skippResourcesExtension) { if (request.requestURI.contains("logout") == false && !request.cookies.isNullOrEmpty() && request.cookies.filter { it.name.equals( diff --git a/src/main/kotlin/kr/lunaticbum/back/lun/controllers/Telegram.kt b/src/main/kotlin/kr/lunaticbum/back/lun/controllers/Telegram.kt index 1dde4f9..401859c 100644 --- a/src/main/kotlin/kr/lunaticbum/back/lun/controllers/Telegram.kt +++ b/src/main/kotlin/kr/lunaticbum/back/lun/controllers/Telegram.kt @@ -371,7 +371,7 @@ class Telegram { // } // } CoroutineScope(Dispatchers.IO).async { - lama.generateResponse(query = originalQuery?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date()))) + lama.generateResponse(originalQuery?.replace("오늘","오늘(${SimpleDateFormat("yyyy-MM-dd").format(Date())})")) } return "TEST" } diff --git a/src/main/kotlin/kr/lunaticbum/back/lun/model/QCollection.kt b/src/main/kotlin/kr/lunaticbum/back/lun/model/QCollection.kt index 3b260b0..180b1ce 100644 --- a/src/main/kotlin/kr/lunaticbum/back/lun/model/QCollection.kt +++ b/src/main/kotlin/kr/lunaticbum/back/lun/model/QCollection.kt @@ -16,8 +16,8 @@ class QConfig { } class QHnswConfig { - var m: Int = 0 - var ef_construct: Int = 0 + var m: Long = 0 + var ef_construct: Long = 0 var full_scan_threshold: Int = 0 var max_indexing_threads: Int = 0 var on_disk: Boolean = false @@ -25,20 +25,20 @@ class QHnswConfig { class QOptimizerConfig { var deleted_threshold: Double = 0.0 - var vacuum_min_vector_number: Int = 0 - var default_segment_number: Int = 0 + var vacuum_min_vector_number: Long = 0 + var default_segment_number: Long = 0 var max_segment_size: Any? = null var memmap_threshold: Any? = null - var indexing_threshold: Int = 0 - var flush_interval_sec: Int = 0 + var indexing_threshold: Long = 0 + var flush_interval_sec: Long = 0 var max_optimization_threads: Any? = null } class QParams { var vectors: QVectors? = null - var shard_number: Int = 0 - var replication_factor: Int = 0 - var write_consistency_factor: Int = 0 + var shard_number: Long = 0 + var replication_factor: Long = 0 + var write_consistency_factor: Long = 0 var on_disk_payload: Boolean = false } @@ -47,9 +47,9 @@ class QPayloadSchema class QResult { var status: String? = null var optimizer_status: String? = null - var indexed_vectors_count: Int = 0 + var indexed_vectors_count: Long = 0 var points_count: Long = 0 - var segments_count: Int = 0 + var segments_count: Long = 0 var config: QConfig? = null var payload_schema: QPayloadSchema? = null } @@ -61,18 +61,18 @@ class QStrictModeConfig { } class QVectors { - var size: Int = 0 + var size: Long = 0 var distance: String? = null } class QWalConfig { - var wal_capacity_mb: Int = 0 - var wal_segments_ahead: Int = 0 + var wal_capacity_mb: Long = 0 + var wal_segments_ahead: Long = 0 } class QSearchResult { - var id: Int = 0 - var version: Int = 0 + var id: Long = 0 + var version: Long = 0 var score: Double = 0.0 } @@ -103,7 +103,7 @@ class QContentsPayload { } class QContentsResult { - var id: Int = 0 + var id: Long = 0 var payload: QContentsPayload? = null } diff --git a/src/main/kotlin/kr/lunaticbum/back/lun/service/Lama.kt b/src/main/kotlin/kr/lunaticbum/back/lun/service/Lama.kt index 7bcfbc3..0439f41 100644 --- a/src/main/kotlin/kr/lunaticbum/back/lun/service/Lama.kt +++ b/src/main/kotlin/kr/lunaticbum/back/lun/service/Lama.kt @@ -3,34 +3,40 @@ package kr.lunaticbum.back.lun.service import com.google.gson.Gson -import com.google.gson.annotations.SerializedName +import com.google.gson.JsonElement +import com.google.gson.JsonObject +import com.google.gson.JsonParser import io.micrometer.observation.ObservationRegistry import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.async import kotlinx.coroutines.launch import kr.lunaticbum.back.lun.configs.GlobalEnvironment import kr.lunaticbum.back.lun.controllers.TelegramSendMsg import kr.lunaticbum.back.lun.model.* +import kr.lunaticbum.back.lun.utils.RssFeedsParser import org.jsoup.Jsoup import org.jsoup.select.Elements +import org.openqa.selenium.By +import org.openqa.selenium.WebDriver +import org.openqa.selenium.chrome.ChromeOptions +import org.openqa.selenium.remote.RemoteWebDriver import org.springframework.ai.embedding.EmbeddingRequest import org.springframework.ai.ollama.OllamaEmbeddingModel import org.springframework.ai.ollama.api.OllamaApi import org.springframework.ai.ollama.api.OllamaOptions import org.springframework.ai.ollama.management.ModelManagementOptions import org.springframework.beans.factory.annotation.Autowired -import org.springframework.beans.factory.annotation.Qualifier import org.springframework.http.MediaType import org.springframework.scheduling.annotation.Async import org.springframework.stereotype.Service import org.springframework.web.reactive.function.BodyInserters import org.springframework.web.reactive.function.client.WebClient import reactor.kotlin.core.publisher.toMono +import java.net.URL +import java.net.URLEncoder import java.text.SimpleDateFormat import java.time.Duration import java.util.* -import kotlin.collections.ArrayList @Service @@ -46,7 +52,7 @@ class Lama { data class QPut(val points : ArrayList) data class QData(val id : Long, val vector : FloatArray, val payload : SearXngResult) - data class QContentsList(var ids : ArrayList = ArrayList(), var with_payload : Boolean = true, var with_vector : Boolean = false) + data class QContentsList(var ids : ArrayList = ArrayList(), var with_payload : Boolean = true, var with_vector : Boolean = false) // fun makeCollection() : String{ // // class CollectionPut { @@ -76,51 +82,51 @@ class Lama { fun jsopFilter(url : String) : String { val joinString = "\n#" - var lastElement : Elements = Elements() + var lastElements : Elements = Elements() var body = Jsoup.connect(url).timeout(30000).get().body() - var elements : Elements? = null - if (url.contains("nate.com", true)) { - if (url.contains("view", true)) { - elements = body.select("[class*=articleView]") - }else { - elements = body.select("[class*=postRankSubjectList]") - } - } else if (url.contains("newsis.com/view", true)) { - elements = body.select("[class*=articleView]") - } else if (url.contains("blog.naver.com", true)) { - elements = body.select("[class*=se-viewer]") - } else if (url.contains("bbc.com/korean/articles", true)) { - elements = body.select("main[role$=main]") - } else if (url.contains("chosun.com/client", true)) { - elements = body.select("[class*=articleBody]") - } else if (url.contains("nocutnews.co.kr/news", true)) { - elements = body.select("[class*=container]") - } else if (url.contains("hani.co.kr/arti/", true)) { - elements = body.select("[class*=ArticleDetail]") - } else if (url.contains("yna.co.kr/view", true)) { - elements = body.select("[class*=container]") - } else if (url.contains("newspim.com/news", true)) { - elements = body.select("[class*=container]") - } else { - - } - if (elements?.size ?: 0 > 0) { - elements?.forEach { - lastElement.add(it) - } - } - - if (lastElement.size < 1) { - arrayOf("container","article","main","viewer","content").forEach { - var result = Elements() - result.addAll(body.select("[class*=$it]")) - result.addAll(body.select("[id*=$it]")) - result.addAll(body.select(it)) - result.forEach { if (it.text().length > 100 && it.children().size < 5) { lastElement.add(it) } } - } - } - return if (lastElement.size > 0) { - lastElement.eachText().joinToString(joinString) +// var elements : Elements? = null +// if (url.contains("nate.com", true)) { +// if (url.contains("view", true)) { +// elements = body.select("[class*=articleView]") +// }else { +// elements = body.select("[class*=postRankSubjectList]") +// } +// } else if (url.contains("newsis.com/view", true)) { +// elements = body.select("[class*=articleView]") +// } else if (url.contains("blog.naver.com", true)) { +// elements = body.select("[class*=se-viewer]") +// } else if (url.contains("bbc.com/korean/articles", true)) { +// elements = body.select("main[role$=main]") +// } else if (url.contains("chosun.com/client", true)) { +// elements = body.select("[class*=articleBody]") +// } else if (url.contains("nocutnews.co.kr/news", true)) { +// elements = body.select("[class*=container]") +// } else if (url.contains("hani.co.kr/arti/", true)) { +// elements = body.select("[class*=ArticleDetail]") +// } else if (url.contains("yna.co.kr/view", true)) { +// elements = body.select("[class*=container]") +// } else if (url.contains("newspim.com/news", true)) { +// elements = body.select("[class*=container]") +// } else { +// +// } +// if (elements?.size ?: 0 > 0) { +// elements?.forEach { +// lastElements.add(it) +// } +// } +// +// if (lastElements.size < 1) { +// arrayOf("container","article","main","viewer","content").forEach { +// var result = Elements() +// result.addAll(body.select("[class*=$it]")) +// result.addAll(body.select("[id*=$it]")) +// result.addAll(body.select(it)) +// result.forEach { if (it.text().length > 100 && it.children().size < 5) { lastElements.add(it) } } +// } +// } + return if (lastElements.size > 0) { + lastElements.eachText().joinToString(joinString) } else { body.children().eachText().joinToString(joinString) } @@ -140,104 +146,200 @@ class Lama { val embedimgModelEeve ="lancard/korean-yanolja-eeve" val embedimgModelBgeM3 = "bge-m3" - val currentEmbedimg = embedimgModelEeve + val currentEmbedimg = embedimgModelBgeM3 val llmPhi4 = "phi4:14b" - val llmGemma3 = "gemma3:12b" - + val llmGemma3 = "gemma3:4b" + val llmPhi4Mini = "phi4-mini" val llmDolphin3 = "dolphin3" - val currentLLM = llmDolphin3 + val currentLLM = llmGemma3 + fun getGoogleSearch(query:String){ + Jsoup.connect("https://www.google.com/search?q=".plus(query)).timeout(30000).get().select("a[href]").forEach { } + } - fun addDocuments(query : String , refinedQuery: RefinedQuery?) { + val waitTime = 1000L + val topCount = 2 + + @Async + suspend fun addDocuments(query : String , refinedQuery: RefinedQuery?) { var querys : ArrayList = ArrayList() querys.add(query) + refinedQuery?.ko_query?.let { querys.add(it) } refinedQuery?.en_query?.let { querys.add(it) } refinedQuery?.keywords?.let { querys.add(it.joinToString { " " })} val readedUrls = ArrayList() - querys.forEach { refinedQuery -> - CoroutineScope(Dispatchers.IO).launch { - val gSearch = "https://psn.lunaticbum.kr/search?q=${ - refinedQuery?.replace( - "오늘", - SimpleDateFormat("yyyMMdd").format(Date()) - ) - }&language=ko&time_range=month&safesearch=0&categories=general&format=json" - println("gSearch >>> ${gSearch}") - WebClient.create().get() - .uri(gSearch) - .retrieve() - .bodyToMono(SearXng::class.java).timeout(Duration.ofMinutes(20L)).block()?.let { gsResult -> - gsResult.results?.filter { it.url?.startsWith("https://") == true && it.score > 0.4 }?.forEach { - println("in filter ${it.url}") - if (readedUrls.contains(it.url) == false) { - readedUrls.add(it.url!!) - it.originQuery = query - it.refinedQuery = refinedQuery - println(it.title) - try { - jsopFilter(it.url!!).let { text -> - it.originHtml = text - webPageSummarize(it, text) - } - } catch (e: Exception) { - e.printStackTrace() + + try { + var options : ChromeOptions = ChromeOptions(); + options.addArguments("--disable-popup-blocking"); + options.addArguments("--disable-default-apps"); + options.addArguments("--disable-notifications"); + options.addArguments("--disable-blink-features=AutomationControlled"); + val targetUrls = hashSetOf() + RemoteWebDriver(URL("https://video.lunaticbum.kr"), options).let { driver -> + querys.forEach { refinedQuery-> + var findCount = 0 + try { + driver.get("https://www.google.com/search?q=$refinedQuery"); + Thread.sleep(waitTime) + println(driver.currentUrl) + driver.findElement(By.ByTagName("Body"))?.let { webElement -> + Jsoup.parse(driver.pageSource).select("[href*=https]").forEach { + var href = it.attr("href") + if (href?.length ?: 0 > 5 && href.startsWith("https://") && findCount < topCount && href.contains("google") == false && href.contains("youtube") == false) { + targetUrls.add(href) + println("add targetUrls $href") + findCount += 1 } } } + + }catch (e:Exception){ + e.printStackTrace() } + } + driver.close() + driver.quit() } + options = ChromeOptions(); + options.addArguments("--disable-popup-blocking"); + options.addArguments("--disable-default-apps"); + options.addArguments("--disable-notifications"); + options.addArguments("--disable-blink-features=AutomationControlled"); + RemoteWebDriver(URL("https://video.lunaticbum.kr"), options).let { driver -> + targetUrls.forEach { url -> + var result = SearXngResult() + if (url?.length ?: 0 > 5 && url?.startsWith("https://") == true && readedUrls.contains(url) == false) { + readedUrls.add(url!!) + result.url = url!! + result.originQuery = query + try { + driver.get(url); + Thread.sleep(waitTime) + driver.findElement(By.ByTagName("Body"))?.let { webElement -> + if(webElement.text.length > 120) { + println(driver.currentUrl) + println(webElement.text) + result.title = driver.title + result.originHtml = webElement.text + webPageSummarize(result, webElement.text) + } + } + + } catch (e: Exception) { + e.printStackTrace() + } + } + } + driver.close(); + driver.quit() + } + options = ChromeOptions(); + options.addArguments("--disable-popup-blocking"); + options.addArguments("--disable-default-apps"); + options.addArguments("--disable-notifications"); + options.addArguments("--disable-blink-features=AutomationControlled"); + RemoteWebDriver(URL("https://video.lunaticbum.kr"), options).let { driver -> + querys.forEach { refinedQuery -> + var googleSCount = 0 + RssFeedsParser().readFeed("https://news.google.com/rss/search?q=${URLEncoder.encode(query)}=ko&gl=KR&ceid=KR%3Ako/")?.messages?.forEach { + var url: String? = it.link + var result = SearXngResult() + println("url >>>> $url") + if (url?.length ?: 0 > 5 && url?.startsWith("https://") == true && readedUrls.contains(url) == false && googleSCount < topCount) { + readedUrls.add(url!!) + result.url = url!! + result.originQuery = query + result.refinedQuery = refinedQuery + result.title = it.title + println(result.title) + try { + driver.get(url); + Thread.sleep(waitTime) + println(driver.currentUrl) + driver.findElement(By.ByTagName("Body"))?.let { webElement -> + println(driver.currentUrl) + println(webElement.text) + result.title = driver.title + result.originHtml = webElement.text + webPageSummarize(result, webElement.text) + googleSCount += 1 + } + + } catch (e: Exception) { + e.printStackTrace() + } + } + } + } + driver.close() + driver.quit() + } + + } catch (e:Exception){e.printStackTrace()} + + querys.forEach { refinedQuery -> + val gSearch = "https://psn.lunaticbum.kr/search?q=${refinedQuery?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date()))}&language=ko&time_range=month&safesearch=0&categories=general&format=json" + println("gSearch >>> ${gSearch}") + WebClient.create().get() + .uri(gSearch) + .retrieve() + .bodyToMono(SearXng::class.java).timeout(Duration.ofMinutes(20L)).block()?.let { gsResult -> + gsResult.results?.filter { it.url?.startsWith("https://") == true && it.score > 0.4 }?.forEach { + println("in filter ${it.url}") + if (readedUrls.contains(it.url) == false) { + readedUrls.add(it.url!!) + it.originQuery = query + it.refinedQuery = refinedQuery + println(it.title) + try { + jsopFilter(it.url!!).let { text -> + it.originHtml = text + webPageSummarize(it, text) + } + } catch (e: Exception) { + e.printStackTrace() + } + } + } + } println("end of search") } } - var format = "원문:\n'%s'\n원문의 웹 페이지 소스는 '%s'이 질문에 대해 연관 결과로 받은 내용이야. 해당 정보를 파악해서 본문 내용을 최대한 자세히 알려줘 'query:{질문},contents:{본문내용 한국어},summary:{100자 이하로 요약 한국어},keywords:[키워드],related_links:[링크],relatedness_score:{0.0~10.0}'이 형식의 결과만들어줘" + var format = "context:'%s'\ncontext는 웹 페이지 문자를 가져온 것 '%s'이 질문에 대해 연관 결과로 받은 내용임. 해당 context 정리 해서 본문 내용을 최대한 자세히 알려줘\n'{query:질문 내용, contents_ko:자세한 내용 한국어 , summary_ko:요약된 내용 한국어, keywords:[키워드], related_links:[{link,description}}], relatedness_score:0.0~10.0}'\n이 형식의 결과로 만들어 줘" internal fun makeSummarizeRequestMsg(it : SearXngResult) : String= format.format(it.originHtml,it.originQuery) - internal fun makeCahtReq(reqMsg:String) = OllamaApi.ChatRequest.Builder(currentLLM).stream(false).format("json").messages(listOf(OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content(reqMsg).build())).build() + internal fun makeCahtReq(reqMsg:String) = OllamaApi.ChatRequest.Builder(currentLLM).stream(false).format("json").messages(reqMsg.chunked(100).map { println(it); OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content(it).build()}.toList()).build() @Async fun webPageSummarize(it : SearXngResult , text : String) { try { + infomationDic.get(it.originQuery)!!.put(it.url!!, text) val chatClient = OllamaApi("https://lama.lunaticbum.kr") - val embeddingModel = OllamaEmbeddingModel( - chatClient, OllamaOptions.builder().build(), ObservationRegistry.create(), ModelManagementOptions.defaults()) - println("text >>>>> ${text?.chunked(50)?.first() ?: ""}") - var dispoable = chatClient.chat(makeCahtReq(makeSummarizeRequestMsg(it))).toMono().subscribe({aiResponce -> - it.pageData = aiResponce.message.content - println("summary result >>>>> ${it.pageData}") - val embeddingResponse = embeddingModel.call( - EmbeddingRequest( - listOf(aiResponce.message.content), - OllamaOptions.builder() - .model(currentEmbedimg) - .truncate(false).build() + val embeddingModel = OllamaEmbeddingModel(chatClient, OllamaOptions.builder().build(), ObservationRegistry.create(), ModelManagementOptions.defaults()) + val embeddingResponse = embeddingModel.call(EmbeddingRequest(text.chunked(400).toList(), OllamaOptions.builder().model(currentEmbedimg).truncate(false).build())) + it.originHtml = text + val sdss = QPut(arrayListOf()) + sdss.points.add(QData(id = System.currentTimeMillis(), embeddingResponse.result.output, it)) + if (sdss.points.size > 0) { + val qUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points") + val client = WebClient.create() + client.put() + .uri(qUrl) + .header("api-key", "blama-admin-key-gb") + .body(BodyInserters.fromValue(Gson().toJson(sdss))) + .retrieve() + .bodyToMono(String::class.java).timeout(Duration.ofMinutes(20L)).subscribe( + { resultString -> }, { error -> error.printStackTrace() } ) - ) - infomationDic.put(it.url!!,aiResponce.message.content) - val sdss = QPut(arrayListOf()) - sdss.points.add(QData(id = System.currentTimeMillis(),embeddingResponse.result.output,it)) - if (sdss.points.size > 0) { - val qUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points") - val client = WebClient.create() - client.put() - .uri(qUrl) - .header("api-key", "blama-admin-key-gb") - .body(BodyInserters.fromValue(Gson().toJson(sdss))) - .retrieve() - .bodyToMono(String::class.java).timeout(Duration.ofMinutes(20L)).subscribe( - {resultString -> },{error-> error.printStackTrace()} - ) - } - },{err-> - err.printStackTrace() - }) + } }catch (e : Exception) { - e.printStackTrace() } } @@ -281,10 +383,7 @@ class Lama { println(Gson().toJson(lists)) return if (lists?.result?.size ?: 0 > 0) { val qContents = QContentsList() - lists?.result?.forEach { - - qContents.ids.add(it.id) - } + lists?.result?.filter { it.score > 8.0 }?.forEach { qContents.ids.add(it.id) } val qCUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points") val client2 = WebClient.create() client2.post() @@ -301,44 +400,44 @@ class Lama { @Autowired lateinit var globalEvv : GlobalEnvironment - var infomationDic = hashMapOf() + var infomationDic = hashMapOf>() suspend fun generateResponse(query: String?, targetId: String? = globalEvv.telegramMyId) { - infomationDic.clear() + val chatClient = OllamaApi("https://lama.lunaticbum.kr") val embeddingModel = OllamaEmbeddingModel( chatClient, OllamaOptions.builder().build(), ObservationRegistry.create(), ModelManagementOptions.defaults()) println("On generateResponse :: find something ${query}") + query?.let { originalQuery -> + infomationDic.put(query!!, hashMapOf()) var embeddingResponse = embeddingModel.call(EmbeddingRequest(listOf(originalQuery), OllamaOptions.builder().model(currentEmbedimg).truncate(false).build())) addDocuments(originalQuery, querySummarize(originalQuery)) println("points size ${embeddingResponse.result.output.size}") - var context : String? = "" + var context : StringBuffer = StringBuffer() try { embedQuery(embeddingResponse.result.output)?.result?.forEach { result -> - if (infomationDic.contains(result.payload?.url ?: "NONE") == false) { - context += "\n# :".plus(if (result.payload?.pageData?.length ?: 0 > 10) { + if (infomationDic.get(query!!)!!.contains(result.payload?.url ?: "NONE") == false) { + context.append("\n# :".plus(if (result.payload?.pageData?.length ?: 0 > 10) { result.payload?.pageData } else { result.payload?.content - }) + })) } } }catch (e:Exception){ e.printStackTrace() } - infomationDic.iterator().forEach { context += "\n#${it.key}:${it.value}" } + infomationDic.get(query!!)!!.iterator().forEach { context.append("\n#${it.key}:${it.value}") } + + val prompt : StringBuffer = StringBuffer().append("참조:\n").append(context).append("\n참조 내용을 고려 해서\n'$query'").append(query).append("\n에 {querys:[],answers:[],keywords:[],links:[]}형식으로 최대한 자세히 대답 해줘 ") + val fullUrl = "https://api.telegram.org/${globalEvv.telegramBotKey}/sendMessage" - val prompt = "참조:\n$context\n참조 내용을 고려해서\n해당 질문:${query}\n에 {질문내용:[한국어],답변내용:[한국어],전체키워드:[],참조링크:[]}형식으로 대답 해줘 ".trimIndent() - println(prompt) val response: OllamaApi.ChatResponse = chatClient.chat(OllamaApi.ChatRequest.Builder(currentLLM).stream(false).format("json").messages( - listOf(OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content(prompt).build()) - ).build()) - - println(response.message.content) + prompt.chunked(300).map { println(it); OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content(it).build()}.toList()).build()) +// println(response.message.content) CoroutineScope(Dispatchers.IO).launch { var toalmsg = "${query}의 대답이 도착했어요.\n${response.message.content}" - val fullUrl = "https://api.telegram.org/${globalEvv.telegramBotKey}/sendMessage" toalmsg.chunked(512).forEach { chunkedMsg -> println("fullUrl >>> ${fullUrl}") (targetId ?: globalEvv.telegramMyId)?.let { @@ -354,6 +453,7 @@ class Lama { } } } + infomationDic.remove(query!!) } println("On generateResponse :: END OF Answer") } diff --git a/src/main/kotlin/kr/lunaticbum/back/lun/utils/RssFeedsParser.kt b/src/main/kotlin/kr/lunaticbum/back/lun/utils/RssFeedsParser.kt new file mode 100644 index 0000000..a23170a --- /dev/null +++ b/src/main/kotlin/kr/lunaticbum/back/lun/utils/RssFeedsParser.kt @@ -0,0 +1,151 @@ +package kr.lunaticbum.back.lun.utils + +import java.io.IOException +import java.io.InputStream +import java.net.MalformedURLException +import java.net.URL +import javax.xml.stream.XMLEventReader +import javax.xml.stream.XMLInputFactory +import javax.xml.stream.XMLStreamException +import javax.xml.stream.events.Characters +import javax.xml.stream.events.XMLEvent + + +class FeedMessage { + var title: String? = null + var description: String? = null + var link: String? = null + var author: String? = null + var guid: String? = null + + override fun toString(): String { + return ("FeedMessage [title=" + title + ", description=" + description + + ", link=" + link + ", author=" + author + ", guid=" + guid + + "]") + } +} +class Feed( + val title: String, val link: String, val description: String, val language: String, + val copyright: String, val pubDate: String +) { + val messages: ArrayList = ArrayList() + + override fun toString(): String { + return ("Feed [copyright=" + copyright + ", description=" + description + + ", language=" + language + ", link=" + link + ", pubDate=" + + pubDate + ", title=" + title + "]") + } +} +class RssFeedsParser { + + val TITLE: String = "title" + val DESCRIPTION: String = "description" + val CHANNEL: String = "channel" + val LANGUAGE: String = "language" + val COPYRIGHT: String = "copyright" + val LINK: String = "link" + val AUTHOR: String = "author" + val ITEM: String = "item" + val PUB_DATE: String = "pubDate" + val GUID: String = "guid" + + var url: URL? = null + +// fun parser(feedUrl: String?) { +// try { +// this.url = URL(feedUrl) +// } catch (e: MalformedURLException) { +// throw RuntimeException(e) +// } +// } + + fun readFeed(feedUrl: String?): Feed? { + try { + this.url = URL(feedUrl) + } catch (e: MalformedURLException) { + throw RuntimeException(e) + } + var feed: Feed? = null + try { + var isFeedHeader = true + // Set header values intial to the empty string + var description = "" + var title = "" + var link = "" + var language = "" + var copyright = "" + var author = "" + var pubdate = "" + var guid = "" + + // First create a new XMLInputFactory + val inputFactory = XMLInputFactory.newInstance() + // create a new eventReader + val `in` = read() + val eventReader = inputFactory.createXMLEventReader(`in`) + // read the XML document + while (eventReader.hasNext()) { + var event = eventReader.nextEvent() + if (event.isStartElement) { + val localPart = event.asStartElement().name + .localPart + when (localPart) { + ITEM -> { + if (isFeedHeader) { + isFeedHeader = false + feed = Feed( + title!!, link!!, description!!, language!!, + copyright!!, pubdate!! + ) + } + event = eventReader.nextEvent() + } + + TITLE -> title = getCharacterData(event, eventReader) + DESCRIPTION -> description = getCharacterData(event, eventReader) + LINK -> link = getCharacterData(event, eventReader) + GUID -> guid = getCharacterData(event, eventReader) + LANGUAGE -> language = getCharacterData(event, eventReader) + AUTHOR -> author = getCharacterData(event, eventReader) + PUB_DATE -> pubdate = getCharacterData(event, eventReader) + COPYRIGHT -> copyright = getCharacterData(event, eventReader) + } + } else if (event.isEndElement) { + if (event.asEndElement().name.localPart === (ITEM)) { + val message = FeedMessage() + message.author = author + message.description = description + message.guid = guid + message.link = link + message.title = title + feed!!.messages.add(message) + event = eventReader.nextEvent() + continue + } + } + } + } catch (e: XMLStreamException) { + throw RuntimeException(e) + } + return feed + } + + @Throws(XMLStreamException::class) + private fun getCharacterData(event: XMLEvent, eventReader: XMLEventReader): String { + var event = event + var result = "" + event = eventReader.nextEvent() + if (event is Characters) { + result = event.asCharacters().data + } + return result + } + + private fun read(): InputStream { + try { + return url!!.openStream() + } catch (e: IOException) { + throw RuntimeException(e) + } + } +} \ No newline at end of file