...
This commit is contained in:
parent
81417ff8aa
commit
c69e18346a
@ -52,6 +52,10 @@ dependencies {
|
||||
implementation("org.springframework.boot:spring-boot-starter-thymeleaf")
|
||||
implementation("nz.net.ultraq.thymeleaf:thymeleaf-layout-dialect")
|
||||
implementation ("org.jsoup:jsoup:1.18.1")
|
||||
|
||||
implementation ("org.seleniumhq.selenium:selenium-java:4.10.0")
|
||||
|
||||
|
||||
implementation ("com.drewnoakes:metadata-extractor:2.19.0")
|
||||
implementation("org.springframework.boot:spring-boot-starter-security")
|
||||
compileOnly("org.projectlombok:lombok")
|
||||
|
||||
@ -44,7 +44,7 @@ class BumsInterceptor : HandlerInterceptor {
|
||||
handler: Any,
|
||||
@Nullable modelAndView: ModelAndView?
|
||||
) {
|
||||
var skippResourcesExtension = arrayListOf(".ajax",".js",".css","/tlg/",".api").filter { request.requestURI.contains(it)}.size > 0
|
||||
var skippResourcesExtension = arrayListOf(".ajax",".js",".css","/tlg/",".api","error").filter { request.requestURI.contains(it)}.size > 0
|
||||
if (!skippResourcesExtension) {
|
||||
if (request.requestURI.contains("logout") == false && !request.cookies.isNullOrEmpty() && request.cookies.filter {
|
||||
it.name.equals(
|
||||
|
||||
@ -371,7 +371,7 @@ class Telegram {
|
||||
// }
|
||||
// }
|
||||
CoroutineScope(Dispatchers.IO).async {
|
||||
lama.generateResponse(query = originalQuery?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date())))
|
||||
lama.generateResponse(originalQuery?.replace("오늘","오늘(${SimpleDateFormat("yyyy-MM-dd").format(Date())})"))
|
||||
}
|
||||
return "TEST"
|
||||
}
|
||||
|
||||
@ -16,8 +16,8 @@ class QConfig {
|
||||
}
|
||||
|
||||
class QHnswConfig {
|
||||
var m: Int = 0
|
||||
var ef_construct: Int = 0
|
||||
var m: Long = 0
|
||||
var ef_construct: Long = 0
|
||||
var full_scan_threshold: Int = 0
|
||||
var max_indexing_threads: Int = 0
|
||||
var on_disk: Boolean = false
|
||||
@ -25,20 +25,20 @@ class QHnswConfig {
|
||||
|
||||
/**
 * Optimizer settings of a vector collection.
 *
 * Property names are snake_case and must stay that way: they are presumably matched against
 * JSON keys by name (TODO confirm against the deserialization call site).
 *
 * Each counter is declared exactly once, as [Long]. The block previously carried both an Int
 * and a Long declaration for the same names, which is a redeclaration error.
 */
class QOptimizerConfig {
    var deleted_threshold: Double = 0.0
    var vacuum_min_vector_number: Long = 0
    var default_segment_number: Long = 0

    // Left untyped in the original; the shape of these values is not visible here.
    var max_segment_size: Any? = null
    var memmap_threshold: Any? = null

    var indexing_threshold: Long = 0
    var flush_interval_sec: Long = 0
    var max_optimization_threads: Any? = null
}
|
||||
|
||||
/**
 * Collection parameters: vector layout plus sharding/replication counters.
 *
 * Property names are snake_case and must stay that way (presumably JSON key names — TODO confirm).
 * Each counter is declared exactly once, as [Long]; the duplicate Int declarations of the same
 * names were a redeclaration error.
 */
class QParams {
    var vectors: QVectors? = null
    var shard_number: Long = 0
    var replication_factor: Long = 0
    var write_consistency_factor: Long = 0
    var on_disk_payload: Boolean = false
}
|
||||
|
||||
@ -47,9 +47,9 @@ class QPayloadSchema
|
||||
/**
 * Status block of a collection: state strings, counters and the nested configuration.
 *
 * Property names are snake_case and must stay that way (presumably JSON key names — TODO confirm).
 * All counters are [Long], matching the already-Long `points_count`; the duplicate Int
 * declarations of `indexed_vectors_count` and `segments_count` were a redeclaration error.
 */
class QResult {
    var status: String? = null
    var optimizer_status: String? = null
    var indexed_vectors_count: Long = 0
    var points_count: Long = 0
    var segments_count: Long = 0
    var config: QConfig? = null
    var payload_schema: QPayloadSchema? = null
}
|
||||
@ -61,18 +61,18 @@ class QStrictModeConfig {
|
||||
}
|
||||
|
||||
/**
 * Vector layout of a collection: dimension count and distance metric name.
 *
 * `size` is declared once, as [Long]; the additional Int declaration of the same name was a
 * redeclaration error.
 */
class QVectors {
    var size: Long = 0
    var distance: String? = null
}
|
||||
|
||||
/**
 * Write-ahead-log settings of a collection.
 *
 * Property names are snake_case and must stay that way (presumably JSON key names — TODO confirm).
 * Both values are declared once, as [Long]; the duplicate Int declarations were a redeclaration
 * error.
 */
class QWalConfig {
    var wal_capacity_mb: Long = 0
    var wal_segments_ahead: Long = 0
}
|
||||
|
||||
/**
 * One hit of a vector search: point id, point version and similarity score.
 *
 * `id` and `version` are declared once, as [Long]. Point ids are written with
 * `System.currentTimeMillis()` elsewhere in this project, which does not fit in an Int, and the
 * duplicate Int declarations of the same names were a redeclaration error.
 */
class QSearchResult {
    var id: Long = 0
    var version: Long = 0
    var score: Double = 0.0
}
|
||||
|
||||
@ -103,7 +103,7 @@ class QContentsPayload {
|
||||
}
|
||||
|
||||
/**
 * One stored point returned by id: the point id and its payload.
 *
 * `id` is declared once, as [Long]; the additional Int declaration of the same name was a
 * redeclaration error and could not hold millisecond-timestamp ids.
 */
class QContentsResult {
    var id: Long = 0
    var payload: QContentsPayload? = null
}
|
||||
|
||||
|
||||
@ -3,34 +3,40 @@ package kr.lunaticbum.back.lun.service
|
||||
|
||||
|
||||
import com.google.gson.Gson
|
||||
import com.google.gson.annotations.SerializedName
|
||||
import com.google.gson.JsonElement
|
||||
import com.google.gson.JsonObject
|
||||
import com.google.gson.JsonParser
|
||||
import io.micrometer.observation.ObservationRegistry
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.async
|
||||
import kotlinx.coroutines.launch
|
||||
import kr.lunaticbum.back.lun.configs.GlobalEnvironment
|
||||
import kr.lunaticbum.back.lun.controllers.TelegramSendMsg
|
||||
import kr.lunaticbum.back.lun.model.*
|
||||
import kr.lunaticbum.back.lun.utils.RssFeedsParser
|
||||
import org.jsoup.Jsoup
|
||||
import org.jsoup.select.Elements
|
||||
import org.openqa.selenium.By
|
||||
import org.openqa.selenium.WebDriver
|
||||
import org.openqa.selenium.chrome.ChromeOptions
|
||||
import org.openqa.selenium.remote.RemoteWebDriver
|
||||
import org.springframework.ai.embedding.EmbeddingRequest
|
||||
import org.springframework.ai.ollama.OllamaEmbeddingModel
|
||||
import org.springframework.ai.ollama.api.OllamaApi
|
||||
import org.springframework.ai.ollama.api.OllamaOptions
|
||||
import org.springframework.ai.ollama.management.ModelManagementOptions
|
||||
import org.springframework.beans.factory.annotation.Autowired
|
||||
import org.springframework.beans.factory.annotation.Qualifier
|
||||
import org.springframework.http.MediaType
|
||||
import org.springframework.scheduling.annotation.Async
|
||||
import org.springframework.stereotype.Service
|
||||
import org.springframework.web.reactive.function.BodyInserters
|
||||
import org.springframework.web.reactive.function.client.WebClient
|
||||
import reactor.kotlin.core.publisher.toMono
|
||||
import java.net.URL
|
||||
import java.net.URLEncoder
|
||||
import java.text.SimpleDateFormat
|
||||
import java.time.Duration
|
||||
import java.util.*
|
||||
import kotlin.collections.ArrayList
|
||||
|
||||
|
||||
@Service
|
||||
@ -46,7 +52,7 @@ class Lama {
|
||||
/** Upsert body: the list of points that is serialized with Gson and sent to the `/points` endpoint. */
data class QPut(val points : ArrayList<QData>)

/** A single point: numeric id, embedding vector and the search result stored as payload. */
data class QData(val id : Long, val vector : FloatArray, val payload : SearXngResult)

/**
 * Lookup body listing the point ids to fetch.
 *
 * Declared once, with `Long` ids so that it can carry the same ids [QData] is written with;
 * the second declaration using `ArrayList<Int>` was a redeclaration error.
 */
data class QContentsList(var ids : ArrayList<Long> = ArrayList(), var with_payload : Boolean = true, var with_vector : Boolean = false)
|
||||
// fun makeCollection() : String{
|
||||
//
|
||||
// class CollectionPut {
|
||||
@ -76,51 +82,51 @@ class Lama {
|
||||
|
||||
fun jsopFilter(url : String) : String {
|
||||
val joinString = "\n#"
|
||||
var lastElement : Elements = Elements()
|
||||
var lastElements : Elements = Elements()
|
||||
var body = Jsoup.connect(url).timeout(30000).get().body()
|
||||
var elements : Elements? = null
|
||||
if (url.contains("nate.com", true)) {
|
||||
if (url.contains("view", true)) {
|
||||
elements = body.select("[class*=articleView]")
|
||||
}else {
|
||||
elements = body.select("[class*=postRankSubjectList]")
|
||||
}
|
||||
} else if (url.contains("newsis.com/view", true)) {
|
||||
elements = body.select("[class*=articleView]")
|
||||
} else if (url.contains("blog.naver.com", true)) {
|
||||
elements = body.select("[class*=se-viewer]")
|
||||
} else if (url.contains("bbc.com/korean/articles", true)) {
|
||||
elements = body.select("main[role$=main]")
|
||||
} else if (url.contains("chosun.com/client", true)) {
|
||||
elements = body.select("[class*=articleBody]")
|
||||
} else if (url.contains("nocutnews.co.kr/news", true)) {
|
||||
elements = body.select("[class*=container]")
|
||||
} else if (url.contains("hani.co.kr/arti/", true)) {
|
||||
elements = body.select("[class*=ArticleDetail]")
|
||||
} else if (url.contains("yna.co.kr/view", true)) {
|
||||
elements = body.select("[class*=container]")
|
||||
} else if (url.contains("newspim.com/news", true)) {
|
||||
elements = body.select("[class*=container]")
|
||||
} else {
|
||||
|
||||
}
|
||||
if (elements?.size ?: 0 > 0) {
|
||||
elements?.forEach {
|
||||
lastElement.add(it)
|
||||
}
|
||||
}
|
||||
|
||||
if (lastElement.size < 1) {
|
||||
arrayOf("container","article","main","viewer","content").forEach {
|
||||
var result = Elements()
|
||||
result.addAll(body.select("[class*=$it]"))
|
||||
result.addAll(body.select("[id*=$it]"))
|
||||
result.addAll(body.select(it))
|
||||
result.forEach { if (it.text().length > 100 && it.children().size < 5) { lastElement.add(it) } }
|
||||
}
|
||||
}
|
||||
return if (lastElement.size > 0) {
|
||||
lastElement.eachText().joinToString(joinString)
|
||||
// var elements : Elements? = null
|
||||
// if (url.contains("nate.com", true)) {
|
||||
// if (url.contains("view", true)) {
|
||||
// elements = body.select("[class*=articleView]")
|
||||
// }else {
|
||||
// elements = body.select("[class*=postRankSubjectList]")
|
||||
// }
|
||||
// } else if (url.contains("newsis.com/view", true)) {
|
||||
// elements = body.select("[class*=articleView]")
|
||||
// } else if (url.contains("blog.naver.com", true)) {
|
||||
// elements = body.select("[class*=se-viewer]")
|
||||
// } else if (url.contains("bbc.com/korean/articles", true)) {
|
||||
// elements = body.select("main[role$=main]")
|
||||
// } else if (url.contains("chosun.com/client", true)) {
|
||||
// elements = body.select("[class*=articleBody]")
|
||||
// } else if (url.contains("nocutnews.co.kr/news", true)) {
|
||||
// elements = body.select("[class*=container]")
|
||||
// } else if (url.contains("hani.co.kr/arti/", true)) {
|
||||
// elements = body.select("[class*=ArticleDetail]")
|
||||
// } else if (url.contains("yna.co.kr/view", true)) {
|
||||
// elements = body.select("[class*=container]")
|
||||
// } else if (url.contains("newspim.com/news", true)) {
|
||||
// elements = body.select("[class*=container]")
|
||||
// } else {
|
||||
//
|
||||
// }
|
||||
// if (elements?.size ?: 0 > 0) {
|
||||
// elements?.forEach {
|
||||
// lastElements.add(it)
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (lastElements.size < 1) {
|
||||
// arrayOf("container","article","main","viewer","content").forEach {
|
||||
// var result = Elements()
|
||||
// result.addAll(body.select("[class*=$it]"))
|
||||
// result.addAll(body.select("[id*=$it]"))
|
||||
// result.addAll(body.select(it))
|
||||
// result.forEach { if (it.text().length > 100 && it.children().size < 5) { lastElements.add(it) } }
|
||||
// }
|
||||
// }
|
||||
return if (lastElements.size > 0) {
|
||||
lastElements.eachText().joinToString(joinString)
|
||||
} else {
|
||||
body.children().eachText().joinToString(joinString)
|
||||
}
|
||||
@ -140,33 +146,145 @@ class Lama {
|
||||
|
||||
val embedimgModelEeve ="lancard/korean-yanolja-eeve"
|
||||
val embedimgModelBgeM3 = "bge-m3"
|
||||
val currentEmbedimg = embedimgModelEeve
|
||||
val currentEmbedimg = embedimgModelBgeM3
|
||||
|
||||
val llmPhi4 = "phi4:14b"
|
||||
val llmGemma3 = "gemma3:12b"
|
||||
|
||||
val llmGemma3 = "gemma3:4b"
|
||||
val llmPhi4Mini = "phi4-mini"
|
||||
val llmDolphin3 = "dolphin3"
|
||||
|
||||
|
||||
|
||||
|
||||
val currentLLM = llmDolphin3
|
||||
val currentLLM = llmGemma3
|
||||
/**
 * Requests the Google result page for [query] and iterates over every anchor with an `href`.
 *
 * The per-anchor lambda is empty, so nothing is collected and nothing is returned; the call only
 * performs the HTTP request (30 s timeout) and the selection.
 *
 * The query is URL-encoded before it is placed in the query string, so characters such as
 * `&`, `#` or `+` no longer change the meaning of the request URL.
 */
fun getGoogleSearch(query: String) {
    // Fully qualified so this block does not depend on a file-level import.
    val encodedQuery = java.net.URLEncoder.encode(query, "UTF-8")
    Jsoup.connect("https://www.google.com/search?q=$encodedQuery")
        .timeout(30000)
        .get()
        .select("a[href]")
        .forEach { }
}
|
||||
|
||||
fun addDocuments(query : String , refinedQuery: RefinedQuery?) {
|
||||
val waitTime = 1000L
|
||||
val topCount = 2
|
||||
|
||||
@Async
|
||||
suspend fun addDocuments(query : String , refinedQuery: RefinedQuery?) {
|
||||
var querys : ArrayList<String> = ArrayList()
|
||||
querys.add(query)
|
||||
|
||||
refinedQuery?.ko_query?.let { querys.add(it) }
|
||||
refinedQuery?.en_query?.let { querys.add(it) }
|
||||
refinedQuery?.keywords?.let { querys.add(it.joinToString { " " })}
|
||||
val readedUrls = ArrayList<String>()
|
||||
|
||||
try {
|
||||
var options : ChromeOptions = ChromeOptions();
|
||||
options.addArguments("--disable-popup-blocking");
|
||||
options.addArguments("--disable-default-apps");
|
||||
options.addArguments("--disable-notifications");
|
||||
options.addArguments("--disable-blink-features=AutomationControlled");
|
||||
val targetUrls = hashSetOf<String>()
|
||||
RemoteWebDriver(URL("https://video.lunaticbum.kr"), options).let { driver ->
|
||||
querys.forEach { refinedQuery->
|
||||
var findCount = 0
|
||||
try {
|
||||
driver.get("https://www.google.com/search?q=$refinedQuery");
|
||||
Thread.sleep(waitTime)
|
||||
println(driver.currentUrl)
|
||||
driver.findElement(By.ByTagName("Body"))?.let { webElement ->
|
||||
Jsoup.parse(driver.pageSource).select("[href*=https]").forEach {
|
||||
var href = it.attr("href")
|
||||
if (href?.length ?: 0 > 5 && href.startsWith("https://") && findCount < topCount && href.contains("google") == false && href.contains("youtube") == false) {
|
||||
targetUrls.add(href)
|
||||
println("add targetUrls $href")
|
||||
findCount += 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}catch (e:Exception){
|
||||
e.printStackTrace()
|
||||
}
|
||||
}
|
||||
driver.close()
|
||||
driver.quit()
|
||||
}
|
||||
options = ChromeOptions();
|
||||
options.addArguments("--disable-popup-blocking");
|
||||
options.addArguments("--disable-default-apps");
|
||||
options.addArguments("--disable-notifications");
|
||||
options.addArguments("--disable-blink-features=AutomationControlled");
|
||||
RemoteWebDriver(URL("https://video.lunaticbum.kr"), options).let { driver ->
|
||||
targetUrls.forEach { url ->
|
||||
var result = SearXngResult()
|
||||
if (url?.length ?: 0 > 5 && url?.startsWith("https://") == true && readedUrls.contains(url) == false) {
|
||||
readedUrls.add(url!!)
|
||||
result.url = url!!
|
||||
result.originQuery = query
|
||||
try {
|
||||
driver.get(url);
|
||||
Thread.sleep(waitTime)
|
||||
driver.findElement(By.ByTagName("Body"))?.let { webElement ->
|
||||
if(webElement.text.length > 120) {
|
||||
println(driver.currentUrl)
|
||||
println(webElement.text)
|
||||
result.title = driver.title
|
||||
result.originHtml = webElement.text
|
||||
webPageSummarize(result, webElement.text)
|
||||
}
|
||||
}
|
||||
|
||||
} catch (e: Exception) {
|
||||
e.printStackTrace()
|
||||
}
|
||||
}
|
||||
}
|
||||
driver.close();
|
||||
driver.quit()
|
||||
}
|
||||
options = ChromeOptions();
|
||||
options.addArguments("--disable-popup-blocking");
|
||||
options.addArguments("--disable-default-apps");
|
||||
options.addArguments("--disable-notifications");
|
||||
options.addArguments("--disable-blink-features=AutomationControlled");
|
||||
RemoteWebDriver(URL("https://video.lunaticbum.kr"), options).let { driver ->
|
||||
querys.forEach { refinedQuery ->
|
||||
CoroutineScope(Dispatchers.IO).launch {
|
||||
val gSearch = "https://psn.lunaticbum.kr/search?q=${
|
||||
refinedQuery?.replace(
|
||||
"오늘",
|
||||
SimpleDateFormat("yyyMMdd").format(Date())
|
||||
)
|
||||
}&language=ko&time_range=month&safesearch=0&categories=general&format=json"
|
||||
var googleSCount = 0
|
||||
RssFeedsParser().readFeed("https://news.google.com/rss/search?q=${URLEncoder.encode(query)}=ko&gl=KR&ceid=KR%3Ako/")?.messages?.forEach {
|
||||
var url: String? = it.link
|
||||
var result = SearXngResult()
|
||||
println("url >>>> $url")
|
||||
if (url?.length ?: 0 > 5 && url?.startsWith("https://") == true && readedUrls.contains(url) == false && googleSCount < topCount) {
|
||||
readedUrls.add(url!!)
|
||||
result.url = url!!
|
||||
result.originQuery = query
|
||||
result.refinedQuery = refinedQuery
|
||||
result.title = it.title
|
||||
println(result.title)
|
||||
try {
|
||||
driver.get(url);
|
||||
Thread.sleep(waitTime)
|
||||
println(driver.currentUrl)
|
||||
driver.findElement(By.ByTagName("Body"))?.let { webElement ->
|
||||
println(driver.currentUrl)
|
||||
println(webElement.text)
|
||||
result.title = driver.title
|
||||
result.originHtml = webElement.text
|
||||
webPageSummarize(result, webElement.text)
|
||||
googleSCount += 1
|
||||
}
|
||||
|
||||
} catch (e: Exception) {
|
||||
e.printStackTrace()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
driver.close()
|
||||
driver.quit()
|
||||
}
|
||||
|
||||
} catch (e:Exception){e.printStackTrace()}
|
||||
|
||||
querys.forEach { refinedQuery ->
|
||||
val gSearch = "https://psn.lunaticbum.kr/search?q=${refinedQuery?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date()))}&language=ko&time_range=month&safesearch=0&categories=general&format=json"
|
||||
println("gSearch >>> ${gSearch}")
|
||||
WebClient.create().get()
|
||||
.uri(gSearch)
|
||||
@ -190,37 +308,25 @@ class Lama {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
println("end of search")
|
||||
}
|
||||
}
|
||||
|
||||
var format = "원문:\n'%s'\n원문의 웹 페이지 소스는 '%s'이 질문에 대해 연관 결과로 받은 내용이야. 해당 정보를 파악해서 본문 내용을 최대한 자세히 알려줘 'query:{질문},contents:{본문내용 한국어},summary:{100자 이하로 요약 한국어},keywords:[키워드],related_links:[링크],relatedness_score:{0.0~10.0}'이 형식의 결과만들어줘"
|
||||
var format = "context:'%s'\ncontext는 웹 페이지 문자를 가져온 것 '%s'이 질문에 대해 연관 결과로 받은 내용임. 해당 context 정리 해서 본문 내용을 최대한 자세히 알려줘\n'{query:질문 내용, contents_ko:자세한 내용 한국어 , summary_ko:요약된 내용 한국어, keywords:[키워드], related_links:[{link,description}}], relatedness_score:0.0~10.0}'\n이 형식의 결과로 만들어 줘"
|
||||
/**
 * Fills the summarize prompt template `format` with the page text and the original question
 * of the given search result, in that order.
 */
internal fun makeSummarizeRequestMsg(it: SearXngResult): String {
    val pageText = it.originHtml
    val question = it.originQuery
    return format.format(pageText, question)
}
|
||||
|
||||
/**
 * Builds a non-streaming chat request for `currentLLM` that asks for a JSON-formatted answer.
 *
 * The prompt is cut into pieces of at most 100 characters; every piece is printed to stdout and
 * sent as its own USER message, in order.
 *
 * Only this definition is kept: a second function with the identical signature (sending the
 * prompt as one message) sat next to it, which is a conflicting-overload error.
 */
internal fun makeCahtReq(reqMsg: String) =
    OllamaApi.ChatRequest.Builder(currentLLM)
        .stream(false)
        .format("json")
        .messages(
            reqMsg.chunked(100).map { chunk ->
                println(chunk)
                OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content(chunk).build()
            }
        )
        .build()
|
||||
|
||||
@Async
|
||||
fun webPageSummarize(it : SearXngResult , text : String) {
|
||||
try {
|
||||
infomationDic.get(it.originQuery)!!.put(it.url!!, text)
|
||||
val chatClient = OllamaApi("https://lama.lunaticbum.kr")
|
||||
val embeddingModel = OllamaEmbeddingModel(
|
||||
chatClient, OllamaOptions.builder().build(), ObservationRegistry.create(), ModelManagementOptions.defaults())
|
||||
println("text >>>>> ${text?.chunked(50)?.first() ?: ""}")
|
||||
var dispoable = chatClient.chat(makeCahtReq(makeSummarizeRequestMsg(it))).toMono().subscribe({aiResponce ->
|
||||
it.pageData = aiResponce.message.content
|
||||
println("summary result >>>>> ${it.pageData}")
|
||||
val embeddingResponse = embeddingModel.call(
|
||||
EmbeddingRequest(
|
||||
listOf(aiResponce.message.content),
|
||||
OllamaOptions.builder()
|
||||
.model(currentEmbedimg)
|
||||
.truncate(false).build()
|
||||
)
|
||||
)
|
||||
infomationDic.put(it.url!!,aiResponce.message.content)
|
||||
val embeddingModel = OllamaEmbeddingModel(chatClient, OllamaOptions.builder().build(), ObservationRegistry.create(), ModelManagementOptions.defaults())
|
||||
val embeddingResponse = embeddingModel.call(EmbeddingRequest(text.chunked(400).toList(), OllamaOptions.builder().model(currentEmbedimg).truncate(false).build()))
|
||||
it.originHtml = text
|
||||
val sdss = QPut(arrayListOf())
|
||||
sdss.points.add(QData(id = System.currentTimeMillis(),embeddingResponse.result.output,it))
|
||||
sdss.points.add(QData(id = System.currentTimeMillis(), embeddingResponse.result.output, it))
|
||||
if (sdss.points.size > 0) {
|
||||
val qUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
|
||||
val client = WebClient.create()
|
||||
@ -230,14 +336,10 @@ class Lama {
|
||||
.body(BodyInserters.fromValue(Gson().toJson(sdss)))
|
||||
.retrieve()
|
||||
.bodyToMono(String::class.java).timeout(Duration.ofMinutes(20L)).subscribe(
|
||||
{resultString -> },{error-> error.printStackTrace()}
|
||||
{ resultString -> }, { error -> error.printStackTrace() }
|
||||
)
|
||||
}
|
||||
},{err->
|
||||
err.printStackTrace()
|
||||
})
|
||||
}catch (e : Exception) {
|
||||
|
||||
e.printStackTrace()
|
||||
}
|
||||
}
|
||||
@ -281,10 +383,7 @@ class Lama {
|
||||
println(Gson().toJson(lists))
|
||||
return if (lists?.result?.size ?: 0 > 0) {
|
||||
val qContents = QContentsList()
|
||||
lists?.result?.forEach {
|
||||
|
||||
qContents.ids.add(it.id)
|
||||
}
|
||||
lists?.result?.filter { it.score > 8.0 }?.forEach { qContents.ids.add(it.id) }
|
||||
val qCUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
|
||||
val client2 = WebClient.create()
|
||||
client2.post()
|
||||
@ -301,44 +400,44 @@ class Lama {
|
||||
@Autowired
|
||||
lateinit var globalEvv : GlobalEnvironment
|
||||
|
||||
var infomationDic = hashMapOf<String,String>()
|
||||
var infomationDic = hashMapOf<String,HashMap<String,String>>()
|
||||
suspend fun generateResponse(query: String?, targetId: String? = globalEvv.telegramMyId) {
|
||||
infomationDic.clear()
|
||||
|
||||
val chatClient = OllamaApi("https://lama.lunaticbum.kr")
|
||||
val embeddingModel = OllamaEmbeddingModel(
|
||||
chatClient, OllamaOptions.builder().build(), ObservationRegistry.create(), ModelManagementOptions.defaults())
|
||||
println("On generateResponse :: find something ${query}")
|
||||
|
||||
query?.let { originalQuery ->
|
||||
infomationDic.put(query!!, hashMapOf())
|
||||
var embeddingResponse = embeddingModel.call(EmbeddingRequest(listOf(originalQuery), OllamaOptions.builder().model(currentEmbedimg).truncate(false).build()))
|
||||
addDocuments(originalQuery, querySummarize(originalQuery))
|
||||
println("points size ${embeddingResponse.result.output.size}")
|
||||
var context : String? = ""
|
||||
var context : StringBuffer = StringBuffer()
|
||||
try {
|
||||
embedQuery(embeddingResponse.result.output)?.result?.forEach { result ->
|
||||
if (infomationDic.contains(result.payload?.url ?: "NONE") == false) {
|
||||
context += "\n# :".plus(if (result.payload?.pageData?.length ?: 0 > 10) {
|
||||
if (infomationDic.get(query!!)!!.contains(result.payload?.url ?: "NONE") == false) {
|
||||
context.append("\n# :".plus(if (result.payload?.pageData?.length ?: 0 > 10) {
|
||||
result.payload?.pageData
|
||||
} else {
|
||||
result.payload?.content
|
||||
})
|
||||
}))
|
||||
}
|
||||
}
|
||||
}catch (e:Exception){
|
||||
e.printStackTrace()
|
||||
}
|
||||
|
||||
infomationDic.iterator().forEach { context += "\n#${it.key}:${it.value}" }
|
||||
infomationDic.get(query!!)!!.iterator().forEach { context.append("\n#${it.key}:${it.value}") }
|
||||
|
||||
val prompt : StringBuffer = StringBuffer().append("참조:\n").append(context).append("\n참조 내용을 고려 해서\n'$query'").append(query).append("\n에 {querys:[],answers:[],keywords:[],links:[]}형식으로 최대한 자세히 대답 해줘 ")
|
||||
val fullUrl = "https://api.telegram.org/${globalEvv.telegramBotKey}/sendMessage"
|
||||
|
||||
val prompt = "참조:\n$context\n참조 내용을 고려해서\n해당 질문:${query}\n에 {질문내용:[한국어],답변내용:[한국어],전체키워드:[],참조링크:[]}형식으로 대답 해줘 ".trimIndent()
|
||||
println(prompt)
|
||||
val response: OllamaApi.ChatResponse = chatClient.chat(OllamaApi.ChatRequest.Builder(currentLLM).stream(false).format("json").messages(
|
||||
listOf(OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content(prompt).build())
|
||||
).build())
|
||||
|
||||
println(response.message.content)
|
||||
prompt.chunked(300).map { println(it); OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content(it).build()}.toList()).build())
|
||||
// println(response.message.content)
|
||||
CoroutineScope(Dispatchers.IO).launch {
|
||||
var toalmsg = "${query}의 대답이 도착했어요.\n${response.message.content}"
|
||||
val fullUrl = "https://api.telegram.org/${globalEvv.telegramBotKey}/sendMessage"
|
||||
toalmsg.chunked(512).forEach { chunkedMsg ->
|
||||
println("fullUrl >>> ${fullUrl}")
|
||||
(targetId ?: globalEvv.telegramMyId)?.let {
|
||||
@ -354,6 +453,7 @@ class Lama {
|
||||
}
|
||||
}
|
||||
}
|
||||
infomationDic.remove(query!!)
|
||||
}
|
||||
println("On generateResponse :: END OF Answer")
|
||||
}
|
||||
|
||||
151
src/main/kotlin/kr/lunaticbum/back/lun/utils/RssFeedsParser.kt
Normal file
151
src/main/kotlin/kr/lunaticbum/back/lun/utils/RssFeedsParser.kt
Normal file
@ -0,0 +1,151 @@
|
||||
package kr.lunaticbum.back.lun.utils
|
||||
|
||||
import java.io.IOException
|
||||
import java.io.InputStream
|
||||
import java.net.MalformedURLException
|
||||
import java.net.URL
|
||||
import javax.xml.stream.XMLEventReader
|
||||
import javax.xml.stream.XMLInputFactory
|
||||
import javax.xml.stream.XMLStreamException
|
||||
import javax.xml.stream.events.Characters
|
||||
import javax.xml.stream.events.XMLEvent
|
||||
|
||||
|
||||
/**
 * Mutable holder for the text fields of one feed entry.
 *
 * Every field starts as `null` and is assigned by whoever builds the message.
 */
class FeedMessage {
    var title: String? = null
    var description: String? = null
    var link: String? = null
    var author: String? = null
    var guid: String? = null

    /** Renders all five fields in a fixed order; unset fields appear as `null`. */
    override fun toString(): String =
        "FeedMessage [title=$title, description=$description, link=$link, author=$author, guid=$guid]"
}
|
||||
/**
 * Header data of a feed plus the list of entries collected for it.
 *
 * The header values are fixed at construction; [messages] starts empty and is filled by the
 * caller.
 */
class Feed(
    val title: String,
    val link: String,
    val description: String,
    val language: String,
    val copyright: String,
    val pubDate: String
) {
    val messages: ArrayList<FeedMessage> = ArrayList()

    /** Renders the header fields only; the entries are not included. */
    override fun toString(): String =
        "Feed [copyright=$copyright, description=$description, language=$language, " +
            "link=$link, pubDate=$pubDate, title=$title]"
}
|
||||
/**
 * Small RSS reader built on the StAX event API (`javax.xml.stream`).
 *
 * [readFeed] downloads the document behind a URL, turns the channel header into a [Feed] and
 * every `<item>` element into a [FeedMessage]. Elements are matched by local name only, so a
 * namespaced element such as `media:title` is handled like `title`.
 */
class RssFeedsParser {

    // Local names of the elements the parser reacts to.
    val TITLE: String = "title"
    val DESCRIPTION: String = "description"
    val CHANNEL: String = "channel"
    val LANGUAGE: String = "language"
    val COPYRIGHT: String = "copyright"
    val LINK: String = "link"
    val AUTHOR: String = "author"
    val ITEM: String = "item"
    val PUB_DATE: String = "pubDate"
    val GUID: String = "guid"

    /** URL used by the most recent [readFeed] call; `null` until the first call. */
    var url: URL? = null

    /**
     * Downloads and parses the feed at [feedUrl].
     *
     * Values seen before the first `<item>` form the feed header. Inside an item the same
     * variables collect the item's fields; they are cleared at every item start so that a field
     * missing from one item does not inherit the value of the previous item.
     *
     * @param feedUrl absolute URL of the RSS document
     * @return the parsed feed, or `null` when the document contains no `<item>` element
     * @throws RuntimeException wrapping the [MalformedURLException], [IOException] or
     *   [XMLStreamException] that made reading impossible
     */
    fun readFeed(feedUrl: String?): Feed? {
        try {
            this.url = URL(feedUrl)
        } catch (e: MalformedURLException) {
            throw RuntimeException(e)
        }

        var feed: Feed? = null
        var isFeedHeader = true
        var description = ""
        var title = ""
        var link = ""
        var language = ""
        var copyright = ""
        var author = ""
        var pubdate = ""
        var guid = ""

        try {
            val inputFactory = XMLInputFactory.newInstance()
            // Deliver the whole text of an element (including CDATA) as one Characters event;
            // otherwise long titles/descriptions can arrive split and only the head is read.
            inputFactory.setProperty(XMLInputFactory.IS_COALESCING, true)
            // The document comes from the network: do not expand external entities.
            inputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false)

            // `use` closes the connection stream even when parsing fails.
            read().use { input ->
                val eventReader = inputFactory.createXMLEventReader(input)
                try {
                    while (eventReader.hasNext()) {
                        val event = eventReader.nextEvent()
                        if (event.isStartElement) {
                            when (event.asStartElement().name.localPart) {
                                ITEM -> {
                                    if (isFeedHeader) {
                                        isFeedHeader = false
                                        feed = Feed(title, link, description, language, copyright, pubdate)
                                    }
                                    // No extra nextEvent() here: in XML without whitespace between
                                    // tags that call would swallow the item's first child element.
                                    title = ""
                                    description = ""
                                    link = ""
                                    author = ""
                                    guid = ""
                                }

                                TITLE -> title = getCharacterData(event, eventReader)
                                DESCRIPTION -> description = getCharacterData(event, eventReader)
                                LINK -> link = getCharacterData(event, eventReader)
                                GUID -> guid = getCharacterData(event, eventReader)
                                LANGUAGE -> language = getCharacterData(event, eventReader)
                                AUTHOR -> author = getCharacterData(event, eventReader)
                                PUB_DATE -> pubdate = getCharacterData(event, eventReader)
                                COPYRIGHT -> copyright = getCharacterData(event, eventReader)
                            }
                        } else if (event.isEndElement && event.asEndElement().name.localPart == ITEM) {
                            // `==` compares content; `===` only matched when the parser happened
                            // to hand back the very same String instance as the constant.
                            val message = FeedMessage()
                            message.author = author
                            message.description = description
                            message.guid = guid
                            message.link = link
                            message.title = title
                            feed?.messages?.add(message)
                        }
                    }
                } finally {
                    eventReader.close()
                }
            }
        } catch (e: XMLStreamException) {
            throw RuntimeException(e)
        } catch (e: IOException) {
            throw RuntimeException(e)
        }
        return feed
    }

    /**
     * Returns the text that directly follows the start element just read, or `""` when the
     * element is empty or starts with a child element.
     *
     * The following event is only consumed when it is text, so an end tag or child start tag is
     * left for the main loop.
     */
    @Suppress("UNUSED_PARAMETER")
    @Throws(XMLStreamException::class)
    private fun getCharacterData(event: XMLEvent, eventReader: XMLEventReader): String {
        val upcoming = eventReader.peek()
        if (upcoming is Characters) {
            eventReader.nextEvent()
            return upcoming.data
        }
        return ""
    }

    /** Opens the stream behind [url]; an [IOException] is rethrown as [RuntimeException]. */
    private fun read(): InputStream {
        val source = checkNotNull(url) { "readFeed must set url before read() is called" }
        try {
            return source.openStream()
        } catch (e: IOException) {
            throw RuntimeException(e)
        }
    }
}
|
||||
Loading…
x
Reference in New Issue
Block a user