This commit is contained in:
lunaticbum 2025-03-07 18:33:09 +09:00
parent a89b4904ea
commit b191022e6d
4 changed files with 101 additions and 31 deletions

View File

@ -39,6 +39,7 @@ class AppConfig : WebMvcConfigurer {
@Bean @Bean
fun chatClient(): OllamaApi { fun chatClient(): OllamaApi {
return OllamaApi("https://lama.lunaticbum.kr") return OllamaApi("https://lama.lunaticbum.kr")
// .withDefaultOptions( // .withDefaultOptions(
// OllamaOptions.create() // OllamaOptions.create()
// .withModel("phi4:14b") // .withModel("phi4:14b")

View File

@ -410,7 +410,7 @@ class Telegram {
// } // }
// } // }
CoroutineScope(Dispatchers.IO).async { CoroutineScope(Dispatchers.IO).async {
lama.generateResponse(query = originalQuery) lama.generateResponse(query = originalQuery?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date())))
} }
return "TEST" return "TEST"
} }

View File

@ -12,7 +12,7 @@ class SearXng {
var unresponsive_engines: ArrayList<ArrayList<String>>? = null var unresponsive_engines: ArrayList<ArrayList<String>>? = null
} }
class SearXngResult { class SearXngResult {
var originQuery : String? = null // var originQuery : String? = null
var url: String? = null var url: String? = null
var title: String? = null var title: String? = null
var content: String? = null var content: String? = null
@ -25,4 +25,5 @@ class SearXngResult {
var score: Double = 0.0 var score: Double = 0.0
var category: String? = null var category: String? = null
var pageData : String? = null var pageData : String? = null
var originHtml : String? = null
} }

View File

@ -3,17 +3,16 @@ package kr.lunaticbum.back.lun.service
import com.google.gson.Gson import com.google.gson.Gson
import com.knuddels.jtokkit.api.IntArrayList import com.google.gson.annotations.SerializedName
import io.micrometer.observation.ObservationRegistry import io.micrometer.observation.ObservationRegistry
import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.launch import kotlinx.coroutines.launch
import kr.lunaticbum.back.lun.configs.GlobalEnvironment import kr.lunaticbum.back.lun.configs.GlobalEnvironment
import kr.lunaticbum.back.lun.controllers.BumlamaResp
import kr.lunaticbum.back.lun.controllers.TelegramSendMsg import kr.lunaticbum.back.lun.controllers.TelegramSendMsg
import kr.lunaticbum.back.lun.controllers.lamaGenerated
import kr.lunaticbum.back.lun.model.* import kr.lunaticbum.back.lun.model.*
import org.jsoup.Jsoup import org.jsoup.Jsoup
import org.jsoup.select.Elements
import org.springframework.ai.embedding.EmbeddingRequest import org.springframework.ai.embedding.EmbeddingRequest
import org.springframework.ai.ollama.OllamaEmbeddingModel import org.springframework.ai.ollama.OllamaEmbeddingModel
import org.springframework.ai.ollama.api.OllamaApi import org.springframework.ai.ollama.api.OllamaApi
@ -25,11 +24,10 @@ import org.springframework.http.MediaType
import org.springframework.stereotype.Service import org.springframework.stereotype.Service
import org.springframework.web.reactive.function.BodyInserters import org.springframework.web.reactive.function.BodyInserters
import org.springframework.web.reactive.function.client.WebClient import org.springframework.web.reactive.function.client.WebClient
import java.net.URLEncoder import reactor.kotlin.core.publisher.toMono
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import java.time.Duration import java.time.Duration
import java.util.* import java.util.*
import kotlin.collections.ArrayList
@Service @Service
@ -74,6 +72,71 @@ class Lama {
.retrieve() .retrieve()
.bodyToMono(QCollection::class.java).timeout(Duration.ofMinutes(20L)).block()?.result?.points_count ?: 0L .bodyToMono(QCollection::class.java).timeout(Duration.ofMinutes(20L)).block()?.result?.points_count ?: 0L
} }
/**
 * Fetches [url] with Jsoup (30000 ms timeout) and returns the text of the elements
 * that most likely hold the main article content.
 *
 * Selection happens in three stages:
 * 1. A site-specific CSS selector, chosen by case-insensitive substring match on [url].
 * 2. If that yields nothing, a generic scan for elements whose class, id or tag name
 *    contains one of a few common keywords; only elements with more than 100 characters
 *    of text and fewer than 5 direct children are kept.
 * 3. If that also yields nothing, the direct children of `<body>` are used.
 *
 * For every selected element the texts of its direct children are collected into a list;
 * the lists are rendered with their default `toString()` (e.g. `[a, b]`) and joined
 * with `"\n#"`.
 *
 * No exception is caught here, so any failure of the Jsoup fetch propagates to the caller.
 *
 * @param url address of the page to download
 * @return the joined text blocks; may be empty when the page has no matching text
 */
fun jsopFilter(url : String) : String {
    val joinString = "\n#"
    val body = Jsoup.connect(url).timeout(30000).get().body()

    // Branch order matters: the first matching URL pattern wins.
    val siteSelector = when {
        url.contains("nate.com", true) ->
            if (url.contains("view", true)) "[class*=articleView]" else "[class*=postRankSubjectList]"
        url.contains("newsis.com/view", true) -> "[class*=articleView]"
        url.contains("blog.naver.com", true) -> "[class*=se-viewer]"
        url.contains("bbc.com/korean/articles", true) -> "main[role$=main]"
        url.contains("chosun.com/client", true) -> "[class*=articleBody]"
        url.contains("nocutnews.co.kr/news", true) -> "[class*=container]"
        url.contains("hani.co.kr/arti/", true) -> "[class*=ArticleDetail]"
        url.contains("yna.co.kr/view", true) -> "[class*=container]"
        url.contains("newspim.com/news", true) -> "[class*=container]"
        else -> null
    }
    val siteMatches = siteSelector?.let { selector -> body.select(selector) } ?: Elements()

    val selected = if (siteMatches.isNotEmpty()) {
        siteMatches
    } else {
        // Generic fallback: look for the keyword in class names, ids and tag names.
        listOf("container", "article", "main", "viewer", "content")
            .flatMap { keyword ->
                body.select("[class*=$keyword]") + body.select("[id*=$keyword]") + body.select(keyword)
            }
            .filter { candidate -> candidate.text().length > 100 && candidate.children().size < 5 }
            // One element can match several selectors/keywords (e.g. class="main-content");
            // keep only its first occurrence so its text is not repeated in the result.
            .distinct()
    }

    // Last resort: fall back to the top-level children of <body>.
    val blocks = if (selected.isNotEmpty()) selected else body.children()
    return blocks.map { block -> block.children().eachText() }.joinToString(joinString)
}
// class WebScrap {
// @SerializedName("query", alternate = ["question"])
// var query: String? = null
// var original_html: String? = null
// var original_content: String? = null
// var summary: String? = null
// var keywords: ArrayList<String>? = null
// var related_links: ArrayList<String>? = null
// var relatedness_score: Double = 0.0
// }
private fun addDocuments(query : String) { private fun addDocuments(query : String) {
val embeddingModel = OllamaEmbeddingModel( val embeddingModel = OllamaEmbeddingModel(
@ -82,45 +145,49 @@ class Lama {
ObservationRegistry.create(), ObservationRegistry.create(),
ModelManagementOptions.defaults() ModelManagementOptions.defaults()
) )
val gSearch = "https://psn.lunaticbum.kr/search?q=${query?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date()))}&language=auto&time_range=month&safesearch=0&categories=general&format=json" val gSearch = "https://psn.lunaticbum.kr/search?q=${query?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date()))}&language=ko&time_range=month&safesearch=0&categories=general&format=json"
println("gSearch >>> ${gSearch}") println("gSearch >>> ${gSearch}")
val sdss = QPut(arrayListOf()) val sdss = QPut(arrayListOf())
WebClient.create().get() WebClient.create().get()
.uri(gSearch) .uri(gSearch)
.retrieve() .retrieve()
.bodyToMono(SearXng::class.java).timeout(Duration.ofMinutes(20L)).block()?.let { gsResult -> .bodyToMono(SearXng::class.java).timeout(Duration.ofMinutes(20L)).block()?.let { gsResult ->
gsResult.results?.filter { it.score > 0.5}?.forEach { gsResult.results?.filter { it.score > 0.3}?.forEach {
qPointsCount += 1 qPointsCount += 1
println("in filter") println("in filter ${it.url}")
it.originQuery = query // it.originQuery = query
val data = Gson().toJson(it) val data = Gson().toJson(it)
println(it.title) println(it.title)
Jsoup.connect(it.url).get().html().let { text -> jsopFilter(it.url!!).let { text ->
try { try {
println("text >>>>> $text") println("text >>>>> ${text?.chunked(50)?.first() ?: ""}")
it.pageData = chatClient.chat(OllamaApi.ChatRequest.Builder("phi4:14b").stream(false).format("json").messages( var dispoable = chatClient.chat(OllamaApi.ChatRequest.Builder("phi4:14b").stream(false).format("json").messages(
listOf(OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content("'${text}' 웹 페이지 모든 내욜을 복사 한건데 본문 내용만 정리해줘").build()) listOf(OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content("원문:\n'${text}'\n원문의 웹 페이지 소스는 '$query'이 질문에 대해 연관 결과로 받은 내용이야. 해당 정보를 파악해서 'query:{질문},contents:{본문내용},summary:{요약},keywords:[키워드],related_links:[링크],relatedness_score:{0.0~10.0}'이 형식의 결과만들어줘 내용은 한국어로 부탁할께").build())
).build()).message.content ).build()).toMono().subscribe({aiResponce ->
it.pageData = aiResponce.message.content
// println(aiResponce)
println("summary result >>>>> ${it.pageData}") println("summary result >>>>> ${it.pageData}")
// it.originHtml = text
val embeddingResponse = embeddingModel.call( val embeddingResponse = embeddingModel.call(
EmbeddingRequest( EmbeddingRequest(
listOf(data), listOf(data),
OllamaOptions.builder() OllamaOptions.builder()
.model("nomic-embed-text") .model("nomic-embed-text")
.truncate(false) .truncate(false).build()
.build()
) )
) )
sdss.points.add(QData(id = qPointsCount,embeddingResponse.result.output,it)) sdss.points.add(QData(id = qPointsCount,embeddingResponse.result.output,it))
},{err->
err.printStackTrace()
})
}catch (e : Exception) { }catch (e : Exception) {
e.printStackTrace()
} }
} }
} }
} }
println("out filter") println("out filter")
if (sdss.points.size > 0) { if (sdss.points.size > 0) {
println("sdss.points.size ${sdss.points.size} ${Gson().toJson(sdss)}")
val qUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points") val qUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
val client = WebClient.create() val client = WebClient.create()
client.put() client.put()
@ -140,18 +207,19 @@ class Lama {
var lists = client.post() var lists = client.post()
.uri(qUrl) .uri(qUrl)
.header("api-key","blama-admin-key-gb") .header("api-key","blama-admin-key-gb")
.body(BodyInserters.fromValue(Gson().toJson(QSearchData(embedFlots,5)))) .body(BodyInserters.fromValue(Gson().toJson(QSearchData(embedFlots,3))))
.retrieve() .retrieve()
.bodyToMono(QSearch::class.java).timeout(Duration.ofMinutes(20L)).block() .bodyToMono(QSearch::class.java).timeout(Duration.ofMinutes(20L)).block()
println(Gson().toJson(lists))
return if (lists?.result?.size ?: 0 > 0) { return if (lists?.result?.size ?: 0 > 0) {
val qContents = QContentsList() val qContents = QContentsList()
lists?.result?.forEach { lists?.result?.forEach {
qContents.ids.add(it.id) qContents.ids.add(it.id)
} }
val qCUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points") val qCUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
val client2 = WebClient.create() val client2 = WebClient.create()
client.post() client2.post()
.uri(qCUrl) .uri(qCUrl)
.header("api-key", "blama-admin-key-gb") .header("api-key", "blama-admin-key-gb")
.body(BodyInserters.fromValue(Gson().toJson(qContents))) .body(BodyInserters.fromValue(Gson().toJson(qContents)))