This commit is contained in:
lunaticbum 2025-03-07 18:33:09 +09:00
parent a89b4904ea
commit b191022e6d
4 changed files with 101 additions and 31 deletions

View File

@ -39,6 +39,7 @@ class AppConfig : WebMvcConfigurer {
@Bean
fun chatClient(): OllamaApi {
return OllamaApi("https://lama.lunaticbum.kr")
// .withDefaultOptions(
// OllamaOptions.create()
// .withModel("phi4:14b")

View File

@ -410,7 +410,7 @@ class Telegram {
// }
// }
CoroutineScope(Dispatchers.IO).async {
lama.generateResponse(query = originalQuery)
lama.generateResponse(query = originalQuery?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date())))
}
return "TEST"
}

View File

@ -12,7 +12,7 @@ class SearXng {
var unresponsive_engines: ArrayList<ArrayList<String>>? = null
}
class SearXngResult {
var originQuery : String? = null
// var originQuery : String? = null
var url: String? = null
var title: String? = null
var content: String? = null
@ -25,4 +25,5 @@ class SearXngResult {
var score: Double = 0.0
var category: String? = null
var pageData : String? = null
var originHtml : String? = null
}

View File

@ -3,17 +3,16 @@ package kr.lunaticbum.back.lun.service
import com.google.gson.Gson
import com.knuddels.jtokkit.api.IntArrayList
import com.google.gson.annotations.SerializedName
import io.micrometer.observation.ObservationRegistry
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.launch
import kr.lunaticbum.back.lun.configs.GlobalEnvironment
import kr.lunaticbum.back.lun.controllers.BumlamaResp
import kr.lunaticbum.back.lun.controllers.TelegramSendMsg
import kr.lunaticbum.back.lun.controllers.lamaGenerated
import kr.lunaticbum.back.lun.model.*
import org.jsoup.Jsoup
import org.jsoup.select.Elements
import org.springframework.ai.embedding.EmbeddingRequest
import org.springframework.ai.ollama.OllamaEmbeddingModel
import org.springframework.ai.ollama.api.OllamaApi
@ -25,11 +24,10 @@ import org.springframework.http.MediaType
import org.springframework.stereotype.Service
import org.springframework.web.reactive.function.BodyInserters
import org.springframework.web.reactive.function.client.WebClient
import java.net.URLEncoder
import reactor.kotlin.core.publisher.toMono
import java.text.SimpleDateFormat
import java.time.Duration
import java.util.*
import kotlin.collections.ArrayList
@Service
@ -74,6 +72,71 @@ class Lama {
.retrieve()
.bodyToMono(QCollection::class.java).timeout(Duration.ofMinutes(20L)).block()?.result?.points_count ?: 0L
}
/**
 * Downloads [url] with Jsoup and returns the text of the part of the page that most
 * likely holds the article body, with the individual text blocks joined by "\n#".
 *
 * Selection happens in three stages:
 *  1. If the URL matches one of the known sites, that site's CSS selector is used.
 *  2. If that yields nothing (or the site is unknown), elements whose class, id or tag
 *     name contains one of a few generic keywords are collected, keeping only those
 *     with more than 100 characters of text and fewer than 5 direct children.
 *  3. If there is still nothing, the direct children of `<body>` are used.
 *
 * For every selected element the text of each direct child is emitted as one block.
 * An element without children contributes its own text instead, so leaf nodes that
 * passed the length filter are not silently dropped.
 *
 * @param url absolute URL of the page to fetch.
 * @return the extracted text blocks joined with "\n#"; empty if the page has no text.
 *
 * No exception is caught here: any failure raised by the Jsoup fetch (30 s timeout)
 * propagates to the caller.
 */
fun jsopFilter(url : String) : String {
    val joinString = "\n#"
    val body = Jsoup.connect(url).timeout(30000).get().body()

    // Site-specific selector for the article container, or null for unknown sites.
    val siteSelector = when {
        url.contains("nate.com", true) ->
            if (url.contains("view", true)) "[class*=articleView]" else "[class*=postRankSubjectList]"
        url.contains("newsis.com/view", true) -> "[class*=articleView]"
        url.contains("blog.naver.com", true) -> "[class*=se-viewer]"
        url.contains("bbc.com/korean/articles", true) -> "main[role$=main]"
        url.contains("chosun.com/client", true) -> "[class*=articleBody]"
        url.contains("nocutnews.co.kr/news", true) -> "[class*=container]"
        url.contains("hani.co.kr/arti/", true) -> "[class*=ArticleDetail]"
        url.contains("yna.co.kr/view", true) -> "[class*=container]"
        url.contains("newspim.com/news", true) -> "[class*=container]"
        else -> null
    }
    val siteMatches = siteSelector?.let { body.select(it) }.orEmpty()

    // Generic fallback: look for typical "main content" class / id / tag names.
    val candidates = siteMatches.ifEmpty {
        listOf("container", "article", "main", "viewer", "content")
            .flatMap { keyword ->
                body.select("[class*=$keyword]") + body.select("[id*=$keyword]") + body.select(keyword)
            }
            .filter { it.text().length > 100 && it.children().size < 5 }
            // The same element can match several selectors / keywords; keep it once.
            .distinct()
    }

    // Last resort: take the top-level children of <body>.
    val sources = if (candidates.isNotEmpty()) candidates else body.children()

    return sources
        .flatMap { element ->
            val childTexts = element.children().eachText()
            // A leaf element has no children, so fall back to its own text.
            if (childTexts.isNotEmpty()) childTexts else listOf(element.text())
        }
        .filter { it.isNotBlank() }
        .joinToString(joinString)
}
// class WebScrap {
// @SerializedName("query", alternate = ["question"])
// var query: String? = null
// var original_html: String? = null
// var original_content: String? = null
// var summary: String? = null
// var keywords: ArrayList<String>? = null
// var related_links: ArrayList<String>? = null
// var relatedness_score: Double = 0.0
// }
private fun addDocuments(query : String) {
val embeddingModel = OllamaEmbeddingModel(
@ -82,45 +145,49 @@ class Lama {
ObservationRegistry.create(),
ModelManagementOptions.defaults()
)
val gSearch = "https://psn.lunaticbum.kr/search?q=${query?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date()))}&language=auto&time_range=month&safesearch=0&categories=general&format=json"
val gSearch = "https://psn.lunaticbum.kr/search?q=${query?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date()))}&language=ko&time_range=month&safesearch=0&categories=general&format=json"
println("gSearch >>> ${gSearch}")
val sdss = QPut(arrayListOf())
WebClient.create().get()
.uri(gSearch)
.retrieve()
.bodyToMono(SearXng::class.java).timeout(Duration.ofMinutes(20L)).block()?.let { gsResult ->
gsResult.results?.filter { it.score > 0.5}?.forEach {
gsResult.results?.filter { it.score > 0.3}?.forEach {
qPointsCount += 1
println("in filter")
it.originQuery = query
println("in filter ${it.url}")
// it.originQuery = query
val data = Gson().toJson(it)
println(it.title)
Jsoup.connect(it.url).get().html().let { text ->
jsopFilter(it.url!!).let { text ->
try {
println("text >>>>> $text")
it.pageData = chatClient.chat(OllamaApi.ChatRequest.Builder("phi4:14b").stream(false).format("json").messages(
listOf(OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content("'${text}' 웹 페이지 모든 내욜을 복사 한건데 본문 내용만 정리해줘").build())
).build()).message.content
println("text >>>>> ${text?.chunked(50)?.first() ?: ""}")
var dispoable = chatClient.chat(OllamaApi.ChatRequest.Builder("phi4:14b").stream(false).format("json").messages(
listOf(OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content("원문:\n'${text}'\n원문의 웹 페이지 소스는 '$query'이 질문에 대해 연관 결과로 받은 내용이야. 해당 정보를 파악해서 'query:{질문},contents:{본문내용},summary:{요약},keywords:[키워드],related_links:[링크],relatedness_score:{0.0~10.0}'이 형식의 결과만들어줘 내용은 한국어로 부탁할께").build())
).build()).toMono().subscribe({aiResponce ->
it.pageData = aiResponce.message.content
// println(aiResponce)
println("summary result >>>>> ${it.pageData}")
// it.originHtml = text
val embeddingResponse = embeddingModel.call(
EmbeddingRequest(
listOf(data),
OllamaOptions.builder()
.model("nomic-embed-text")
.truncate(false)
.build()
.truncate(false).build()
)
)
sdss.points.add(QData(id = qPointsCount,embeddingResponse.result.output,it))
},{err->
err.printStackTrace()
})
}catch (e : Exception) {
e.printStackTrace()
}
}
}
}
println("out filter")
if (sdss.points.size > 0) {
println("sdss.points.size ${sdss.points.size} ${Gson().toJson(sdss)}")
val qUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
val client = WebClient.create()
client.put()
@ -140,18 +207,19 @@ class Lama {
var lists = client.post()
.uri(qUrl)
.header("api-key","blama-admin-key-gb")
.body(BodyInserters.fromValue(Gson().toJson(QSearchData(embedFlots,5))))
.body(BodyInserters.fromValue(Gson().toJson(QSearchData(embedFlots,3))))
.retrieve()
.bodyToMono(QSearch::class.java).timeout(Duration.ofMinutes(20L)).block()
println(Gson().toJson(lists))
return if (lists?.result?.size ?: 0 > 0) {
val qContents = QContentsList()
lists?.result?.forEach {
qContents.ids.add(it.id)
}
val qCUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
val client2 = WebClient.create()
client.post()
client2.post()
.uri(qCUrl)
.header("api-key", "blama-admin-key-gb")
.body(BodyInserters.fromValue(Gson().toJson(qContents)))