....
This commit is contained in:
parent
a89b4904ea
commit
b191022e6d
@ -39,6 +39,7 @@ class AppConfig : WebMvcConfigurer {
|
||||
@Bean
|
||||
fun chatClient(): OllamaApi {
|
||||
return OllamaApi("https://lama.lunaticbum.kr")
|
||||
|
||||
// .withDefaultOptions(
|
||||
// OllamaOptions.create()
|
||||
// .withModel("phi4:14b")
|
||||
|
||||
@ -410,7 +410,7 @@ class Telegram {
|
||||
// }
|
||||
// }
|
||||
CoroutineScope(Dispatchers.IO).async {
|
||||
lama.generateResponse(query = originalQuery)
|
||||
lama.generateResponse(query = originalQuery?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date())))
|
||||
}
|
||||
return "TEST"
|
||||
}
|
||||
|
||||
@ -12,7 +12,7 @@ class SearXng {
|
||||
var unresponsive_engines: ArrayList<ArrayList<String>>? = null
|
||||
}
|
||||
class SearXngResult {
|
||||
var originQuery : String? = null
|
||||
// var originQuery : String? = null
|
||||
var url: String? = null
|
||||
var title: String? = null
|
||||
var content: String? = null
|
||||
@ -25,4 +25,5 @@ class SearXngResult {
|
||||
var score: Double = 0.0
|
||||
var category: String? = null
|
||||
var pageData : String? = null
|
||||
var originHtml : String? = null
|
||||
}
|
||||
|
||||
@ -3,17 +3,16 @@ package kr.lunaticbum.back.lun.service
|
||||
|
||||
|
||||
import com.google.gson.Gson
|
||||
import com.knuddels.jtokkit.api.IntArrayList
|
||||
import com.google.gson.annotations.SerializedName
|
||||
import io.micrometer.observation.ObservationRegistry
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.launch
|
||||
import kr.lunaticbum.back.lun.configs.GlobalEnvironment
|
||||
import kr.lunaticbum.back.lun.controllers.BumlamaResp
|
||||
import kr.lunaticbum.back.lun.controllers.TelegramSendMsg
|
||||
import kr.lunaticbum.back.lun.controllers.lamaGenerated
|
||||
import kr.lunaticbum.back.lun.model.*
|
||||
import org.jsoup.Jsoup
|
||||
import org.jsoup.select.Elements
|
||||
import org.springframework.ai.embedding.EmbeddingRequest
|
||||
import org.springframework.ai.ollama.OllamaEmbeddingModel
|
||||
import org.springframework.ai.ollama.api.OllamaApi
|
||||
@ -25,11 +24,10 @@ import org.springframework.http.MediaType
|
||||
import org.springframework.stereotype.Service
|
||||
import org.springframework.web.reactive.function.BodyInserters
|
||||
import org.springframework.web.reactive.function.client.WebClient
|
||||
import java.net.URLEncoder
|
||||
import reactor.kotlin.core.publisher.toMono
|
||||
import java.text.SimpleDateFormat
|
||||
import java.time.Duration
|
||||
import java.util.*
|
||||
import kotlin.collections.ArrayList
|
||||
|
||||
|
||||
@Service
|
||||
@ -74,6 +72,71 @@ class Lama {
|
||||
.retrieve()
|
||||
.bodyToMono(QCollection::class.java).timeout(Duration.ofMinutes(20L)).block()?.result?.points_count ?: 0L
|
||||
}
|
||||
|
||||
/**
 * Fetches [url] with Jsoup and extracts the text of the elements most likely to hold the article body.
 *
 * Selection happens in three stages:
 * 1. A site-specific CSS selector is chosen by matching a known fragment of [url] (case-insensitive).
 * 2. If no site matched, or its selector found nothing, a generic scan collects elements whose
 *    class, id, or tag name matches one of a few common keywords, keeping only those with more than
 *    100 characters of text and fewer than 5 direct children.
 * 3. If that is still empty, the direct children of `<body>` are used.
 *
 * @param url address of the page to download; the request uses a 30 000 ms timeout.
 * @return for each selected element, the `eachText()` list of its children rendered via `toString()`
 *   (i.e. `[a, b, ...]`), joined with `"\n#"`.
 *   NOTE(review): the bracketed list rendering is kept from the original implementation — confirm
 *   whether consumers want that or plain text.
 */
fun jsopFilter(url : String) : String {
    val joinString = "\n#"
    val body = Jsoup.connect(url).timeout(30000).get().body()

    // Site-specific selector, or null when the URL belongs to no known site.
    val siteSelector: String? = when {
        url.contains("nate.com", true) ->
            if (url.contains("view", true)) "[class*=articleView]" else "[class*=postRankSubjectList]"
        url.contains("newsis.com/view", true) -> "[class*=articleView]"
        url.contains("blog.naver.com", true) -> "[class*=se-viewer]"
        url.contains("bbc.com/korean/articles", true) -> "main[role$=main]"
        url.contains("chosun.com/client", true) -> "[class*=articleBody]"
        url.contains("nocutnews.co.kr/news", true) -> "[class*=container]"
        url.contains("hani.co.kr/arti/", true) -> "[class*=ArticleDetail]"
        url.contains("yna.co.kr/view", true) -> "[class*=container]"
        url.contains("newspim.com/news", true) -> "[class*=container]"
        else -> null
    }

    val selected = Elements()
    if (siteSelector != null) {
        selected.addAll(body.select(siteSelector))
    }

    if (selected.isEmpty()) {
        // Generic fallback: probe class, id and tag name for each keyword, in that order.
        val candidates = listOf("container", "article", "main", "viewer", "content").flatMap { keyword ->
            body.select("[class*=$keyword]") + body.select("[id*=$keyword]") + body.select(keyword)
        }
        // One element can match several selectors/keywords; distinct() keeps the first occurrence
        // so its text is not emitted more than once.
        selected.addAll(
            candidates.distinct().filter { it.text().length > 100 && it.children().size < 5 }
        )
    }

    // Last resort: fall back to the top-level children of <body>.
    val source = if (selected.isNotEmpty()) selected else body.children()
    return source.map { it.children().eachText() }.joinToString(joinString)
}
|
||||
|
||||
// class WebScrap {
|
||||
// @SerializedName("query", alternate = ["question"])
|
||||
// var query: String? = null
|
||||
// var original_html: String? = null
|
||||
// var original_content: String? = null
|
||||
// var summary: String? = null
|
||||
// var keywords: ArrayList<String>? = null
|
||||
// var related_links: ArrayList<String>? = null
|
||||
// var relatedness_score: Double = 0.0
|
||||
// }
|
||||
|
||||
|
||||
private fun addDocuments(query : String) {
|
||||
|
||||
val embeddingModel = OllamaEmbeddingModel(
|
||||
@ -82,45 +145,49 @@ class Lama {
|
||||
ObservationRegistry.create(),
|
||||
ModelManagementOptions.defaults()
|
||||
)
|
||||
val gSearch = "https://psn.lunaticbum.kr/search?q=${query?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date()))}&language=auto&time_range=month&safesearch=0&categories=general&format=json"
|
||||
val gSearch = "https://psn.lunaticbum.kr/search?q=${query?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date()))}&language=ko&time_range=month&safesearch=0&categories=general&format=json"
|
||||
println("gSearch >>> ${gSearch}")
|
||||
val sdss = QPut(arrayListOf())
|
||||
WebClient.create().get()
|
||||
.uri(gSearch)
|
||||
.retrieve()
|
||||
.bodyToMono(SearXng::class.java).timeout(Duration.ofMinutes(20L)).block()?.let { gsResult ->
|
||||
gsResult.results?.filter { it.score > 0.5}?.forEach {
|
||||
gsResult.results?.filter { it.score > 0.3}?.forEach {
|
||||
qPointsCount += 1
|
||||
println("in filter")
|
||||
it.originQuery = query
|
||||
println("in filter ${it.url}")
|
||||
// it.originQuery = query
|
||||
val data = Gson().toJson(it)
|
||||
println(it.title)
|
||||
Jsoup.connect(it.url).get().html().let { text ->
|
||||
jsopFilter(it.url!!).let { text ->
|
||||
try {
|
||||
println("text >>>>> $text")
|
||||
it.pageData = chatClient.chat(OllamaApi.ChatRequest.Builder("phi4:14b").stream(false).format("json").messages(
|
||||
listOf(OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content("'${text}' 웹 페이지 모든 내욜을 복사 한건데 본문 내용만 정리해줘").build())
|
||||
).build()).message.content
|
||||
println("text >>>>> ${text?.chunked(50)?.first() ?: ""}")
|
||||
var dispoable = chatClient.chat(OllamaApi.ChatRequest.Builder("phi4:14b").stream(false).format("json").messages(
|
||||
listOf(OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content("원문:\n'${text}'\n원문의 웹 페이지 소스는 '$query'이 질문에 대해 연관 결과로 받은 내용이야. 해당 정보를 파악해서 'query:{질문},contents:{본문내용},summary:{요약},keywords:[키워드],related_links:[링크],relatedness_score:{0.0~10.0}'이 형식의 결과만들어줘 내용은 한국어로 부탁할께").build())
|
||||
).build()).toMono().subscribe({aiResponce ->
|
||||
it.pageData = aiResponce.message.content
|
||||
// println(aiResponce)
|
||||
println("summary result >>>>> ${it.pageData}")
|
||||
// it.originHtml = text
|
||||
val embeddingResponse = embeddingModel.call(
|
||||
EmbeddingRequest(
|
||||
listOf(data),
|
||||
OllamaOptions.builder()
|
||||
.model("nomic-embed-text")
|
||||
.truncate(false)
|
||||
.build()
|
||||
.truncate(false).build()
|
||||
)
|
||||
)
|
||||
sdss.points.add(QData(id = qPointsCount,embeddingResponse.result.output,it))
|
||||
},{err->
|
||||
err.printStackTrace()
|
||||
})
|
||||
}catch (e : Exception) {
|
||||
|
||||
e.printStackTrace()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
println("out filter")
|
||||
if (sdss.points.size > 0) {
|
||||
println("sdss.points.size ${sdss.points.size} ${Gson().toJson(sdss)}")
|
||||
val qUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
|
||||
val client = WebClient.create()
|
||||
client.put()
|
||||
@ -140,18 +207,19 @@ class Lama {
|
||||
var lists = client.post()
|
||||
.uri(qUrl)
|
||||
.header("api-key","blama-admin-key-gb")
|
||||
.body(BodyInserters.fromValue(Gson().toJson(QSearchData(embedFlots,5))))
|
||||
.body(BodyInserters.fromValue(Gson().toJson(QSearchData(embedFlots,3))))
|
||||
.retrieve()
|
||||
.bodyToMono(QSearch::class.java).timeout(Duration.ofMinutes(20L)).block()
|
||||
println(Gson().toJson(lists))
|
||||
return if (lists?.result?.size ?: 0 > 0) {
|
||||
val qContents = QContentsList()
|
||||
|
||||
lists?.result?.forEach {
|
||||
|
||||
qContents.ids.add(it.id)
|
||||
}
|
||||
val qCUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
|
||||
val client2 = WebClient.create()
|
||||
client.post()
|
||||
client2.post()
|
||||
.uri(qCUrl)
|
||||
.header("api-key", "blama-admin-key-gb")
|
||||
.body(BodyInserters.fromValue(Gson().toJson(qContents)))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user