....
This commit is contained in:
parent
a89b4904ea
commit
b191022e6d
@ -39,6 +39,7 @@ class AppConfig : WebMvcConfigurer {
|
|||||||
@Bean
|
@Bean
|
||||||
fun chatClient(): OllamaApi {
|
fun chatClient(): OllamaApi {
|
||||||
return OllamaApi("https://lama.lunaticbum.kr")
|
return OllamaApi("https://lama.lunaticbum.kr")
|
||||||
|
|
||||||
// .withDefaultOptions(
|
// .withDefaultOptions(
|
||||||
// OllamaOptions.create()
|
// OllamaOptions.create()
|
||||||
// .withModel("phi4:14b")
|
// .withModel("phi4:14b")
|
||||||
|
|||||||
@ -410,7 +410,7 @@ class Telegram {
|
|||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
CoroutineScope(Dispatchers.IO).async {
|
CoroutineScope(Dispatchers.IO).async {
|
||||||
lama.generateResponse(query = originalQuery)
|
lama.generateResponse(query = originalQuery?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date())))
|
||||||
}
|
}
|
||||||
return "TEST"
|
return "TEST"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -12,7 +12,7 @@ class SearXng {
|
|||||||
var unresponsive_engines: ArrayList<ArrayList<String>>? = null
|
var unresponsive_engines: ArrayList<ArrayList<String>>? = null
|
||||||
}
|
}
|
||||||
class SearXngResult {
|
class SearXngResult {
|
||||||
var originQuery : String? = null
|
// var originQuery : String? = null
|
||||||
var url: String? = null
|
var url: String? = null
|
||||||
var title: String? = null
|
var title: String? = null
|
||||||
var content: String? = null
|
var content: String? = null
|
||||||
@ -25,4 +25,5 @@ class SearXngResult {
|
|||||||
var score: Double = 0.0
|
var score: Double = 0.0
|
||||||
var category: String? = null
|
var category: String? = null
|
||||||
var pageData : String? = null
|
var pageData : String? = null
|
||||||
|
var originHtml : String? = null
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,17 +3,16 @@ package kr.lunaticbum.back.lun.service
|
|||||||
|
|
||||||
|
|
||||||
import com.google.gson.Gson
|
import com.google.gson.Gson
|
||||||
import com.knuddels.jtokkit.api.IntArrayList
|
import com.google.gson.annotations.SerializedName
|
||||||
import io.micrometer.observation.ObservationRegistry
|
import io.micrometer.observation.ObservationRegistry
|
||||||
import kotlinx.coroutines.CoroutineScope
|
import kotlinx.coroutines.CoroutineScope
|
||||||
import kotlinx.coroutines.Dispatchers
|
import kotlinx.coroutines.Dispatchers
|
||||||
import kotlinx.coroutines.launch
|
import kotlinx.coroutines.launch
|
||||||
import kr.lunaticbum.back.lun.configs.GlobalEnvironment
|
import kr.lunaticbum.back.lun.configs.GlobalEnvironment
|
||||||
import kr.lunaticbum.back.lun.controllers.BumlamaResp
|
|
||||||
import kr.lunaticbum.back.lun.controllers.TelegramSendMsg
|
import kr.lunaticbum.back.lun.controllers.TelegramSendMsg
|
||||||
import kr.lunaticbum.back.lun.controllers.lamaGenerated
|
|
||||||
import kr.lunaticbum.back.lun.model.*
|
import kr.lunaticbum.back.lun.model.*
|
||||||
import org.jsoup.Jsoup
|
import org.jsoup.Jsoup
|
||||||
|
import org.jsoup.select.Elements
|
||||||
import org.springframework.ai.embedding.EmbeddingRequest
|
import org.springframework.ai.embedding.EmbeddingRequest
|
||||||
import org.springframework.ai.ollama.OllamaEmbeddingModel
|
import org.springframework.ai.ollama.OllamaEmbeddingModel
|
||||||
import org.springframework.ai.ollama.api.OllamaApi
|
import org.springframework.ai.ollama.api.OllamaApi
|
||||||
@ -25,11 +24,10 @@ import org.springframework.http.MediaType
|
|||||||
import org.springframework.stereotype.Service
|
import org.springframework.stereotype.Service
|
||||||
import org.springframework.web.reactive.function.BodyInserters
|
import org.springframework.web.reactive.function.BodyInserters
|
||||||
import org.springframework.web.reactive.function.client.WebClient
|
import org.springframework.web.reactive.function.client.WebClient
|
||||||
import java.net.URLEncoder
|
import reactor.kotlin.core.publisher.toMono
|
||||||
import java.text.SimpleDateFormat
|
import java.text.SimpleDateFormat
|
||||||
import java.time.Duration
|
import java.time.Duration
|
||||||
import java.util.*
|
import java.util.*
|
||||||
import kotlin.collections.ArrayList
|
|
||||||
|
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@ -74,6 +72,71 @@ class Lama {
|
|||||||
.retrieve()
|
.retrieve()
|
||||||
.bodyToMono(QCollection::class.java).timeout(Duration.ofMinutes(20L)).block()?.result?.points_count ?: 0L
|
.bodyToMono(QCollection::class.java).timeout(Duration.ofMinutes(20L)).block()?.result?.points_count ?: 0L
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Downloads [url] with Jsoup and extracts the text of the page's main content area.
 *
 * Selection happens in two stages:
 * 1. For a handful of known sites a site-specific CSS selector is applied; every element it
 *    matches is used as-is.
 * 2. If no site rule applies, or the rule matches nothing, a generic heuristic scans elements
 *    whose class, id or tag name contains one of a few common "content" keywords, keeping only
 *    those with more than 100 characters of text and fewer than 5 direct children.
 *
 * If both stages find nothing, the direct children of `<body>` are used instead.
 *
 * @param url address of the page to fetch; the substring checks against it are case-insensitive.
 * @return one chunk per selected element, joined with `"\n#"`. Each chunk is the default
 *         `toString()` of the list returned by `children().eachText()`, i.e. `[text1, text2]`.
 *
 * Nothing is caught here, so any failure raised while fetching or parsing the page propagates
 * to the caller.
 */
fun jsopFilter(url : String) : String {
    val joinString = "\n#"
    // 30 000 is passed straight to Jsoup's timeout(), which takes milliseconds.
    val body = Jsoup.connect(url).timeout(30000).get().body()

    // Site-specific selectors. Branch order matters: the first matching rule wins.
    val siteSelector: String? = when {
        url.contains("nate.com", true) ->
            if (url.contains("view", true)) "[class*=articleView]" else "[class*=postRankSubjectList]"
        url.contains("newsis.com/view", true) -> "[class*=articleView]"
        url.contains("blog.naver.com", true) -> "[class*=se-viewer]"
        url.contains("bbc.com/korean/articles", true) -> "main[role\$=main]"
        url.contains("chosun.com/client", true) -> "[class*=articleBody]"
        url.contains("nocutnews.co.kr/news", true) -> "[class*=container]"
        url.contains("hani.co.kr/arti/", true) -> "[class*=ArticleDetail]"
        url.contains("yna.co.kr/view", true) -> "[class*=container]"
        url.contains("newspim.com/news", true) -> "[class*=container]"
        else -> null
    }
    val siteMatches = siteSelector?.let { selector -> body.select(selector).toList() }.orEmpty()

    val picked = if (siteMatches.isNotEmpty()) {
        siteMatches
    } else {
        // Generic fallback: look for the keyword in class names, ids and as a tag name.
        listOf("container", "article", "main", "viewer", "content")
            .flatMap { keyword ->
                body.select("[class*=$keyword]") + body.select("[id*=$keyword]") + body.select(keyword)
            }
            .filter { candidate -> candidate.text().length > 100 && candidate.children().size < 5 }
            // One element can match several of the selectors/keywords above; keep only its
            // first occurrence so the same text is not emitted repeatedly.
            .distinct()
    }

    // Last resort: fall back to the top-level children of <body>.
    val sections = if (picked.isNotEmpty()) picked else body.children().toList()

    // NOTE(review): each section is a List<String>, so joinToString renders it via
    // List.toString() ("[a, b, c]"). Kept as-is because downstream prompts already consume
    // this format; flatten here if plain text is actually wanted.
    return sections.map { section -> section.children().eachText() }.joinToString(joinString)
}
|
||||||
|
|
||||||
|
// class WebScrap {
|
||||||
|
// @SerializedName("query", alternate = ["question"])
|
||||||
|
// var query: String? = null
|
||||||
|
// var original_html: String? = null
|
||||||
|
// var original_content: String? = null
|
||||||
|
// var summary: String? = null
|
||||||
|
// var keywords: ArrayList<String>? = null
|
||||||
|
// var related_links: ArrayList<String>? = null
|
||||||
|
// var relatedness_score: Double = 0.0
|
||||||
|
// }
|
||||||
|
|
||||||
|
|
||||||
private fun addDocuments(query : String) {
|
private fun addDocuments(query : String) {
|
||||||
|
|
||||||
val embeddingModel = OllamaEmbeddingModel(
|
val embeddingModel = OllamaEmbeddingModel(
|
||||||
@ -82,45 +145,49 @@ class Lama {
|
|||||||
ObservationRegistry.create(),
|
ObservationRegistry.create(),
|
||||||
ModelManagementOptions.defaults()
|
ModelManagementOptions.defaults()
|
||||||
)
|
)
|
||||||
val gSearch = "https://psn.lunaticbum.kr/search?q=${query?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date()))}&language=auto&time_range=month&safesearch=0&categories=general&format=json"
|
val gSearch = "https://psn.lunaticbum.kr/search?q=${query?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date()))}&language=ko&time_range=month&safesearch=0&categories=general&format=json"
|
||||||
println("gSearch >>> ${gSearch}")
|
println("gSearch >>> ${gSearch}")
|
||||||
val sdss = QPut(arrayListOf())
|
val sdss = QPut(arrayListOf())
|
||||||
WebClient.create().get()
|
WebClient.create().get()
|
||||||
.uri(gSearch)
|
.uri(gSearch)
|
||||||
.retrieve()
|
.retrieve()
|
||||||
.bodyToMono(SearXng::class.java).timeout(Duration.ofMinutes(20L)).block()?.let { gsResult ->
|
.bodyToMono(SearXng::class.java).timeout(Duration.ofMinutes(20L)).block()?.let { gsResult ->
|
||||||
gsResult.results?.filter { it.score > 0.5}?.forEach {
|
gsResult.results?.filter { it.score > 0.3}?.forEach {
|
||||||
qPointsCount += 1
|
qPointsCount += 1
|
||||||
println("in filter")
|
println("in filter ${it.url}")
|
||||||
it.originQuery = query
|
// it.originQuery = query
|
||||||
val data = Gson().toJson(it)
|
val data = Gson().toJson(it)
|
||||||
println(it.title)
|
println(it.title)
|
||||||
Jsoup.connect(it.url).get().html().let { text ->
|
jsopFilter(it.url!!).let { text ->
|
||||||
try {
|
try {
|
||||||
println("text >>>>> $text")
|
println("text >>>>> ${text?.chunked(50)?.first() ?: ""}")
|
||||||
it.pageData = chatClient.chat(OllamaApi.ChatRequest.Builder("phi4:14b").stream(false).format("json").messages(
|
var dispoable = chatClient.chat(OllamaApi.ChatRequest.Builder("phi4:14b").stream(false).format("json").messages(
|
||||||
listOf(OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content("'${text}' 웹 페이지 모든 내욜을 복사 한건데 본문 내용만 정리해줘").build())
|
listOf(OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content("원문:\n'${text}'\n원문의 웹 페이지 소스는 '$query'이 질문에 대해 연관 결과로 받은 내용이야. 해당 정보를 파악해서 'query:{질문},contents:{본문내용},summary:{요약},keywords:[키워드],related_links:[링크],relatedness_score:{0.0~10.0}'이 형식의 결과만들어줘 내용은 한국어로 부탁할께").build())
|
||||||
).build()).message.content
|
).build()).toMono().subscribe({aiResponce ->
|
||||||
println("summary result >>>>> ${it.pageData}")
|
it.pageData = aiResponce.message.content
|
||||||
val embeddingResponse = embeddingModel.call(
|
// println(aiResponce)
|
||||||
EmbeddingRequest(
|
println("summary result >>>>> ${it.pageData}")
|
||||||
listOf(data),
|
// it.originHtml = text
|
||||||
OllamaOptions.builder()
|
val embeddingResponse = embeddingModel.call(
|
||||||
.model("nomic-embed-text")
|
EmbeddingRequest(
|
||||||
.truncate(false)
|
listOf(data),
|
||||||
.build()
|
OllamaOptions.builder()
|
||||||
|
.model("nomic-embed-text")
|
||||||
|
.truncate(false).build()
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
sdss.points.add(QData(id = qPointsCount,embeddingResponse.result.output,it))
|
||||||
sdss.points.add(QData(id = qPointsCount,embeddingResponse.result.output,it))
|
},{err->
|
||||||
|
err.printStackTrace()
|
||||||
|
})
|
||||||
}catch (e : Exception) {
|
}catch (e : Exception) {
|
||||||
|
e.printStackTrace()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
println("out filter")
|
println("out filter")
|
||||||
if (sdss.points.size > 0) {
|
if (sdss.points.size > 0) {
|
||||||
println("sdss.points.size ${sdss.points.size} ${Gson().toJson(sdss)}")
|
|
||||||
val qUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
|
val qUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
|
||||||
val client = WebClient.create()
|
val client = WebClient.create()
|
||||||
client.put()
|
client.put()
|
||||||
@ -140,18 +207,19 @@ class Lama {
|
|||||||
var lists = client.post()
|
var lists = client.post()
|
||||||
.uri(qUrl)
|
.uri(qUrl)
|
||||||
.header("api-key","blama-admin-key-gb")
|
.header("api-key","blama-admin-key-gb")
|
||||||
.body(BodyInserters.fromValue(Gson().toJson(QSearchData(embedFlots,5))))
|
.body(BodyInserters.fromValue(Gson().toJson(QSearchData(embedFlots,3))))
|
||||||
.retrieve()
|
.retrieve()
|
||||||
.bodyToMono(QSearch::class.java).timeout(Duration.ofMinutes(20L)).block()
|
.bodyToMono(QSearch::class.java).timeout(Duration.ofMinutes(20L)).block()
|
||||||
|
println(Gson().toJson(lists))
|
||||||
return if (lists?.result?.size ?: 0 > 0) {
|
return if (lists?.result?.size ?: 0 > 0) {
|
||||||
val qContents = QContentsList()
|
val qContents = QContentsList()
|
||||||
|
|
||||||
lists?.result?.forEach {
|
lists?.result?.forEach {
|
||||||
|
|
||||||
qContents.ids.add(it.id)
|
qContents.ids.add(it.id)
|
||||||
}
|
}
|
||||||
val qCUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
|
val qCUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
|
||||||
val client2 = WebClient.create()
|
val client2 = WebClient.create()
|
||||||
client.post()
|
client2.post()
|
||||||
.uri(qCUrl)
|
.uri(qCUrl)
|
||||||
.header("api-key", "blama-admin-key-gb")
|
.header("api-key", "blama-admin-key-gb")
|
||||||
.body(BodyInserters.fromValue(Gson().toJson(qContents)))
|
.body(BodyInserters.fromValue(Gson().toJson(qContents)))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user