...
This commit is contained in:
parent
81417ff8aa
commit
c69e18346a
@ -52,6 +52,10 @@ dependencies {
|
|||||||
implementation("org.springframework.boot:spring-boot-starter-thymeleaf")
|
implementation("org.springframework.boot:spring-boot-starter-thymeleaf")
|
||||||
implementation("nz.net.ultraq.thymeleaf:thymeleaf-layout-dialect")
|
implementation("nz.net.ultraq.thymeleaf:thymeleaf-layout-dialect")
|
||||||
implementation ("org.jsoup:jsoup:1.18.1")
|
implementation ("org.jsoup:jsoup:1.18.1")
|
||||||
|
|
||||||
|
implementation ("org.seleniumhq.selenium:selenium-java:4.10.0")
|
||||||
|
|
||||||
|
|
||||||
implementation ("com.drewnoakes:metadata-extractor:2.19.0")
|
implementation ("com.drewnoakes:metadata-extractor:2.19.0")
|
||||||
implementation("org.springframework.boot:spring-boot-starter-security")
|
implementation("org.springframework.boot:spring-boot-starter-security")
|
||||||
compileOnly("org.projectlombok:lombok")
|
compileOnly("org.projectlombok:lombok")
|
||||||
|
|||||||
@ -44,7 +44,7 @@ class BumsInterceptor : HandlerInterceptor {
|
|||||||
handler: Any,
|
handler: Any,
|
||||||
@Nullable modelAndView: ModelAndView?
|
@Nullable modelAndView: ModelAndView?
|
||||||
) {
|
) {
|
||||||
var skippResourcesExtension = arrayListOf(".ajax",".js",".css","/tlg/",".api").filter { request.requestURI.contains(it)}.size > 0
|
var skippResourcesExtension = arrayListOf(".ajax",".js",".css","/tlg/",".api","error").filter { request.requestURI.contains(it)}.size > 0
|
||||||
if (!skippResourcesExtension) {
|
if (!skippResourcesExtension) {
|
||||||
if (request.requestURI.contains("logout") == false && !request.cookies.isNullOrEmpty() && request.cookies.filter {
|
if (request.requestURI.contains("logout") == false && !request.cookies.isNullOrEmpty() && request.cookies.filter {
|
||||||
it.name.equals(
|
it.name.equals(
|
||||||
|
|||||||
@ -371,7 +371,7 @@ class Telegram {
|
|||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
CoroutineScope(Dispatchers.IO).async {
|
CoroutineScope(Dispatchers.IO).async {
|
||||||
lama.generateResponse(query = originalQuery?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date())))
|
lama.generateResponse(originalQuery?.replace("오늘","오늘(${SimpleDateFormat("yyyy-MM-dd").format(Date())})"))
|
||||||
}
|
}
|
||||||
return "TEST"
|
return "TEST"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -16,8 +16,8 @@ class QConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
class QHnswConfig {
|
class QHnswConfig {
|
||||||
var m: Int = 0
|
var m: Long = 0
|
||||||
var ef_construct: Int = 0
|
var ef_construct: Long = 0
|
||||||
var full_scan_threshold: Int = 0
|
var full_scan_threshold: Int = 0
|
||||||
var max_indexing_threads: Int = 0
|
var max_indexing_threads: Int = 0
|
||||||
var on_disk: Boolean = false
|
var on_disk: Boolean = false
|
||||||
@ -25,20 +25,20 @@ class QHnswConfig {
|
|||||||
|
|
||||||
class QOptimizerConfig {
|
class QOptimizerConfig {
|
||||||
var deleted_threshold: Double = 0.0
|
var deleted_threshold: Double = 0.0
|
||||||
var vacuum_min_vector_number: Int = 0
|
var vacuum_min_vector_number: Long = 0
|
||||||
var default_segment_number: Int = 0
|
var default_segment_number: Long = 0
|
||||||
var max_segment_size: Any? = null
|
var max_segment_size: Any? = null
|
||||||
var memmap_threshold: Any? = null
|
var memmap_threshold: Any? = null
|
||||||
var indexing_threshold: Int = 0
|
var indexing_threshold: Long = 0
|
||||||
var flush_interval_sec: Int = 0
|
var flush_interval_sec: Long = 0
|
||||||
var max_optimization_threads: Any? = null
|
var max_optimization_threads: Any? = null
|
||||||
}
|
}
|
||||||
|
|
||||||
class QParams {
|
class QParams {
|
||||||
var vectors: QVectors? = null
|
var vectors: QVectors? = null
|
||||||
var shard_number: Int = 0
|
var shard_number: Long = 0
|
||||||
var replication_factor: Int = 0
|
var replication_factor: Long = 0
|
||||||
var write_consistency_factor: Int = 0
|
var write_consistency_factor: Long = 0
|
||||||
var on_disk_payload: Boolean = false
|
var on_disk_payload: Boolean = false
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -47,9 +47,9 @@ class QPayloadSchema
|
|||||||
class QResult {
|
class QResult {
|
||||||
var status: String? = null
|
var status: String? = null
|
||||||
var optimizer_status: String? = null
|
var optimizer_status: String? = null
|
||||||
var indexed_vectors_count: Int = 0
|
var indexed_vectors_count: Long = 0
|
||||||
var points_count: Long = 0
|
var points_count: Long = 0
|
||||||
var segments_count: Int = 0
|
var segments_count: Long = 0
|
||||||
var config: QConfig? = null
|
var config: QConfig? = null
|
||||||
var payload_schema: QPayloadSchema? = null
|
var payload_schema: QPayloadSchema? = null
|
||||||
}
|
}
|
||||||
@ -61,18 +61,18 @@ class QStrictModeConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
class QVectors {
|
class QVectors {
|
||||||
var size: Int = 0
|
var size: Long = 0
|
||||||
var distance: String? = null
|
var distance: String? = null
|
||||||
}
|
}
|
||||||
|
|
||||||
class QWalConfig {
|
class QWalConfig {
|
||||||
var wal_capacity_mb: Int = 0
|
var wal_capacity_mb: Long = 0
|
||||||
var wal_segments_ahead: Int = 0
|
var wal_segments_ahead: Long = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
class QSearchResult {
|
class QSearchResult {
|
||||||
var id: Int = 0
|
var id: Long = 0
|
||||||
var version: Int = 0
|
var version: Long = 0
|
||||||
var score: Double = 0.0
|
var score: Double = 0.0
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -103,7 +103,7 @@ class QContentsPayload {
|
|||||||
}
|
}
|
||||||
|
|
||||||
class QContentsResult {
|
class QContentsResult {
|
||||||
var id: Int = 0
|
var id: Long = 0
|
||||||
var payload: QContentsPayload? = null
|
var payload: QContentsPayload? = null
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -3,34 +3,40 @@ package kr.lunaticbum.back.lun.service
|
|||||||
|
|
||||||
|
|
||||||
import com.google.gson.Gson
|
import com.google.gson.Gson
|
||||||
import com.google.gson.annotations.SerializedName
|
import com.google.gson.JsonElement
|
||||||
|
import com.google.gson.JsonObject
|
||||||
|
import com.google.gson.JsonParser
|
||||||
import io.micrometer.observation.ObservationRegistry
|
import io.micrometer.observation.ObservationRegistry
|
||||||
import kotlinx.coroutines.CoroutineScope
|
import kotlinx.coroutines.CoroutineScope
|
||||||
import kotlinx.coroutines.Dispatchers
|
import kotlinx.coroutines.Dispatchers
|
||||||
import kotlinx.coroutines.async
|
|
||||||
import kotlinx.coroutines.launch
|
import kotlinx.coroutines.launch
|
||||||
import kr.lunaticbum.back.lun.configs.GlobalEnvironment
|
import kr.lunaticbum.back.lun.configs.GlobalEnvironment
|
||||||
import kr.lunaticbum.back.lun.controllers.TelegramSendMsg
|
import kr.lunaticbum.back.lun.controllers.TelegramSendMsg
|
||||||
import kr.lunaticbum.back.lun.model.*
|
import kr.lunaticbum.back.lun.model.*
|
||||||
|
import kr.lunaticbum.back.lun.utils.RssFeedsParser
|
||||||
import org.jsoup.Jsoup
|
import org.jsoup.Jsoup
|
||||||
import org.jsoup.select.Elements
|
import org.jsoup.select.Elements
|
||||||
|
import org.openqa.selenium.By
|
||||||
|
import org.openqa.selenium.WebDriver
|
||||||
|
import org.openqa.selenium.chrome.ChromeOptions
|
||||||
|
import org.openqa.selenium.remote.RemoteWebDriver
|
||||||
import org.springframework.ai.embedding.EmbeddingRequest
|
import org.springframework.ai.embedding.EmbeddingRequest
|
||||||
import org.springframework.ai.ollama.OllamaEmbeddingModel
|
import org.springframework.ai.ollama.OllamaEmbeddingModel
|
||||||
import org.springframework.ai.ollama.api.OllamaApi
|
import org.springframework.ai.ollama.api.OllamaApi
|
||||||
import org.springframework.ai.ollama.api.OllamaOptions
|
import org.springframework.ai.ollama.api.OllamaOptions
|
||||||
import org.springframework.ai.ollama.management.ModelManagementOptions
|
import org.springframework.ai.ollama.management.ModelManagementOptions
|
||||||
import org.springframework.beans.factory.annotation.Autowired
|
import org.springframework.beans.factory.annotation.Autowired
|
||||||
import org.springframework.beans.factory.annotation.Qualifier
|
|
||||||
import org.springframework.http.MediaType
|
import org.springframework.http.MediaType
|
||||||
import org.springframework.scheduling.annotation.Async
|
import org.springframework.scheduling.annotation.Async
|
||||||
import org.springframework.stereotype.Service
|
import org.springframework.stereotype.Service
|
||||||
import org.springframework.web.reactive.function.BodyInserters
|
import org.springframework.web.reactive.function.BodyInserters
|
||||||
import org.springframework.web.reactive.function.client.WebClient
|
import org.springframework.web.reactive.function.client.WebClient
|
||||||
import reactor.kotlin.core.publisher.toMono
|
import reactor.kotlin.core.publisher.toMono
|
||||||
|
import java.net.URL
|
||||||
|
import java.net.URLEncoder
|
||||||
import java.text.SimpleDateFormat
|
import java.text.SimpleDateFormat
|
||||||
import java.time.Duration
|
import java.time.Duration
|
||||||
import java.util.*
|
import java.util.*
|
||||||
import kotlin.collections.ArrayList
|
|
||||||
|
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@ -46,7 +52,7 @@ class Lama {
|
|||||||
data class QPut(val points : ArrayList<QData>)
|
data class QPut(val points : ArrayList<QData>)
|
||||||
data class QData(val id : Long, val vector : FloatArray, val payload : SearXngResult)
|
data class QData(val id : Long, val vector : FloatArray, val payload : SearXngResult)
|
||||||
|
|
||||||
data class QContentsList(var ids : ArrayList<Int> = ArrayList(), var with_payload : Boolean = true, var with_vector : Boolean = false)
|
data class QContentsList(var ids : ArrayList<Long> = ArrayList(), var with_payload : Boolean = true, var with_vector : Boolean = false)
|
||||||
// fun makeCollection() : String{
|
// fun makeCollection() : String{
|
||||||
//
|
//
|
||||||
// class CollectionPut {
|
// class CollectionPut {
|
||||||
@ -76,51 +82,51 @@ class Lama {
|
|||||||
|
|
||||||
fun jsopFilter(url : String) : String {
|
fun jsopFilter(url : String) : String {
|
||||||
val joinString = "\n#"
|
val joinString = "\n#"
|
||||||
var lastElement : Elements = Elements()
|
var lastElements : Elements = Elements()
|
||||||
var body = Jsoup.connect(url).timeout(30000).get().body()
|
var body = Jsoup.connect(url).timeout(30000).get().body()
|
||||||
var elements : Elements? = null
|
// var elements : Elements? = null
|
||||||
if (url.contains("nate.com", true)) {
|
// if (url.contains("nate.com", true)) {
|
||||||
if (url.contains("view", true)) {
|
// if (url.contains("view", true)) {
|
||||||
elements = body.select("[class*=articleView]")
|
// elements = body.select("[class*=articleView]")
|
||||||
}else {
|
// }else {
|
||||||
elements = body.select("[class*=postRankSubjectList]")
|
// elements = body.select("[class*=postRankSubjectList]")
|
||||||
}
|
// }
|
||||||
} else if (url.contains("newsis.com/view", true)) {
|
// } else if (url.contains("newsis.com/view", true)) {
|
||||||
elements = body.select("[class*=articleView]")
|
// elements = body.select("[class*=articleView]")
|
||||||
} else if (url.contains("blog.naver.com", true)) {
|
// } else if (url.contains("blog.naver.com", true)) {
|
||||||
elements = body.select("[class*=se-viewer]")
|
// elements = body.select("[class*=se-viewer]")
|
||||||
} else if (url.contains("bbc.com/korean/articles", true)) {
|
// } else if (url.contains("bbc.com/korean/articles", true)) {
|
||||||
elements = body.select("main[role$=main]")
|
// elements = body.select("main[role$=main]")
|
||||||
} else if (url.contains("chosun.com/client", true)) {
|
// } else if (url.contains("chosun.com/client", true)) {
|
||||||
elements = body.select("[class*=articleBody]")
|
// elements = body.select("[class*=articleBody]")
|
||||||
} else if (url.contains("nocutnews.co.kr/news", true)) {
|
// } else if (url.contains("nocutnews.co.kr/news", true)) {
|
||||||
elements = body.select("[class*=container]")
|
// elements = body.select("[class*=container]")
|
||||||
} else if (url.contains("hani.co.kr/arti/", true)) {
|
// } else if (url.contains("hani.co.kr/arti/", true)) {
|
||||||
elements = body.select("[class*=ArticleDetail]")
|
// elements = body.select("[class*=ArticleDetail]")
|
||||||
} else if (url.contains("yna.co.kr/view", true)) {
|
// } else if (url.contains("yna.co.kr/view", true)) {
|
||||||
elements = body.select("[class*=container]")
|
// elements = body.select("[class*=container]")
|
||||||
} else if (url.contains("newspim.com/news", true)) {
|
// } else if (url.contains("newspim.com/news", true)) {
|
||||||
elements = body.select("[class*=container]")
|
// elements = body.select("[class*=container]")
|
||||||
} else {
|
// } else {
|
||||||
|
//
|
||||||
}
|
// }
|
||||||
if (elements?.size ?: 0 > 0) {
|
// if (elements?.size ?: 0 > 0) {
|
||||||
elements?.forEach {
|
// elements?.forEach {
|
||||||
lastElement.add(it)
|
// lastElements.add(it)
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
if (lastElement.size < 1) {
|
// if (lastElements.size < 1) {
|
||||||
arrayOf("container","article","main","viewer","content").forEach {
|
// arrayOf("container","article","main","viewer","content").forEach {
|
||||||
var result = Elements()
|
// var result = Elements()
|
||||||
result.addAll(body.select("[class*=$it]"))
|
// result.addAll(body.select("[class*=$it]"))
|
||||||
result.addAll(body.select("[id*=$it]"))
|
// result.addAll(body.select("[id*=$it]"))
|
||||||
result.addAll(body.select(it))
|
// result.addAll(body.select(it))
|
||||||
result.forEach { if (it.text().length > 100 && it.children().size < 5) { lastElement.add(it) } }
|
// result.forEach { if (it.text().length > 100 && it.children().size < 5) { lastElements.add(it) } }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
return if (lastElement.size > 0) {
|
return if (lastElements.size > 0) {
|
||||||
lastElement.eachText().joinToString(joinString)
|
lastElements.eachText().joinToString(joinString)
|
||||||
} else {
|
} else {
|
||||||
body.children().eachText().joinToString(joinString)
|
body.children().eachText().joinToString(joinString)
|
||||||
}
|
}
|
||||||
@ -140,104 +146,200 @@ class Lama {
|
|||||||
|
|
||||||
val embedimgModelEeve ="lancard/korean-yanolja-eeve"
|
val embedimgModelEeve ="lancard/korean-yanolja-eeve"
|
||||||
val embedimgModelBgeM3 = "bge-m3"
|
val embedimgModelBgeM3 = "bge-m3"
|
||||||
val currentEmbedimg = embedimgModelEeve
|
val currentEmbedimg = embedimgModelBgeM3
|
||||||
|
|
||||||
val llmPhi4 = "phi4:14b"
|
val llmPhi4 = "phi4:14b"
|
||||||
val llmGemma3 = "gemma3:12b"
|
val llmGemma3 = "gemma3:4b"
|
||||||
|
val llmPhi4Mini = "phi4-mini"
|
||||||
val llmDolphin3 = "dolphin3"
|
val llmDolphin3 = "dolphin3"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
val currentLLM = llmDolphin3
|
val currentLLM = llmGemma3
|
||||||
|
fun getGoogleSearch(query:String){
|
||||||
|
Jsoup.connect("https://www.google.com/search?q=".plus(query)).timeout(30000).get().select("a[href]").forEach { }
|
||||||
|
}
|
||||||
|
|
||||||
fun addDocuments(query : String , refinedQuery: RefinedQuery?) {
|
val waitTime = 1000L
|
||||||
|
val topCount = 2
|
||||||
|
|
||||||
|
@Async
|
||||||
|
suspend fun addDocuments(query : String , refinedQuery: RefinedQuery?) {
|
||||||
var querys : ArrayList<String> = ArrayList()
|
var querys : ArrayList<String> = ArrayList()
|
||||||
querys.add(query)
|
querys.add(query)
|
||||||
|
|
||||||
refinedQuery?.ko_query?.let { querys.add(it) }
|
refinedQuery?.ko_query?.let { querys.add(it) }
|
||||||
refinedQuery?.en_query?.let { querys.add(it) }
|
refinedQuery?.en_query?.let { querys.add(it) }
|
||||||
refinedQuery?.keywords?.let { querys.add(it.joinToString { " " })}
|
refinedQuery?.keywords?.let { querys.add(it.joinToString { " " })}
|
||||||
val readedUrls = ArrayList<String>()
|
val readedUrls = ArrayList<String>()
|
||||||
querys.forEach { refinedQuery ->
|
|
||||||
CoroutineScope(Dispatchers.IO).launch {
|
try {
|
||||||
val gSearch = "https://psn.lunaticbum.kr/search?q=${
|
var options : ChromeOptions = ChromeOptions();
|
||||||
refinedQuery?.replace(
|
options.addArguments("--disable-popup-blocking");
|
||||||
"오늘",
|
options.addArguments("--disable-default-apps");
|
||||||
SimpleDateFormat("yyyMMdd").format(Date())
|
options.addArguments("--disable-notifications");
|
||||||
)
|
options.addArguments("--disable-blink-features=AutomationControlled");
|
||||||
}&language=ko&time_range=month&safesearch=0&categories=general&format=json"
|
val targetUrls = hashSetOf<String>()
|
||||||
println("gSearch >>> ${gSearch}")
|
RemoteWebDriver(URL("https://video.lunaticbum.kr"), options).let { driver ->
|
||||||
WebClient.create().get()
|
querys.forEach { refinedQuery->
|
||||||
.uri(gSearch)
|
var findCount = 0
|
||||||
.retrieve()
|
try {
|
||||||
.bodyToMono(SearXng::class.java).timeout(Duration.ofMinutes(20L)).block()?.let { gsResult ->
|
driver.get("https://www.google.com/search?q=$refinedQuery");
|
||||||
gsResult.results?.filter { it.url?.startsWith("https://") == true && it.score > 0.4 }?.forEach {
|
Thread.sleep(waitTime)
|
||||||
println("in filter ${it.url}")
|
println(driver.currentUrl)
|
||||||
if (readedUrls.contains(it.url) == false) {
|
driver.findElement(By.ByTagName("Body"))?.let { webElement ->
|
||||||
readedUrls.add(it.url!!)
|
Jsoup.parse(driver.pageSource).select("[href*=https]").forEach {
|
||||||
it.originQuery = query
|
var href = it.attr("href")
|
||||||
it.refinedQuery = refinedQuery
|
if (href?.length ?: 0 > 5 && href.startsWith("https://") && findCount < topCount && href.contains("google") == false && href.contains("youtube") == false) {
|
||||||
println(it.title)
|
targetUrls.add(href)
|
||||||
try {
|
println("add targetUrls $href")
|
||||||
jsopFilter(it.url!!).let { text ->
|
findCount += 1
|
||||||
it.originHtml = text
|
|
||||||
webPageSummarize(it, text)
|
|
||||||
}
|
|
||||||
} catch (e: Exception) {
|
|
||||||
e.printStackTrace()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}catch (e:Exception){
|
||||||
|
e.printStackTrace()
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
driver.close()
|
||||||
|
driver.quit()
|
||||||
}
|
}
|
||||||
|
options = ChromeOptions();
|
||||||
|
options.addArguments("--disable-popup-blocking");
|
||||||
|
options.addArguments("--disable-default-apps");
|
||||||
|
options.addArguments("--disable-notifications");
|
||||||
|
options.addArguments("--disable-blink-features=AutomationControlled");
|
||||||
|
RemoteWebDriver(URL("https://video.lunaticbum.kr"), options).let { driver ->
|
||||||
|
targetUrls.forEach { url ->
|
||||||
|
var result = SearXngResult()
|
||||||
|
if (url?.length ?: 0 > 5 && url?.startsWith("https://") == true && readedUrls.contains(url) == false) {
|
||||||
|
readedUrls.add(url!!)
|
||||||
|
result.url = url!!
|
||||||
|
result.originQuery = query
|
||||||
|
try {
|
||||||
|
driver.get(url);
|
||||||
|
Thread.sleep(waitTime)
|
||||||
|
driver.findElement(By.ByTagName("Body"))?.let { webElement ->
|
||||||
|
if(webElement.text.length > 120) {
|
||||||
|
println(driver.currentUrl)
|
||||||
|
println(webElement.text)
|
||||||
|
result.title = driver.title
|
||||||
|
result.originHtml = webElement.text
|
||||||
|
webPageSummarize(result, webElement.text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (e: Exception) {
|
||||||
|
e.printStackTrace()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
driver.close();
|
||||||
|
driver.quit()
|
||||||
|
}
|
||||||
|
options = ChromeOptions();
|
||||||
|
options.addArguments("--disable-popup-blocking");
|
||||||
|
options.addArguments("--disable-default-apps");
|
||||||
|
options.addArguments("--disable-notifications");
|
||||||
|
options.addArguments("--disable-blink-features=AutomationControlled");
|
||||||
|
RemoteWebDriver(URL("https://video.lunaticbum.kr"), options).let { driver ->
|
||||||
|
querys.forEach { refinedQuery ->
|
||||||
|
var googleSCount = 0
|
||||||
|
RssFeedsParser().readFeed("https://news.google.com/rss/search?q=${URLEncoder.encode(query)}=ko&gl=KR&ceid=KR%3Ako/")?.messages?.forEach {
|
||||||
|
var url: String? = it.link
|
||||||
|
var result = SearXngResult()
|
||||||
|
println("url >>>> $url")
|
||||||
|
if (url?.length ?: 0 > 5 && url?.startsWith("https://") == true && readedUrls.contains(url) == false && googleSCount < topCount) {
|
||||||
|
readedUrls.add(url!!)
|
||||||
|
result.url = url!!
|
||||||
|
result.originQuery = query
|
||||||
|
result.refinedQuery = refinedQuery
|
||||||
|
result.title = it.title
|
||||||
|
println(result.title)
|
||||||
|
try {
|
||||||
|
driver.get(url);
|
||||||
|
Thread.sleep(waitTime)
|
||||||
|
println(driver.currentUrl)
|
||||||
|
driver.findElement(By.ByTagName("Body"))?.let { webElement ->
|
||||||
|
println(driver.currentUrl)
|
||||||
|
println(webElement.text)
|
||||||
|
result.title = driver.title
|
||||||
|
result.originHtml = webElement.text
|
||||||
|
webPageSummarize(result, webElement.text)
|
||||||
|
googleSCount += 1
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (e: Exception) {
|
||||||
|
e.printStackTrace()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
driver.close()
|
||||||
|
driver.quit()
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (e:Exception){e.printStackTrace()}
|
||||||
|
|
||||||
|
querys.forEach { refinedQuery ->
|
||||||
|
val gSearch = "https://psn.lunaticbum.kr/search?q=${refinedQuery?.replace("오늘", SimpleDateFormat("yyyMMdd").format(Date()))}&language=ko&time_range=month&safesearch=0&categories=general&format=json"
|
||||||
|
println("gSearch >>> ${gSearch}")
|
||||||
|
WebClient.create().get()
|
||||||
|
.uri(gSearch)
|
||||||
|
.retrieve()
|
||||||
|
.bodyToMono(SearXng::class.java).timeout(Duration.ofMinutes(20L)).block()?.let { gsResult ->
|
||||||
|
gsResult.results?.filter { it.url?.startsWith("https://") == true && it.score > 0.4 }?.forEach {
|
||||||
|
println("in filter ${it.url}")
|
||||||
|
if (readedUrls.contains(it.url) == false) {
|
||||||
|
readedUrls.add(it.url!!)
|
||||||
|
it.originQuery = query
|
||||||
|
it.refinedQuery = refinedQuery
|
||||||
|
println(it.title)
|
||||||
|
try {
|
||||||
|
jsopFilter(it.url!!).let { text ->
|
||||||
|
it.originHtml = text
|
||||||
|
webPageSummarize(it, text)
|
||||||
|
}
|
||||||
|
} catch (e: Exception) {
|
||||||
|
e.printStackTrace()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
println("end of search")
|
println("end of search")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var format = "원문:\n'%s'\n원문의 웹 페이지 소스는 '%s'이 질문에 대해 연관 결과로 받은 내용이야. 해당 정보를 파악해서 본문 내용을 최대한 자세히 알려줘 'query:{질문},contents:{본문내용 한국어},summary:{100자 이하로 요약 한국어},keywords:[키워드],related_links:[링크],relatedness_score:{0.0~10.0}'이 형식의 결과만들어줘"
|
var format = "context:'%s'\ncontext는 웹 페이지 문자를 가져온 것 '%s'이 질문에 대해 연관 결과로 받은 내용임. 해당 context 정리 해서 본문 내용을 최대한 자세히 알려줘\n'{query:질문 내용, contents_ko:자세한 내용 한국어 , summary_ko:요약된 내용 한국어, keywords:[키워드], related_links:[{link,description}}], relatedness_score:0.0~10.0}'\n이 형식의 결과로 만들어 줘"
|
||||||
internal fun makeSummarizeRequestMsg(it : SearXngResult) : String= format.format(it.originHtml,it.originQuery)
|
internal fun makeSummarizeRequestMsg(it : SearXngResult) : String= format.format(it.originHtml,it.originQuery)
|
||||||
|
|
||||||
internal fun makeCahtReq(reqMsg:String) = OllamaApi.ChatRequest.Builder(currentLLM).stream(false).format("json").messages(listOf(OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content(reqMsg).build())).build()
|
internal fun makeCahtReq(reqMsg:String) = OllamaApi.ChatRequest.Builder(currentLLM).stream(false).format("json").messages(reqMsg.chunked(100).map { println(it); OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content(it).build()}.toList()).build()
|
||||||
|
|
||||||
@Async
|
@Async
|
||||||
fun webPageSummarize(it : SearXngResult , text : String) {
|
fun webPageSummarize(it : SearXngResult , text : String) {
|
||||||
try {
|
try {
|
||||||
|
infomationDic.get(it.originQuery)!!.put(it.url!!, text)
|
||||||
val chatClient = OllamaApi("https://lama.lunaticbum.kr")
|
val chatClient = OllamaApi("https://lama.lunaticbum.kr")
|
||||||
val embeddingModel = OllamaEmbeddingModel(
|
val embeddingModel = OllamaEmbeddingModel(chatClient, OllamaOptions.builder().build(), ObservationRegistry.create(), ModelManagementOptions.defaults())
|
||||||
chatClient, OllamaOptions.builder().build(), ObservationRegistry.create(), ModelManagementOptions.defaults())
|
val embeddingResponse = embeddingModel.call(EmbeddingRequest(text.chunked(400).toList(), OllamaOptions.builder().model(currentEmbedimg).truncate(false).build()))
|
||||||
println("text >>>>> ${text?.chunked(50)?.first() ?: ""}")
|
it.originHtml = text
|
||||||
var dispoable = chatClient.chat(makeCahtReq(makeSummarizeRequestMsg(it))).toMono().subscribe({aiResponce ->
|
val sdss = QPut(arrayListOf())
|
||||||
it.pageData = aiResponce.message.content
|
sdss.points.add(QData(id = System.currentTimeMillis(), embeddingResponse.result.output, it))
|
||||||
println("summary result >>>>> ${it.pageData}")
|
if (sdss.points.size > 0) {
|
||||||
val embeddingResponse = embeddingModel.call(
|
val qUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
|
||||||
EmbeddingRequest(
|
val client = WebClient.create()
|
||||||
listOf(aiResponce.message.content),
|
client.put()
|
||||||
OllamaOptions.builder()
|
.uri(qUrl)
|
||||||
.model(currentEmbedimg)
|
.header("api-key", "blama-admin-key-gb")
|
||||||
.truncate(false).build()
|
.body(BodyInserters.fromValue(Gson().toJson(sdss)))
|
||||||
|
.retrieve()
|
||||||
|
.bodyToMono(String::class.java).timeout(Duration.ofMinutes(20L)).subscribe(
|
||||||
|
{ resultString -> }, { error -> error.printStackTrace() }
|
||||||
)
|
)
|
||||||
)
|
}
|
||||||
infomationDic.put(it.url!!,aiResponce.message.content)
|
|
||||||
val sdss = QPut(arrayListOf())
|
|
||||||
sdss.points.add(QData(id = System.currentTimeMillis(),embeddingResponse.result.output,it))
|
|
||||||
if (sdss.points.size > 0) {
|
|
||||||
val qUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
|
|
||||||
val client = WebClient.create()
|
|
||||||
client.put()
|
|
||||||
.uri(qUrl)
|
|
||||||
.header("api-key", "blama-admin-key-gb")
|
|
||||||
.body(BodyInserters.fromValue(Gson().toJson(sdss)))
|
|
||||||
.retrieve()
|
|
||||||
.bodyToMono(String::class.java).timeout(Duration.ofMinutes(20L)).subscribe(
|
|
||||||
{resultString -> },{error-> error.printStackTrace()}
|
|
||||||
)
|
|
||||||
}
|
|
||||||
},{err->
|
|
||||||
err.printStackTrace()
|
|
||||||
})
|
|
||||||
}catch (e : Exception) {
|
}catch (e : Exception) {
|
||||||
|
|
||||||
e.printStackTrace()
|
e.printStackTrace()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -281,10 +383,7 @@ class Lama {
|
|||||||
println(Gson().toJson(lists))
|
println(Gson().toJson(lists))
|
||||||
return if (lists?.result?.size ?: 0 > 0) {
|
return if (lists?.result?.size ?: 0 > 0) {
|
||||||
val qContents = QContentsList()
|
val qContents = QContentsList()
|
||||||
lists?.result?.forEach {
|
lists?.result?.filter { it.score > 8.0 }?.forEach { qContents.ids.add(it.id) }
|
||||||
|
|
||||||
qContents.ids.add(it.id)
|
|
||||||
}
|
|
||||||
val qCUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
|
val qCUrl = "https://ollama.lunaticbum.kr/collections/blama_vectors".plus("/points")
|
||||||
val client2 = WebClient.create()
|
val client2 = WebClient.create()
|
||||||
client2.post()
|
client2.post()
|
||||||
@ -301,44 +400,44 @@ class Lama {
|
|||||||
@Autowired
|
@Autowired
|
||||||
lateinit var globalEvv : GlobalEnvironment
|
lateinit var globalEvv : GlobalEnvironment
|
||||||
|
|
||||||
var infomationDic = hashMapOf<String,String>()
|
var infomationDic = hashMapOf<String,HashMap<String,String>>()
|
||||||
suspend fun generateResponse(query: String?, targetId: String? = globalEvv.telegramMyId) {
|
suspend fun generateResponse(query: String?, targetId: String? = globalEvv.telegramMyId) {
|
||||||
infomationDic.clear()
|
|
||||||
val chatClient = OllamaApi("https://lama.lunaticbum.kr")
|
val chatClient = OllamaApi("https://lama.lunaticbum.kr")
|
||||||
val embeddingModel = OllamaEmbeddingModel(
|
val embeddingModel = OllamaEmbeddingModel(
|
||||||
chatClient, OllamaOptions.builder().build(), ObservationRegistry.create(), ModelManagementOptions.defaults())
|
chatClient, OllamaOptions.builder().build(), ObservationRegistry.create(), ModelManagementOptions.defaults())
|
||||||
println("On generateResponse :: find something ${query}")
|
println("On generateResponse :: find something ${query}")
|
||||||
|
|
||||||
query?.let { originalQuery ->
|
query?.let { originalQuery ->
|
||||||
|
infomationDic.put(query!!, hashMapOf())
|
||||||
var embeddingResponse = embeddingModel.call(EmbeddingRequest(listOf(originalQuery), OllamaOptions.builder().model(currentEmbedimg).truncate(false).build()))
|
var embeddingResponse = embeddingModel.call(EmbeddingRequest(listOf(originalQuery), OllamaOptions.builder().model(currentEmbedimg).truncate(false).build()))
|
||||||
addDocuments(originalQuery, querySummarize(originalQuery))
|
addDocuments(originalQuery, querySummarize(originalQuery))
|
||||||
println("points size ${embeddingResponse.result.output.size}")
|
println("points size ${embeddingResponse.result.output.size}")
|
||||||
var context : String? = ""
|
var context : StringBuffer = StringBuffer()
|
||||||
try {
|
try {
|
||||||
embedQuery(embeddingResponse.result.output)?.result?.forEach { result ->
|
embedQuery(embeddingResponse.result.output)?.result?.forEach { result ->
|
||||||
if (infomationDic.contains(result.payload?.url ?: "NONE") == false) {
|
if (infomationDic.get(query!!)!!.contains(result.payload?.url ?: "NONE") == false) {
|
||||||
context += "\n# :".plus(if (result.payload?.pageData?.length ?: 0 > 10) {
|
context.append("\n# :".plus(if (result.payload?.pageData?.length ?: 0 > 10) {
|
||||||
result.payload?.pageData
|
result.payload?.pageData
|
||||||
} else {
|
} else {
|
||||||
result.payload?.content
|
result.payload?.content
|
||||||
})
|
}))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}catch (e:Exception){
|
}catch (e:Exception){
|
||||||
e.printStackTrace()
|
e.printStackTrace()
|
||||||
}
|
}
|
||||||
|
|
||||||
infomationDic.iterator().forEach { context += "\n#${it.key}:${it.value}" }
|
infomationDic.get(query!!)!!.iterator().forEach { context.append("\n#${it.key}:${it.value}") }
|
||||||
|
|
||||||
|
val prompt : StringBuffer = StringBuffer().append("참조:\n").append(context).append("\n참조 내용을 고려 해서\n'$query'").append(query).append("\n에 {querys:[],answers:[],keywords:[],links:[]}형식으로 최대한 자세히 대답 해줘 ")
|
||||||
|
val fullUrl = "https://api.telegram.org/${globalEvv.telegramBotKey}/sendMessage"
|
||||||
|
|
||||||
val prompt = "참조:\n$context\n참조 내용을 고려해서\n해당 질문:${query}\n에 {질문내용:[한국어],답변내용:[한국어],전체키워드:[],참조링크:[]}형식으로 대답 해줘 ".trimIndent()
|
|
||||||
println(prompt)
|
|
||||||
val response: OllamaApi.ChatResponse = chatClient.chat(OllamaApi.ChatRequest.Builder(currentLLM).stream(false).format("json").messages(
|
val response: OllamaApi.ChatResponse = chatClient.chat(OllamaApi.ChatRequest.Builder(currentLLM).stream(false).format("json").messages(
|
||||||
listOf(OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content(prompt).build())
|
prompt.chunked(300).map { println(it); OllamaApi.Message.Builder(OllamaApi.Message.Role.USER).content(it).build()}.toList()).build())
|
||||||
).build())
|
// println(response.message.content)
|
||||||
|
|
||||||
println(response.message.content)
|
|
||||||
CoroutineScope(Dispatchers.IO).launch {
|
CoroutineScope(Dispatchers.IO).launch {
|
||||||
var toalmsg = "${query}의 대답이 도착했어요.\n${response.message.content}"
|
var toalmsg = "${query}의 대답이 도착했어요.\n${response.message.content}"
|
||||||
val fullUrl = "https://api.telegram.org/${globalEvv.telegramBotKey}/sendMessage"
|
|
||||||
toalmsg.chunked(512).forEach { chunkedMsg ->
|
toalmsg.chunked(512).forEach { chunkedMsg ->
|
||||||
println("fullUrl >>> ${fullUrl}")
|
println("fullUrl >>> ${fullUrl}")
|
||||||
(targetId ?: globalEvv.telegramMyId)?.let {
|
(targetId ?: globalEvv.telegramMyId)?.let {
|
||||||
@ -354,6 +453,7 @@ class Lama {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
infomationDic.remove(query!!)
|
||||||
}
|
}
|
||||||
println("On generateResponse :: END OF Answer")
|
println("On generateResponse :: END OF Answer")
|
||||||
}
|
}
|
||||||
|
|||||||
151
src/main/kotlin/kr/lunaticbum/back/lun/utils/RssFeedsParser.kt
Normal file
151
src/main/kotlin/kr/lunaticbum/back/lun/utils/RssFeedsParser.kt
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
package kr.lunaticbum.back.lun.utils
|
||||||
|
|
||||||
|
import java.io.IOException
|
||||||
|
import java.io.InputStream
|
||||||
|
import java.net.MalformedURLException
|
||||||
|
import java.net.URL
|
||||||
|
import javax.xml.stream.XMLEventReader
|
||||||
|
import javax.xml.stream.XMLInputFactory
|
||||||
|
import javax.xml.stream.XMLStreamException
|
||||||
|
import javax.xml.stream.events.Characters
|
||||||
|
import javax.xml.stream.events.XMLEvent
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Mutable holder for the fields of a single feed entry.
 *
 * Every property is optional and starts out as `null`; callers fill in
 * whichever values they have.
 */
class FeedMessage {
    var title: String? = null
    var description: String? = null
    var link: String? = null
    var author: String? = null
    var guid: String? = null

    /** Renders all five fields on one bracketed line; unset fields print as `null`. */
    override fun toString(): String =
        "FeedMessage [title=$title, description=$description, link=$link, author=$author, guid=$guid]"
}
|
||||||
|
/**
 * Header information of a feed together with the entries collected for it.
 *
 * The six header values are fixed at construction time; [messages] starts
 * empty and is filled by whoever builds the feed.
 */
class Feed(
    val title: String,
    val link: String,
    val description: String,
    val language: String,
    val copyright: String,
    val pubDate: String
) {
    /** Entries belonging to this feed, in the order they were added. */
    val messages: ArrayList<FeedMessage> = arrayListOf()

    /** Renders the header fields only; the contents of [messages] are not included. */
    override fun toString(): String =
        "Feed [copyright=$copyright, description=$description, language=$language, " +
            "link=$link, pubDate=$pubDate, title=$title]"
}
|
||||||
|
/**
 * Small StAX-based reader that turns an RSS document into a [Feed] with one
 * [FeedMessage] per `<item>` element.
 *
 * Elements are matched by local name only, so a tag with the same local name
 * in another namespace or nested deeper in the channel is treated the same as
 * the plain RSS tag.
 * NOTE(review): this means e.g. a `<title>` inside a channel `<image>` can
 * overwrite the channel title — confirm whether that matters for the feeds in use.
 */
class RssFeedsParser {

    // Element names recognised while parsing. They stay public instance
    // properties so that existing callers reading them keep compiling.
    val TITLE: String = "title"
    val DESCRIPTION: String = "description"
    val CHANNEL: String = "channel"
    val LANGUAGE: String = "language"
    val COPYRIGHT: String = "copyright"
    val LINK: String = "link"
    val AUTHOR: String = "author"
    val ITEM: String = "item"
    val PUB_DATE: String = "pubDate"
    val GUID: String = "guid"

    /** URL of the feed most recently passed to [readFeed]; `null` before the first call. */
    var url: URL? = null

    /**
     * Downloads and parses the feed at [feedUrl].
     *
     * @param feedUrl absolute URL of the RSS document.
     * @return the parsed feed, or `null` when the document contains no `<item>`
     *   element (the [Feed] is only created once the first item is seen).
     * @throws RuntimeException wrapping [MalformedURLException] for an invalid
     *   URL, [IOException] when the stream cannot be opened or closed, or
     *   [XMLStreamException] when the document is not well-formed XML.
     */
    fun readFeed(feedUrl: String?): Feed? {
        try {
            this.url = URL(feedUrl)
        } catch (e: MalformedURLException) {
            throw RuntimeException(e)
        }

        var feed: Feed? = null

        // Values of the most recently read tags. Until the first <item> they
        // describe the channel header; afterwards they describe the current item.
        var description = ""
        var title = ""
        var link = ""
        var language = ""
        var copyright = ""
        var author = ""
        var pubdate = ""
        var guid = ""

        try {
            val inputFactory = XMLInputFactory.newInstance()
            // Deliver the whole text of an element (including CDATA sections) as a
            // single Characters event, so long descriptions are not truncated.
            inputFactory.setProperty(XMLInputFactory.IS_COALESCING, true)
            // The feed is untrusted remote input: do not process DTDs or resolve
            // external entities (XXE hardening).
            inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false)
            inputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false)

            // `use` closes the network stream even when parsing fails.
            read().use { stream ->
                val eventReader = inputFactory.createXMLEventReader(stream)
                try {
                    while (eventReader.hasNext()) {
                        val event = eventReader.nextEvent()
                        if (event.isStartElement) {
                            when (event.asStartElement().name.localPart) {
                                ITEM -> {
                                    if (feed == null) {
                                        // First item: everything read so far is the channel header.
                                        feed = Feed(title, link, description, language, copyright, pubdate)
                                    }
                                    // Start every item from a clean slate so a missing tag does not
                                    // inherit the channel's or the previous item's value.
                                    title = ""
                                    description = ""
                                    link = ""
                                    author = ""
                                    guid = ""
                                }

                                TITLE -> title = getCharacterData(eventReader)
                                DESCRIPTION -> description = getCharacterData(eventReader)
                                LINK -> link = getCharacterData(eventReader)
                                GUID -> guid = getCharacterData(eventReader)
                                LANGUAGE -> language = getCharacterData(eventReader)
                                AUTHOR -> author = getCharacterData(eventReader)
                                PUB_DATE -> pubdate = getCharacterData(eventReader)
                                COPYRIGHT -> copyright = getCharacterData(eventReader)
                            }
                        } else if (event.isEndElement && event.asEndElement().name.localPart == ITEM) {
                            // Structural equality: identity comparison of strings only works
                            // when the parser happens to intern element names.
                            val message = FeedMessage()
                            message.author = author
                            message.description = description
                            message.guid = guid
                            message.link = link
                            message.title = title
                            feed?.messages?.add(message)
                        }
                    }
                } finally {
                    eventReader.close()
                }
            }
        } catch (e: XMLStreamException) {
            throw RuntimeException(e)
        } catch (e: IOException) {
            throw RuntimeException(e)
        }
        return feed
    }

    /**
     * Returns the text content that directly follows the start tag just read.
     *
     * The next event is only consumed when it is character data; any other
     * event (end tag of an empty element, nested start tag) is left in the
     * reader for the main loop, and an empty string is returned.
     */
    @Throws(XMLStreamException::class)
    private fun getCharacterData(eventReader: XMLEventReader): String {
        val upcoming = eventReader.peek()
        return if (upcoming != null && upcoming.isCharacters) {
            eventReader.nextEvent().asCharacters().data
        } else {
            ""
        }
    }

    /**
     * Opens the stream for [url].
     *
     * @throws RuntimeException wrapping the [IOException] when the connection fails.
     */
    private fun read(): InputStream {
        val target = checkNotNull(url) { "readFeed must set url before read() is called" }
        try {
            return target.openStream()
        } catch (e: IOException) {
            throw RuntimeException(e)
        }
    }
}
|
||||||
Loading…
x
Reference in New Issue
Block a user