Skip to content

Commit

Permalink
feat(compiler): add support for browsing URLs
Browse files Browse the repository at this point in the history
This commit introduces a new command to browse URLs and retrieve their content using the BrowseTool.

- Added BrowseInsCommand class to execute the browsing command
- Implemented execute method to parse the URL and fetch the content
- Updated ShireCompiler to process the new Browse command
- Added DocumentCleaner and DocumentContent classes for HTML parsing and cleaning
- Updated ShireCompiledResult to handle the browsing output
  • Loading branch information
phodal committed May 31, 2024
1 parent 8f5ca99 commit 3b63e52
Show file tree
Hide file tree
Showing 29 changed files with 1,291 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package com.phodal.shirelang.agenttool

import com.intellij.openapi.project.Project

/**
 * Input bundle passed to an agent tool when it is executed.
 *
 * @property project the IntelliJ project the tool is invoked for
 * @property argument the raw string argument supplied to the tool
 */
class AgentToolContext(val project: Project, val argument: String)
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package com.phodal.shirelang.agenttool

/**
 * Outcome of running an agent tool.
 *
 * @property isSuccess whether the tool reported success
 * @property output optional textual output of the tool; defaults to `null`
 */
data class AgentToolResult(val isSuccess: Boolean, val output: String? = null)
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package com.phodal.shirelang.agenttool.browse

import com.phodal.shirelang.agenttool.AgentToolContext
import com.phodal.shirelang.provider.AgentTool
import com.phodal.shirelang.agenttool.AgentToolResult
import org.jsoup.Jsoup
import org.jsoup.nodes.Document

/**
 * Agent tool that fetches a URL and returns the text extracted from the page.
 */
class BrowseTool : AgentTool {
    override val name: String get() = "Browse"
    override val description: String = "Get the content of a given URL."

    /**
     * Fetches the URL given in [AgentToolContext.argument] and returns the extracted page body.
     *
     * A failed fetch (I/O error while connecting or reading) or an argument that Jsoup rejects
     * as a URL is reported as an unsuccessful [AgentToolResult] carrying a short error message,
     * instead of propagating the exception to the caller.
     *
     * @param context execution context; only its `argument` is read, and it is treated as the URL
     * @return a successful result with the page body, or an unsuccessful one describing the failure
     */
    override fun execute(context: AgentToolContext): AgentToolResult {
        val url = context.argument
        return try {
            AgentToolResult(
                isSuccess = true,
                output = parse(url).body,
            )
        } catch (e: java.io.IOException) {
            // Covers connection errors, timeouts, HTTP error statuses and unsupported content types.
            failure(url, e)
        } catch (e: IllegalArgumentException) {
            // Raised by Jsoup when the argument is empty or not a well-formed URL.
            failure(url, e)
        }
    }

    /** Builds the unsuccessful result returned when [url] could not be fetched. */
    private fun failure(url: String, cause: Exception): AgentToolResult {
        val reason = cause.message ?: cause.javaClass.simpleName
        return AgentToolResult(isSuccess = false, output = "Failed to browse $url: $reason")
    }

    companion object {
        /**
         * Downloads [url] with Jsoup and passes the document through [DocumentCleaner].
         *
         * This call performs blocking network I/O and lets Jsoup's exceptions propagate.
         *
         * Related IntelliJ API: [com.intellij.inspectopedia.extractor.utils.HtmlUtils.cleanupHtml]
         *
         * @param url address of the page to fetch
         * @return the cleaned [DocumentContent] for the page
         */
        fun parse(url: String): DocumentContent {
            val doc: Document = Jsoup.connect(url).get()
            return DocumentCleaner().cleanHtml(doc)
        }
    }
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package com.phodal.shirelang.agenttool.browse

import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element

/**
 * Pulls the title, language, description and main text out of an HTML document.
 */
class DocumentCleaner {
    /** Parses the raw [html] string with Jsoup and cleans the resulting document. */
    fun cleanHtml(html: String): DocumentContent = cleanHtml(Jsoup.parse(html))

    /**
     * Builds a [DocumentContent] from [doc]: the document title, the `Content-Language`
     * `http-equiv` meta value, the meta description and the article text.
     */
    fun cleanHtml(doc: Document): DocumentContent {
        val pageTitle = doc.title()
        val pageLanguage = metaContent(doc, "http-equiv", "Content-Language")
        val pageDescription = metaDescription(doc)
        val pageBody = articleNode(doc)
        return DocumentContent(
            title = pageTitle,
            language = pageLanguage,
            description = pageDescription,
            body = pageBody,
        )
    }

    /**
     * Returns the page description, looking at `meta[property=description]` first and
     * `meta[name=description]` second; `null` when neither yields non-empty content.
     */
    fun metaDescription(doc: Document): String? {
        for (attributeName in listOf("property", "name")) {
            val found = metaContent(doc, attributeName, "description")
            if (found != null) {
                return found
            }
        }
        return null
    }

    /**
     * Returns the trimmed `content` attribute of the first `<meta>` element inside `<head>`
     * whose attribute [key] matches [value] and whose content is not empty, or `null` if none.
     */
    fun metaContent(doc: Document, key: String, value: String): String? {
        for (meta in doc.select("head meta[$key=$value]")) {
            val content = meta.attr("content").trim()
            if (content.isNotEmpty()) {
                return content
            }
        }
        return null
    }

    /** Attribute/value pairs that mark an element as the article body, tried in order. */
    val ARTICLE_BODY_ATTR: Array<Pair<String, String>> = arrayOf(
        "itemprop" to "articleBody",
        "data-testid" to "article-body",
        "name" to "articleBody",
    )

    /**
     * Returns the main text of [doc], or `null` when the document has no `<body>`.
     *
     * An element matching one of [ARTICLE_BODY_ATTR] wins; otherwise the text comes from
     * the common content containers, falling back to the whole body.
     */
    fun articleNode(doc: Document): String? {
        val pageBody: Element = doc.select("html").select("body").first() ?: return null

        // Microdata-style markers take precedence over structural guesses.
        val markedElement = ARTICLE_BODY_ATTR
            .asSequence()
            .mapNotNull { (attr, value) -> doc.selectFirst("[$attr=$value]") }
            .firstOrNull()

        return if (markedElement != null) markedElement.text() else trySelectBestCode(pageBody)
    }

    /**
     * Returns the text of the first common content container found under [doc],
     * or the text of [doc] itself when no such container exists.
     */
    private fun trySelectBestCode(doc: Element): String {
        val candidates = doc.select("article, main, #main, #content, #doc-content, #contents, .book-body")
        if (candidates.isEmpty()) {
            return doc.text()
        }
        return candidates.first()?.text() ?: ""
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.phodal.shirelang.agenttool.browse

/**
 * Text extracted from an HTML page. Every field is nullable.
 *
 * @property title the document title
 * @property language the declared content language
 * @property description the page description
 * @property body the main text of the page
 */
data class DocumentContent(val title: String?, val language: String?, val description: String?, val body: String?)
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import com.intellij.openapi.editor.Editor
import com.intellij.psi.PsiElement
import com.intellij.psi.PsiFile
import com.intellij.psi.PsiNameIdentifierOwner
import com.phodal.shirelang.completion.provider.CustomVariable
import org.apache.velocity.VelocityContext
import org.apache.velocity.app.Velocity
import java.io.StringWriter
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.phodal.shirelang.compiler

import com.phodal.shirelang.agenttool.browse.BrowseTool
import com.phodal.shirelang.compiler.exec.InsCommand
import com.intellij.openapi.application.runInEdt
import com.intellij.openapi.project.Project

/**
 * Instruction command that treats [prop] as a URL, fetches it through [BrowseTool]
 * and yields the extracted page body.
 *
 * @property myProject the project this command belongs to
 */
class BrowseInsCommand(val myProject: Project, private val prop: String) : InsCommand {
    /**
     * Fetches the page and returns its body text.
     *
     * The fetch is performed inline instead of inside `runInEdt`. `runInEdt` merely queues its
     * action when called off the event dispatch thread, so the result could be returned before
     * it had been assigned; when called on that thread it ran blocking network I/O on the UI.
     *
     * NOTE(review): [BrowseTool.parse] blocks on network I/O; this presumably should be invoked
     * from a background coroutine context — confirm against the caller.
     *
     * @return the extracted body text, or `null` when the page yields none
     */
    override suspend fun execute(): String? {
        return BrowseTool.parse(prop).body
    }
}

Loading

0 comments on commit 3b63e52

Please sign in to comment.