generated from JetBrains/intellij-platform-plugin-template
-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(compiler): add support for browsing URLs
This commit introduces a new command to browse URLs and retrieve their content using the BrowseTool.
- Added BrowseInsCommand class to execute the browsing command
- Implemented execute method to parse the URL and fetch the content
- Updated ShireCompiler to process the new Browse command
- Added DocumentCleaner and DocumentContent classes for HTML parsing and cleaning
- Updated ShireCompiledResult to handle the browsing output
- Loading branch information
Showing
29 changed files
with
1,291 additions
and
2 deletions.
There are no files selected for viewing
10 changes: 10 additions & 0 deletions
10
language/src/main/kotlin/com/phodal/shirelang/agenttool/AgentToolContext.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
package com.phodal.shirelang.agenttool | ||
|
||
import com.intellij.openapi.project.Project | ||
|
||
/**
 * Input handed to an agent tool when it is executed.
 *
 * @property project the IntelliJ project the tool is invoked for
 * @property argument the argument string given to the tool
 */
class AgentToolContext(val project: Project, val argument: String)
8 changes: 8 additions & 0 deletions
8
language/src/main/kotlin/com/phodal/shirelang/agenttool/AgentToolResult.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
package com.phodal.shirelang.agenttool | ||
|
||
/**
 * Outcome of running an agent tool.
 *
 * @property isSuccess whether the tool reported success
 * @property output text produced by the tool; defaults to `null` when omitted
 */
data class AgentToolResult(val isSuccess: Boolean, val output: String? = null)
32 changes: 32 additions & 0 deletions
32
language/src/main/kotlin/com/phodal/shirelang/agenttool/browse/BrowseTool.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package com.phodal.shirelang.agenttool.browse | ||
|
||
import com.phodal.shirelang.agenttool.AgentToolContext | ||
import com.phodal.shirelang.provider.AgentTool | ||
import com.phodal.shirelang.agenttool.AgentToolResult | ||
import org.jsoup.Jsoup | ||
import org.jsoup.nodes.Document | ||
|
||
/**
 * Agent tool that downloads a web page and returns its extracted text.
 */
class BrowseTool : AgentTool {
    override val name: String get() = "Browse"
    override val description: String = "Get the content of a given URL."

    /**
     * Fetches the URL given in [AgentToolContext.argument] and returns the extracted page text.
     *
     * A failed fetch (I/O error or a URL jsoup rejects) is reported through
     * [AgentToolResult.isSuccess] instead of escaping as an exception, so callers can rely
     * on the result flag.
     *
     * @param context execution context whose `argument` is the URL to fetch
     * @return a successful result carrying the page text (may be `null` when the document
     * has no body), or a failed result whose output describes the error
     */
    override fun execute(context: AgentToolContext): AgentToolResult {
        val url = context.argument
        return try {
            AgentToolResult(isSuccess = true, output = parse(url).body)
        } catch (e: java.io.IOException) {
            // Network errors, HTTP error statuses, timeouts and unsupported content types.
            failure(url, e)
        } catch (e: IllegalArgumentException) {
            // Presumably raised by jsoup for a malformed or unsupported URL — confirm against its docs.
            failure(url, e)
        }
    }

    /** Builds the failed result returned by [execute] when the page cannot be fetched. */
    private fun failure(url: String, cause: Exception): AgentToolResult =
        AgentToolResult(isSuccess = false, output = "Failed to browse $url: ${cause.message}")

    companion object {
        /**
         * Downloads [url] with jsoup and reduces the page to a [DocumentContent]
         * via [DocumentCleaner.cleanHtml].
         *
         * This performs a blocking network request on the calling thread, and any
         * exception thrown by jsoup propagates to the caller.
         *
         * Related IntelliJ API: [com.intellij.inspectopedia.extractor.utils.HtmlUtils.cleanupHtml]
         *
         * @param url address of the page to fetch
         * @return the cleaned title, language, description and body text of the page
         */
        fun parse(url: String): DocumentContent {
            val doc: Document = Jsoup.connect(url).get()
            return DocumentCleaner().cleanHtml(doc)
        }
    }
}
|
65 changes: 65 additions & 0 deletions
65
language/src/main/kotlin/com/phodal/shirelang/agenttool/browse/DocumentCleaner.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package com.phodal.shirelang.agenttool.browse | ||
|
||
import org.jsoup.Jsoup | ||
import org.jsoup.nodes.Document | ||
import org.jsoup.nodes.Element | ||
|
||
/**
 * Extracts the title, language, description and main text from an HTML document.
 */
class DocumentCleaner {
    /** Parses the raw [html] string with jsoup and cleans the resulting document. */
    fun cleanHtml(html: String): DocumentContent = cleanHtml(Jsoup.parse(html))

    /**
     * Collects the pieces of [doc] that make up a [DocumentContent]: the page title, the
     * `Content-Language` meta value, the meta description and the article text.
     */
    fun cleanHtml(doc: Document): DocumentContent {
        val pageTitle = doc.title()
        val pageLanguage = metaContent(doc, "http-equiv", "Content-Language")
        val pageDescription = metaDescription(doc)
        val pageBody = articleNode(doc)

        return DocumentContent(
            title = pageTitle,
            language = pageLanguage,
            description = pageDescription,
            body = pageBody
        )
    }

    /**
     * Returns the page description, preferring `<meta property="description">` over
     * `<meta name="description">`, or `null` when neither carries non-empty content.
     */
    fun metaDescription(doc: Document): String? =
        metaContent(doc, "property", "description")
            ?: metaContent(doc, "name", "description")

    /**
     * Returns the trimmed `content` attribute of the first `<meta>` element inside `<head>`
     * whose attribute [key] equals [value] and whose content is not empty.
     *
     * @return the content string, or `null` when no such element exists
     */
    fun metaContent(doc: Document, key: String, value: String): String? {
        for (meta in doc.select("head meta[$key=$value]")) {
            val content = meta.attr("content").trim()
            if (content.isNotEmpty()) {
                return content
            }
        }
        return null
    }

    /** Attribute/value pairs that mark an article body element, tried in this order. */
    val ARTICLE_BODY_ATTR: Array<Pair<String, String>> = arrayOf(
        "itemprop" to "articleBody",
        "data-testid" to "article-body",
        "name" to "articleBody"
    )

    /**
     * Returns the text of the main article of [doc].
     *
     * The first element matching one of [ARTICLE_BODY_ATTR] wins; otherwise the text of a
     * common content container inside `<body>` is used, falling back to the whole body.
     *
     * @return the extracted text, or `null` when the document has no `<body>`
     */
    fun articleNode(doc: Document): String? {
        val pageBody: Element = doc.select("html").select("body").first() ?: return null

        // Microdata-style markers are looked up across the whole document, not only the body.
        val markedArticle = ARTICLE_BODY_ATTR
            .asSequence()
            .mapNotNull { (attr, value) -> doc.selectFirst("[$attr=$value]") }
            .firstOrNull()

        return markedArticle?.text() ?: trySelectBestCode(pageBody)
    }

    /**
     * Returns the text of the first common content container found under [root], or the
     * text of [root] itself when none is present.
     */
    private fun trySelectBestCode(root: Element): String {
        val candidates = root.select("article, main, #main, #content, #doc-content, #contents, .book-body")
        return if (candidates.isEmpty()) {
            root.text()
        } else {
            candidates.first()?.text().orEmpty()
        }
    }
}
9 changes: 9 additions & 0 deletions
9
language/src/main/kotlin/com/phodal/shirelang/agenttool/browse/DocumentContent.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
package com.phodal.shirelang.agenttool.browse | ||
|
||
/**
 * Text extracted from a web page.
 *
 * @property title the page title, if any
 * @property language the page language, if any
 * @property description the page description, if any
 * @property body the main text of the page, if any
 */
data class DocumentContent(
    val title: String?,
    val language: String?,
    val description: String?,
    val body: String?
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
19 changes: 19 additions & 0 deletions
19
language/src/main/kotlin/com/phodal/shirelang/compiler/BrowseInsCommand.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
package com.phodal.shirelang.compiler | ||
|
||
import com.phodal.shirelang.agenttool.browse.BrowseTool | ||
import com.phodal.shirelang.compiler.exec.InsCommand | ||
import com.intellij.openapi.application.runInEdt | ||
import com.intellij.openapi.project.Project | ||
|
||
/**
 * Instruction command that fetches a URL and yields the extracted page text.
 *
 * @property myProject the project this command belongs to
 * @param prop the URL to browse
 */
class BrowseInsCommand(val myProject: Project, private val prop: String) : InsCommand {
    /**
     * Downloads the page at the configured URL and returns its body text.
     *
     * The fetch runs directly on the calling thread. It was previously wrapped in
     * `runInEdt`, which only queues the work when the caller is off the EDT (so this
     * function returned `null` before the fetch finished) and blocks the UI on a network
     * request when the caller is on the EDT.
     *
     * NOTE(review): this is a blocking network call; callers should not invoke it on the EDT.
     *
     * @return the page text, or `null` when the document has no body
     */
    override suspend fun execute(): String? {
        // Exceptions from BrowseTool.parse propagate to the caller of execute().
        return BrowseTool.parse(prop).body
    }
}
|
Oops, something went wrong.