Fast HTML parser for Swift. 12x faster than SwiftSoup, 295x faster than NSAttributedString.
Built on lexbor v2.6.0. Parses HTML into immutable Swift value types. Why SwiftLexbor over WKWebView, NSAttributedString, and others? →
| Parser | Time | Relative |
|---|---|---|
| SwiftLexbor | 312 µs | 1x |
| JustHTML | 3.66 ms | 12x slower |
| SwiftSoup | 3.83 ms | 12x slower |
| BonMot | 4.14 ms | 13x slower |
| NSAttributedString | 91.9 ms | 295x slower |
Apple M4 Max, macOS 26.2, Swift 6.2, release build. Full results →
import SwiftLexbor
let doc = SwiftLexbor.parseFragment("<p>Hello <b>world</b></p>")
for node in doc.children {
switch node {
case .element(let el): print(el.tagName, el.textContent)
case .text(let text): print(text)
case .comment: break
}
}

Add to your Package.swift:
dependencies: [
.package(url: "https://github.com/AgapovOne/swift-lexbor.git", from: "0.1.0"),
],
targets: [
.target(name: "YourTarget", dependencies: [
.product(name: "SwiftLexbor", package: "swift-lexbor"),
]),
]

Direct access to C API
If you need the raw lexbor C API without the Swift wrapper:
dependencies: [
.product(name: "CLexbor", package: "swift-lexbor"),
]

import CLexbor
let doc = lxb_html_document_create()!
defer { _ = lxb_html_document_destroy(doc) }
let html = "<p>Hello</p>"
let bytes = Array(html.utf8)
bytes.withUnsafeBufferPointer { buffer in
_ = lxb_html_document_parse(doc, buffer.baseAddress, buffer.count)
}

// Fragment — no html/head/body wrappers
let doc = SwiftLexbor.parseFragment("<div><p>text</p></div>")
// Full document — includes html/head/body
let fullDoc = SwiftLexbor.parse("<html><body><p>text</p></body></html>")

struct TextExtractor: HTMLVisitor {
typealias Result = String?
func visitText(_ text: String) -> String? { text }
func visitElement(_ element: HTMLElement) -> String? { element.textContent }
}
let texts = doc.accept(visitor: TextExtractor())

Full HTMLVisitor protocol
protocol HTMLVisitor {
associatedtype Result
func visitHeading(_ element: HTMLElement, level: Int) -> Result
func visitParagraph(_ element: HTMLElement) -> Result
func visitLink(_ element: HTMLElement, href: String?) -> Result
func visitList(_ element: HTMLElement, ordered: Bool) -> Result
func visitListItem(_ element: HTMLElement) -> Result
func visitBlockquote(_ element: HTMLElement) -> Result
func visitCodeBlock(_ element: HTMLElement) -> Result
func visitTable(_ element: HTMLElement) -> Result
func visitBold(_ element: HTMLElement) -> Result
func visitItalic(_ element: HTMLElement) -> Result
func visitCode(_ element: HTMLElement) -> Result
func visitUnderline(_ element: HTMLElement) -> Result
func visitStrikethrough(_ element: HTMLElement) -> Result
func visitSubscript(_ element: HTMLElement) -> Result
func visitSuperscript(_ element: HTMLElement) -> Result
func visitImage(_ element: HTMLElement, src: String?, alt: String?) -> Result
func visitLineBreak() -> Result
func visitHorizontalRule() -> Result
func visitText(_ text: String) -> Result
func visitComment(_ text: String) -> Result
func visitElement(_ element: HTMLElement) -> Result
}

All methods have default implementations. visitElement is the catch-all fallback. Semantic methods (visitHeading, visitParagraph, etc.) delegate to visitElement by default.
Inline formatting methods: visitBold (b/strong), visitItalic (i/em), visitCode (code), visitUnderline (u/ins), visitStrikethrough (s/del/strike), visitSubscript (sub), visitSuperscript (sup), visitImage (img), visitLineBreak (br).
let doc = SwiftLexbor.parseFragment("<p>Hello <b>world</b></p>")
let html = HTMLSerializer.serialize(doc) // "<p>Hello <b>world</b></p>"

Handles void elements, boolean attributes, HTML entity escaping, and sorted attributes for deterministic output.
HTMLDocument and HTMLElement conform to Sequence, so you can iterate directly:
let doc = SwiftLexbor.parseFragment("<p>one</p><p>two</p>")
for node in doc {
if case .element(let el) = node {
print(el.tagName) // "p", "p"
}
}
// Use map, filter, first(where:), etc.
let tags = doc.compactMap { node -> String? in
guard case .element(let el) = node else { return nil }
return el.tagName
}

AttributedString visitor example
Converts HTML to AttributedString. Copy and customize for your needs:
import Foundation
/// Example `HTMLVisitor` that converts a parsed HTML tree into an `AttributedString`.
///
/// Each element's children are visited in order and their results concatenated.
/// Inline formatting elements add an `inlinePresentationIntent` to the text they
/// wrap; links set the `link` attribute; paragraphs and line breaks insert newlines.
///
/// NOTE(review): `AttributedString` needs a newer OS than the package's stated
/// minimum deployment target — confirm availability annotations before shipping.
struct AttributedStringBuilder: HTMLVisitor {
    typealias Result = AttributedString

    /// Plain text becomes an unstyled attributed string.
    func visitText(_ text: String) -> AttributedString {
        AttributedString(text)
    }

    /// Catch-all: concatenates the results of visiting every child node.
    func visitElement(_ element: HTMLElement) -> AttributedString {
        element.children.map { $0.accept(visitor: self) }.reduce(AttributedString(), +)
    }

    /// Renders the children and marks them as strongly emphasized.
    func visitBold(_ element: HTMLElement) -> AttributedString {
        adding(.stronglyEmphasized, to: visitElement(element))
    }

    /// Renders the children and marks them as emphasized.
    func visitItalic(_ element: HTMLElement) -> AttributedString {
        adding(.emphasized, to: visitElement(element))
    }

    /// Renders the children and marks them as inline code.
    func visitCode(_ element: HTMLElement) -> AttributedString {
        adding(.code, to: visitElement(element))
    }

    /// Renders the children with a single-line strikethrough.
    func visitStrikethrough(_ element: HTMLElement) -> AttributedString {
        var result = visitElement(element)
        result.strikethroughStyle = .single
        return result
    }

    /// Renders the children and, when `href` parses as a URL, attaches it as the link target.
    func visitLink(_ element: HTMLElement, href: String?) -> AttributedString {
        var result = visitElement(element)
        if let href, let url = URL(string: href) {
            result.link = url
        }
        return result
    }

    /// A line break is rendered as a single newline.
    func visitLineBreak() -> AttributedString {
        AttributedString("\n")
    }

    /// A paragraph is its rendered children followed by a blank line.
    func visitParagraph(_ element: HTMLElement) -> AttributedString {
        visitElement(element) + AttributedString("\n\n")
    }

    /// Merges `intent` into the inline presentation intent of every run in `content`.
    ///
    /// Assigning `inlinePresentationIntent` on the whole string would overwrite the
    /// value already set by nested elements (e.g. the italic in `<b><i>x</i></b>`),
    /// so the new intent is unioned with each run's existing one instead.
    private func adding(
        _ intent: InlinePresentationIntent,
        to content: AttributedString
    ) -> AttributedString {
        var result = content
        // Only attributes change, so the ranges taken from `content` stay valid in `result`.
        for run in content.runs {
            let existing = run.inlinePresentationIntent ?? []
            result[run.range].inlinePresentationIntent = existing.union(intent)
        }
        return result
    }
}
// Usage:
let doc = SwiftLexbor.parseFragment("<p>Hello <b>world</b> and <a href=\"https://example.com\">link</a></p>")
let builder = AttributedStringBuilder()
let attributed = doc.children.map { $0.accept(visitor: builder) }.reduce(AttributedString(), +)

SwiftUI example
Parse HTML once, render the AST in SwiftUI.
let document = SwiftLexbor.parseFragment(html)
HTMLDocView(document)

Full working example with headings, blockquotes, inline formatting, and tappable links → Example/
AST types reference
All types are Equatable, Hashable, and Sendable.
/// Parse result: holds the top-level nodes of the parsed HTML in `children`.
struct HTMLDocument { let children: [HTMLNode] }
/// A single node in the parsed tree: an element, a run of text, or a comment.
enum HTMLNode {
/// An element node; the payload carries its tag name, attributes, and children.
case element(HTMLElement)
/// A text node; the payload is the text itself.
case text(String)
/// A comment node; the payload is a string (presumably the comment's text).
case comment(String)
}
struct HTMLElement {
let tagName: String
let attributes: [String: String]
let children: [HTMLNode]
var textContent: String { get }
}

- `script`, `style`, `template` tags are skipped
- HTML entities are decoded (`&amp;` -> `&`)
- Invalid HTML is handled via lexbor's error recovery
- Boolean attributes have empty string value (`disabled` -> `""`). Use `hasAttribute(_:)` to check presence.
Run locally:
swift run --package-path Benchmarks -c release

See Benchmarks/ for full results and methodology.
- Swift 5.10+
- iOS 13+ / macOS 10.15+
lexbor is licensed under the Apache License 2.0. The Swift wrapper follows the same license.