Indexing Documents
Learn how to add, update, and manage documents in LumoSearch indexes.
Initial Indexing
Create an index by passing documents to the constructor:
import { LumoSearch } from '@lumosearch/search'
// Sample corpus: every document is a plain object
const docs = [
  { id: 1, title: 'JavaScript Guide', category: 'tutorials' },
  { id: 2, title: 'TypeScript Handbook', category: 'docs' },
  { id: 3, title: 'React Patterns', category: 'tutorials' }
]

// Index the `title` field with a weight of 3
const searchOptions = {
  keys: [{ name: 'title', weight: 3 }]
}
const search = new LumoSearch(docs, searchOptions)
// Indexes are built automatically
Adding Documents
Add new documents without rebuilding the entire index:
// Add a single document
const nodeBook = {
  id: 4,
  title: 'Node.js in Action',
  category: 'tutorials'
}
search.add(nodeBook)

// Document is indexed immediately, so a query can find it right away
const results = search.search('node')
// => Returns the new document
Removing Documents
By Index Position
// Remove document at position 0
search.removeAt(0)
// Note: positions shift after removal
// If you had [A, B, C], after removeAt(0):
// [B, C] (B is now at position 0)
By Predicate
// Remove all archived documents
search.remove((doc) => doc.archived === true)
// Remove documents by category
search.remove((doc) => doc.category === 'deprecated')
// Remove old documents
search.remove((doc) => {
const docDate = new Date(doc.publishedAt)
const cutoff = new Date('2020-01-01')
return docDate < cutoff
})Replacing the Collection
Replace all documents and rebuild indexes:
// Documents that will become the whole collection
const newDocs = [{ title: 'Updated Doc 1' }, { title: 'Updated Doc 2' }]

// Replace entire collection
search.setCollection(newDocs)
// All indexes are rebuilt
// Previous documents are gone
Incremental Updates Pattern
// Real-time updates from API
/**
 * Fetches recently changed documents and merges them into the index.
 * A document that is already indexed (looked up by id) is removed first, so
 * the following add acts as an update; unknown documents are simply added.
 *
 * @returns {Promise<void>}
 * @throws {Error} If the API answers with a non-2xx status.
 */
async function syncDocuments() {
  const response = await fetch('/api/documents/recent')
  // fetch() only rejects on network failure; HTTP errors must be checked explicitly
  if (!response.ok) {
    throw new Error(`Failed to fetch recent documents: HTTP ${response.status}`)
  }
  const newDocs = await response.json()

  for (const doc of newDocs) {
    // Check if exists
    const existing = search.search(doc.id, {
      filters: { id: doc.id },
      limit: 1
    })
    // Update: drop the stale copy before adding the fresh one
    if (existing.length > 0) {
      search.removeAt(existing[0].refIndex)
    }
    search.add(doc)
  }
}
// Poll every minute
setInterval(syncDocuments, 60000)Batch Operations
// Add multiple documents
const newDocs = [
{ title: 'Doc 1' },
{ title: 'Doc 2' },
{ title: 'Doc 3' }
]
newDocs.forEach(doc => search.add(doc))
// Or for larger batches, use setCollection
const currentDocs = search.exportSnapshot().docs
const allDocs = [...currentDocs, ...newDocs]
search.setCollection(allDocs)Document ID Management
// Track documents by ID for easier updates
class SearchManager {
constructor(docs, options) {
this.search = new LumoSearch(docs, options)
this.idMap = new Map()
docs.forEach((doc, index) => {
this.idMap.set(doc.id, index)
})
}
addDocument(doc) {
this.search.add(doc)
const newIndex = this.search.exportSnapshot().docs.length - 1
this.idMap.set(doc.id, newIndex)
}
updateDocument(id, newDoc) {
const index = this.idMap.get(id)
if (index !== undefined) {
this.search.removeAt(index)
this.search.add(newDoc)
// Rebuild ID map
this.rebuildIdMap()
}
}
deleteDocument(id) {
const index = this.idMap.get(id)
if (index !== undefined) {
this.search.removeAt(index)
this.idMap.delete(id)
// Rebuild ID map
this.rebuildIdMap()
}
}
rebuildIdMap() {
const docs = this.search.exportSnapshot().docs
this.idMap.clear()
docs.forEach((doc, index) => {
this.idMap.set(doc.id, index)
})
}
search(query, options) {
return this.search.search(query, options)
}
}Document Requirements
- Documents must be plain JavaScript objects
- Fields specified in `keys` must exist
- Field values should be strings or convertible to strings
- Nested fields are supported with dot notation
- Array fields are flattened and indexed as space-separated strings
Array Fields
// A document whose `tags` field is an array
const docs = [
  {
    title: 'JavaScript Tutorial',
    tags: ['javascript', 'programming', 'web']
  }
]

// Index both the title and the tags array
const titleKey = { name: 'title', weight: 3 }
const tagsKey = { name: 'tags', weight: 2 }
const search = new LumoSearch(docs, { keys: [titleKey, tagsKey] })

// Array is indexed as: "javascript programming web"
const results = search.search('programming')
// => Matches the document via tags array
Large Dataset Strategies
Lazy Loading
// Load initial subset
const BATCH_SIZE = 1000
const initialDocs = await fetchDocuments({ limit: BATCH_SIZE })
const search = new LumoSearch(initialDocs, { keys: ['title'] })

// Number of documents fetched so far; doubles as the offset of the next batch
let loadedCount = initialDocs.length

// Load more on demand
/**
 * Fetches the next batch of documents and adds each one to the index.
 * The offset advances with every call, so repeated calls walk through the
 * dataset instead of re-fetching (and re-adding) the same batch.
 *
 * @returns {Promise<void>}
 */
async function loadMore() {
  const moreDocs = await fetchDocuments({ offset: loadedCount, limit: BATCH_SIZE })
  loadedCount += moreDocs.length
  moreDocs.forEach(doc => search.add(doc))
}
Pagination
// For very large datasets, fetch and index in chunks
async function indexAllDocuments() {
const PAGE_SIZE = 500
let page = 0
let allDocs = []
while (true) {
const docs = await fetchDocuments({
offset: page * PAGE_SIZE,
limit: PAGE_SIZE
})
if (docs.length === 0) break
allDocs = [...allDocs, ...docs]
page++
// Show progress
console.log(`Indexed ${allDocs.length} documents`)
}
const search = new LumoSearch(allDocs, {
keys: ['title', 'body']
})
return search
}Performance Tips
- Use `add()` for single documents — it's fast
- For bulk additions (>100 docs), use `setCollection()`
- Avoid frequent rebuilds — batch your updates
- Keep document objects lean — only include searchable fields
- For huge datasets (>100k docs), consider web workers
Handling Updates
// WebSocket updates
socket.on('document:created', (doc) => {
search.add(doc)
})
socket.on('document:updated', ({ id, data }) => {
const results = search.search(id, {
filters: { id },
limit: 1
})
if (results.length > 0) {
search.removeAt(results[0].refIndex)
search.add(data)
}
})
socket.on('document:deleted', ({ id }) => {
const results = search.search(id, {
filters: { id },
limit: 1
})
if (results.length > 0) {
search.removeAt(results[0].refIndex)
}
})Note: Index positions change after removals. Always search for the current position before removing by ID. Or use the SearchManager pattern above for easier ID-based management.