Initial Indexing

Create an index by passing documents to the constructor:

import { LumoSearch } from '@lumosearch/search'

// Sample collection: every document is a plain object
const docs = [
  {
    id: 1,
    title: 'JavaScript Guide',
    category: 'tutorials'
  },
  {
    id: 2,
    title: 'TypeScript Handbook',
    category: 'docs'
  },
  {
    id: 3,
    title: 'React Patterns',
    category: 'tutorials'
  }
]

// Only the `title` field is listed as a search key, with a weight of 3
const options = {
  keys: [{ name: 'title', weight: 3 }]
}

// Passing the documents to the constructor builds the indexes automatically
const search = new LumoSearch(docs, options)

Adding Documents

Add new documents without rebuilding the entire index:

// Describe the document to add
const nodeDoc = {
  id: 4,
  title: 'Node.js in Action',
  category: 'tutorials'
}

// add() takes a single document
search.add(nodeDoc)

// The document is indexed immediately, so it is searchable right away
const results = search.search('node')
// => Returns the new document

Removing Documents

By Index Position

// Position 0 is the first document in the collection
const firstPosition = 0
search.removeAt(firstPosition)

// Note: later documents shift down one position after a removal.
// Given [A, B, C], removeAt(0) leaves [B, C],
// so B is now at position 0.

By Predicate

// Remove all archived documents
const isArchived = (doc) => doc.archived === true
search.remove(isArchived)

// Remove documents by category
const isDeprecated = (doc) => doc.category === 'deprecated'
search.remove(isDeprecated)

// Remove old documents: anything published before the cutoff date.
// The cutoff is a constant, so it is built once rather than per document.
const cutoff = new Date('2020-01-01')
const isPublishedBeforeCutoff = (doc) => new Date(doc.publishedAt) < cutoff
search.remove(isPublishedBeforeCutoff)

Replacing the Collection

Replace all documents and rebuild indexes:

// Build the replacement collection from a list of titles
const updatedTitles = ['Updated Doc 1', 'Updated Doc 2']
const newDocs = updatedTitles.map((title) => ({ title }))

// Swap the entire collection in a single call
search.setCollection(newDocs)

// All indexes are rebuilt from newDocs
// Previous documents are gone

Incremental Updates Pattern

// Real-time updates from API
/**
 * Fetches the recently changed documents and upserts each one into the index.
 *
 * A document whose ID is already indexed is replaced (the old copy is removed
 * and the new one added); any other document is simply added.
 *
 * @returns {Promise<void>} Resolves once every fetched document is processed.
 * @throws {Error} If the API answers with a non-2xx status.
 */
async function syncDocuments() {
  const response = await fetch('/api/documents/recent')

  // fetch() resolves even for HTTP error statuses, so check the status
  // before treating the body as a list of documents.
  if (!response.ok) {
    throw new Error(`Failed to fetch recent documents: HTTP ${response.status}`)
  }

  const newDocs = await response.json()

  for (const doc of newDocs) {
    // Look up the currently indexed copy of this document, if any
    const existing = search.search(doc.id, {
      filters: { id: doc.id },
      limit: 1
    })

    // Update: drop the stale copy first. The add below then covers both the
    // "update" and the "new document" case.
    if (existing.length > 0) {
      search.removeAt(existing[0].refIndex)
    }

    search.add(doc)
  }
}

// Poll every minute.
// syncDocuments is async, and setInterval ignores the promise it returns, so a
// failed run must be caught here; otherwise it surfaces as an unhandled
// promise rejection. Log the failure and let the next tick try again.
setInterval(() => {
  syncDocuments().catch((error) => {
    console.error('Document sync failed:', error)
  })
}, 60000)

Batch Operations

// Add multiple documents
const newDocs = [
  { title: 'Doc 1' },
  { title: 'Doc 2' },
  { title: 'Doc 3' }
]

// Use exactly ONE strategy per batch. Running both would index every new
// document twice, because the snapshot already contains whatever add() inserted.
const BULK_THRESHOLD = 100

if (newDocs.length <= BULK_THRESHOLD) {
  // Small batch: add the documents one by one
  newDocs.forEach(doc => search.add(doc))
} else {
  // Larger batch: merge with the current documents and rebuild once
  const currentDocs = search.exportSnapshot().docs
  search.setCollection([...currentDocs, ...newDocs])
}

Document ID Management

// Track documents by ID for easier updates
/**
 * Wraps a LumoSearch instance and keeps a Map from document ID to the
 * document's current index position, so documents can be updated and deleted
 * by ID instead of by position.
 */
class SearchManager {
  /**
   * @param {Array<object>} docs - Initial documents; each is expected to carry an `id`.
   * @param {object} options - Options forwarded unchanged to LumoSearch.
   */
  constructor(docs, options) {
    // Stored as `index`, not `search`: an instance field named `search` would
    // shadow the search() method below and make manager.search(...) throw.
    this.index = new LumoSearch(docs, options)
    this.idMap = new Map()
    docs.forEach((doc, position) => {
      this.idMap.set(doc.id, position)
    })
  }

  /**
   * Adds a document and records its position.
   * @param {object} doc - Document to add.
   */
  addDocument(doc) {
    this.index.add(doc)
    // The new document's position is taken to be the last slot of the snapshot
    const newIndex = this.index.exportSnapshot().docs.length - 1
    this.idMap.set(doc.id, newIndex)
  }

  /**
   * Replaces the document with the given ID. Does nothing if the ID is unknown.
   * @param {*} id - ID of the document to replace.
   * @param {object} newDoc - Replacement document.
   */
  updateDocument(id, newDoc) {
    const position = this.idMap.get(id)
    if (position === undefined) return

    this.index.removeAt(position)
    this.index.add(newDoc)
    // Removal shifts the positions of later documents, so remap everything
    this.rebuildIdMap()
  }

  /**
   * Removes the document with the given ID. Does nothing if the ID is unknown.
   * @param {*} id - ID of the document to remove.
   */
  deleteDocument(id) {
    const position = this.idMap.get(id)
    if (position === undefined) return

    this.index.removeAt(position)
    // The rebuild clears the map first, which also drops the deleted ID
    this.rebuildIdMap()
  }

  /**
   * Recomputes the ID -> position map from the current snapshot.
   */
  rebuildIdMap() {
    const docs = this.index.exportSnapshot().docs
    this.idMap.clear()
    docs.forEach((doc, position) => {
      this.idMap.set(doc.id, position)
    })
  }

  /**
   * Runs a query against the wrapped index.
   * @param {*} query - Query forwarded to LumoSearch#search.
   * @param {object} [options] - Search options forwarded unchanged.
   * @returns {*} Whatever LumoSearch#search returns.
   */
  search(query, options) {
    return this.index.search(query, options)
  }
}

Document Requirements

Documents must be plain JavaScript objects
Fields specified in keys must exist
Field values should be strings or convertible to strings
Nested fields are supported with dot notation
Array fields are flattened and indexed as space-separated strings

Array Fields

// A document whose `tags` field holds an array of strings
const docs = [
  { title: 'JavaScript Tutorial', tags: ['javascript', 'programming', 'web'] }
]

// Both fields are search keys: title has weight 3, tags has weight 2
const keys = [
  { name: 'title', weight: 3 },
  { name: 'tags', weight: 2 }
]
const search = new LumoSearch(docs, { keys })

// The tags array is indexed as: "javascript programming web"
const results = search.search('programming')
// => Matches the document via tags array

Large Dataset Strategies

Lazy Loading

// Number of documents requested per page
const PAGE_SIZE = 1000

// Load initial subset
const initialDocs = await fetchDocuments({ limit: PAGE_SIZE })
const search = new LumoSearch(initialDocs, { keys: ['title'] })

// Offset of the first document that has not been loaded yet. It advances with
// every page, so repeated loadMore() calls never fetch the same page twice.
let nextOffset = initialDocs.length

// Load more on demand
/**
 * Fetches the next page of documents and adds each one to the index.
 * Calls are expected to run one at a time; await each call before the next.
 *
 * @returns {Promise<number>} Number of documents added; 0 means nothing was left to load.
 */
async function loadMore() {
  const moreDocs = await fetchDocuments({ offset: nextOffset, limit: PAGE_SIZE })
  nextOffset += moreDocs.length
  moreDocs.forEach(doc => search.add(doc))
  return moreDocs.length
}

Pagination

// For very large datasets, fetch and index in chunks
/**
 * Fetches the whole collection page by page, then builds one index over it.
 *
 * @returns {Promise<LumoSearch>} Index over every fetched document, keyed on
 *   `title` and `body`.
 */
async function indexAllDocuments() {
  const PAGE_SIZE = 500
  const allDocs = []

  while (true) {
    // Continue from the number of documents actually received, so a page that
    // comes back shorter than PAGE_SIZE does not cause documents to be skipped.
    const docs = await fetchDocuments({
      offset: allDocs.length,
      limit: PAGE_SIZE
    })

    // An empty page means the collection is exhausted
    if (docs.length === 0) break

    // Append in place instead of copying the whole accumulator on every page
    allDocs.push(...docs)

    // Show progress (documents are only fetched here; indexing happens below)
    console.log(`Fetched ${allDocs.length} documents`)
  }

  const search = new LumoSearch(allDocs, {
    keys: ['title', 'body']
  })

  return search
}

Performance Tips

Use add() for single documents — it's fast
For bulk additions (>100 docs), use setCollection()
Avoid frequent rebuilds — batch your updates
Keep document objects lean — only include searchable fields
For huge datasets (>100k docs), consider web workers

Handling Updates

// WebSocket updates

// Looks up the indexed copy of a document by ID (at most one result)
const lookupById = (id) => search.search(id, {
  filters: { id },
  limit: 1
})

// Created: index the new document as-is
socket.on('document:created', (doc) => {
  search.add(doc)
})

// Updated: replace the indexed copy; unknown IDs are ignored
socket.on('document:updated', (payload) => {
  const { id, data } = payload
  const matches = lookupById(id)

  if (matches.length === 0) return

  search.removeAt(matches[0].refIndex)
  search.add(data)
})

// Deleted: remove the indexed copy; unknown IDs are ignored
socket.on('document:deleted', (payload) => {
  const { id } = payload
  const matches = lookupById(id)

  if (matches.length === 0) return

  search.removeAt(matches[0].refIndex)
})

Note: Index positions change after removals. Always look up a document's current position immediately before removing it by ID, or use the SearchManager pattern above for easier ID-based management.

Search API →

Query your indexed documents with filters and scoring

Weighted Fields →

Configure which fields matter most for ranking

Indexing Documents