Data Structures and Algorithms. Part 5

Hello, friends!

In this series of articles, we examine the data structures and algorithms presented in this wonderful repositoryThis is the fifth part of the series.

Today we will look at the disjoint set system, Bloom filter and live data cache.

The code presented in this and other articles in the series can be found at in this repository.

Interesting? Then please read on.

15. System of disjoint sets

Description

A disjoint set system (also called a union-find data structure or merge-find set) is a data structure that contains a set of elements divided into a number of disjoint subsets. It provides nearly constant running time (bounded inverse Ackermann function) adding new sets, merging existing sets, and determining whether elements belong to the same set. Among other things, the CHM plays a key role in Kruskal's algorithm for constructing a minimum spanning tree of a weighted connected undirected graph.

Main operations of the SNM:

MakeSet(x) – creates a singleton set {x}
Find(x) — returns the identifier of the set (selected or representative element) containing the element x
Union(x, y) — unites sets containing x And y

8 disjoint sets created with MakeSet

Grouping Sets with Union

Implementation

Let's start with the implementation of the constructor of the selected set element:

// data-structures/disjoin-set/item.js
export default class Item {
  constructor(value, keyCb) {
    // Значение
    this.value = value
    // Кастомная функция извлечения ключа
    this.keyCb = keyCb
    // Родительский узел
    this.parent = null
    // Дочерние узлы
    this.children = {}
  }
}

Methods to get the value of a node, the root node, and determine whether a node is a root:

// Возвращает ключ (значение)
getKey() {
  if (this.keyCb) {
    return this.keyCb(this.value)
  }
  return this.value
}

// Возвращает корневой узел
getRoot() {
  return this.isRoot() ? this : this.parent.getRoot()
}

// Определяет, является ли узел корневым
isRoot() {
  return this.parent === null
}

Methods for getting the rank and descendants of a node:

// Возвращает ранг (вес) узла
getRank() {
  const children = this.getChildren()

  if (children.length === 0) {
    return 0
  }

  let rank = 0
  for (const child of children) {
    rank += 1
    rank += child.getRank()
  }
  return rank
}

// Возвращает потомков
getChildren() {
  return Object.values(this.children)
}

Methods for setting parent and adding child node:

// Устанавливает предка
setParent(parent, forceSettingParentChild = true) {
  this.parent = parent

  if (forceSettingParentChild) {
    parent.addChild(this)
  }

  return this
}

// Добавляет потомка
addChild(child) {
  this.children[child.getKey()] = child
  child.setParent(this, false)

  return this
}

Full node code:

export default class Item {
  constructor(value, keyCb) {
    // Значение
    this.value = value
    // Кастомная функция извлечения ключа
    this.keyCb = keyCb
    // Родительский узел
    this.parent = null
    // Дочерние узлы
    this.children = {}
  }

  // Возвращает ключ (значение)
  getKey() {
    if (this.keyCb) {
      return this.keyCb(this.value)
    }
    return this.value
  }

  // Возвращает корневой узел
  getRoot() {
    return this.isRoot() ? this : this.parent.getRoot()
  }

  // Определяет, является ли узел корневым
  isRoot() {
    return this.parent === null
  }

  // Возвращает ранг (вес) узла
  getRank() {
    const children = this.getChildren()

    if (children.length === 0) {
      return 0
    }

    let rank = 0
    for (const child of children) {
      rank += 1
      rank += child.getRank()
    }
    return rank
  }

  // Возвращает потомков
  getChildren() {
    return Object.values(this.children)
  }

  // Устанавливает предка
  setParent(parent, forceSettingParentChild = true) {
    this.parent = parent

    if (forceSettingParentChild) {
      parent.addChild(this)
    }

    return this
  }

  // Добавляет потомка
  addChild(child) {
    this.children[child.getKey()] = child
    child.setParent(this, false)

    return this
  }
}

Let's start implementing the SNM:

// data-structures/disjoin-set/index.js
import Item from './item'

export default class DisjointSet {
  constructor(cb) {
    // Кастомная функция извлечения ключа (значения) узла
    this.cb = cb
    // Непересекающиеся подмножества
    this.items = {}
  }
}

Subset creation method:

// Создает подмножество
makeSet(value) {
  // Создаем выделенный элемент
  const item = new Item(value, this.cb)

  // Добавляем подмножество в список
  if (!this.items[item.getKey()]) {
    this.items[item.getKey()] = item
  }

  return this
}

Method for searching for a selected element:

// Ищет выделенный элемент
find(value) {
  const temp = new Item(value, this.cb)
  const item = this.items[temp.getKey()]
  return item ? item.getRoot().getKey() : null
}

Method of combining two subsets:

// Объединяет подмножества
union(value1, value2) {
  const root1 = this.find(value1)
  const root2 = this.find(value2)

  if (!root1 || !root2) {
    throw new Error('Одно или оба значения отсутствуют!')
  }

  if (root1 === root2) {
    return this
  }

  const item1 = this.items[root1]
  const item2 = this.items[root2]

  // Определяем, какое подмножество имеет больший ранг.
  // Подмножество с меньшим рангом становится потомком подмножества с большим рангом
  if (item1.getRank() < item2.getRank()) {
    item2.addChild(item1)
    return this
  }

  item1.addChild(item2)
  return this
}

Method for determining whether two elements belong to the same set:

// Определяет, принадлежат ли значения к одному множеству
isSameSet(value1, value2) {
  const root1 = this.find(value1)
  const root2 = this.find(value2)

  if (!root1 || !root2) {
    throw new Error('Одно или оба значения отсутствуют!')
  }

  return root1 === root2
}

Full SNM code:

import Item from './item'

export default class DisjointSet {
  constructor(cb) {
    // Кастомная функция извлечения ключа (значения) узла
    this.cb = cb
    // Непересекающиеся подмножества
    this.items = {}
  }

  // Создает подмножество
  makeSet(value) {
    // Создаем выделенный элемент
    const item = new Item(value, this.cb)

    // Добавляем подмножество в список
    if (!this.items[item.getKey()]) {
      this.items[item.getKey()] = item
    }

    return this
  }

  // Ищет выделенный элемент
  find(value) {
    const temp = new Item(value, this.cb)
    const item = this.items[temp.getKey()]
    return item ? item.getRoot().getKey() : null
  }

  // Объединяет подмножества
  union(value1, value2) {
    const root1 = this.find(value1)
    const root2 = this.find(value2)

    if (!root1 || !root2) {
      throw new Error('Одно или оба значения отсутствуют!')
    }

    if (root1 === root2) {
      return this
    }

    const item1 = this.items[root1]
    const item2 = this.items[root2]

    // Определяем, какое подмножество имеет больший ранг.
    // Подмножество с меньшим рангом становится потомком подмножества с большим рангом
    if (item1.getRank() < item2.getRank()) {
      item2.addChild(item1)
      return this
    }

    item1.addChild(item2)
    return this
  }

  // Определяет, принадлежат ли значения к одному множеству
  isSameSet(value1, value2) {
    const root1 = this.find(value1)
    const root2 = this.find(value2)

    if (!root1 || !root2) {
      throw new Error('Одно или оба значения отсутствуют!')
    }

    return root1 === root2
  }
}

A standalone SNM (without additional dependencies) can be implemented as follows:

export default class DisjointSetAdHoc {
  constructor(size) {
    this.roots = new Array(size).fill(0).map((_, i) => i)
    this.heights = new Array(size).fill(1)
  }

  find(a) {
    if (this.roots[a] === a) return a
    this.roots[a] = this.find(this.roots[a])
    return this.roots[a]
  }

  union(a, b) {
    const rootA = this.find(a)
    const rootB = this.find(b)
    if (rootA === rootB) return

    if (this.heights[rootA] < this.heights[rootB]) {
      this.roots[rootA] = rootB
    } else if (this.heights[rootA] > this.heights[rootB]) {
      this.roots[rootB] = rootA
    } else {
      this.roots[rootB] = rootA
      this.heights[rootA]++
    }
  }

  connected(a, b) {
    return this.find(a) === this.find(b)
  }
}

Testing

We check that the code of the selected element works as expected:

// data-structures/disjoint-set/__tests__/item.test.js
import Item from '../item'

describe('Item', () => {
  it('должен выполнить базовые манипуляции с выделенным элементом', () => {
    const itemA = new Item('A')
    const itemB = new Item('B')
    const itemC = new Item('C')
    const itemD = new Item('D')

    expect(itemA.getRank()).toBe(0)
    expect(itemA.getChildren()).toEqual([])
    expect(itemA.getKey()).toBe('A')
    expect(itemA.getRoot()).toEqual(itemA)
    expect(itemA.isRoot()).toBe(true)
    expect(itemB.isRoot()).toBe(true)

    itemA.addChild(itemB)
    itemD.setParent(itemC)

    expect(itemA.getRank()).toBe(1)
    expect(itemC.getRank()).toBe(1)

    expect(itemB.getRank()).toBe(0)
    expect(itemD.getRank()).toBe(0)

    expect(itemA.getChildren().length).toBe(1)
    expect(itemC.getChildren().length).toBe(1)

    expect(itemA.getChildren()[0]).toEqual(itemB)
    expect(itemC.getChildren()[0]).toEqual(itemD)

    expect(itemB.getChildren().length).toBe(0)
    expect(itemD.getChildren().length).toBe(0)

    expect(itemA.getRoot()).toEqual(itemA)
    expect(itemB.getRoot()).toEqual(itemA)

    expect(itemC.getRoot()).toEqual(itemC)
    expect(itemD.getRoot()).toEqual(itemC)

    expect(itemA.isRoot()).toBe(true)
    expect(itemB.isRoot()).toBe(false)
    expect(itemC.isRoot()).toBe(true)
    expect(itemD.isRoot()).toBe(false)

    itemA.addChild(itemC)

    expect(itemA.isRoot()).toBe(true)
    expect(itemB.isRoot()).toBe(false)
    expect(itemC.isRoot()).toBe(false)
    expect(itemD.isRoot()).toBe(false)

    expect(itemA.getRank()).toEqual(3)
    expect(itemB.getRank()).toEqual(0)
    expect(itemC.getRank()).toEqual(1)
  })

  it('должен выполнить базовые манипуляции с выделенным элементом с кастомной функцией извлечения ключа', () => {
    const keyExtractor = (value) => value.key

    const itemA = new Item({ key: 'A', value: 1 }, keyExtractor)
    const itemB = new Item({ key: 'B', value: 2 }, keyExtractor)
    const itemC = new Item({ key: 'C', value: 3 }, keyExtractor)
    const itemD = new Item({ key: 'D', value: 4 }, keyExtractor)

    expect(itemA.getRank()).toBe(0)
    expect(itemA.getChildren()).toEqual([])
    expect(itemA.getKey()).toBe('A')
    expect(itemA.getRoot()).toEqual(itemA)
    expect(itemA.isRoot()).toBe(true)
    expect(itemB.isRoot()).toBe(true)

    itemA.addChild(itemB)
    itemD.setParent(itemC)

    expect(itemA.getRank()).toBe(1)
    expect(itemC.getRank()).toBe(1)

    expect(itemB.getRank()).toBe(0)
    expect(itemD.getRank()).toBe(0)

    expect(itemA.getChildren().length).toBe(1)
    expect(itemC.getChildren().length).toBe(1)

    expect(itemA.getChildren()[0]).toEqual(itemB)
    expect(itemC.getChildren()[0]).toEqual(itemD)

    expect(itemB.getChildren().length).toBe(0)
    expect(itemD.getChildren().length).toBe(0)

    expect(itemA.getRoot()).toEqual(itemA)
    expect(itemB.getRoot()).toEqual(itemA)

    expect(itemC.getRoot()).toEqual(itemC)
    expect(itemD.getRoot()).toEqual(itemC)

    expect(itemA.isRoot()).toBe(true)
    expect(itemB.isRoot()).toBe(false)
    expect(itemC.isRoot()).toBe(true)
    expect(itemD.isRoot()).toBe(false)

    itemA.addChild(itemC)

    expect(itemA.isRoot()).toBe(true)
    expect(itemB.isRoot()).toBe(false)
    expect(itemC.isRoot()).toBe(false)
    expect(itemD.isRoot()).toBe(false)

    expect(itemA.getRank()).toEqual(3)
    expect(itemB.getRank()).toEqual(0)
    expect(itemC.getRank()).toEqual(1)
  })
})

We perform tests:

npm run test ./data-structures/disjoint-set/__tests__/item

Results:

We check that the SNM code works as expected:

// data-structures/disjoint-set/__tests__/disjoint-set.test.js
import DisjointSet from '..'

describe('DisjointSet', () => {
  it('должен выбросить исключения при объединении и проверке несуществующих множеств', () => {
    function mergeNotExistingSets() {
      const disjointSet = new DisjointSet()

      disjointSet.union('A', 'B')
    }

    function checkNotExistingSets() {
      const disjointSet = new DisjointSet()

      disjointSet.isSameSet('A', 'B')
    }

    expect(mergeNotExistingSets).toThrow()
    expect(checkNotExistingSets).toThrow()
  })

  it('должен выполнить базовые манипуляции с системой непересекающихся множеств', () => {
    const disjointSet = new DisjointSet()

    expect(disjointSet.find('A')).toBeNull()
    expect(disjointSet.find('B')).toBeNull()

    disjointSet.makeSet('A')

    expect(disjointSet.find('A')).toBe('A')
    expect(disjointSet.find('B')).toBeNull()

    disjointSet.makeSet('B')

    expect(disjointSet.find('A')).toBe('A')
    expect(disjointSet.find('B')).toBe('B')

    disjointSet.makeSet('C')

    expect(disjointSet.isSameSet('A', 'B')).toBe(false)

    disjointSet.union('A', 'B')

    expect(disjointSet.find('A')).toBe('A')
    expect(disjointSet.find('B')).toBe('A')
    expect(disjointSet.isSameSet('A', 'B')).toBe(true)
    expect(disjointSet.isSameSet('B', 'A')).toBe(true)
    expect(disjointSet.isSameSet('A', 'C')).toBe(false)

    disjointSet.union('A', 'A')

    disjointSet.union('B', 'C')

    expect(disjointSet.find('A')).toBe('A')
    expect(disjointSet.find('B')).toBe('A')
    expect(disjointSet.find('C')).toBe('A')

    expect(disjointSet.isSameSet('A', 'B')).toBe(true)
    expect(disjointSet.isSameSet('B', 'C')).toBe(true)
    expect(disjointSet.isSameSet('A', 'C')).toBe(true)

    disjointSet.makeSet('E').makeSet('F').makeSet('G').makeSet('H').makeSet('I')

    disjointSet.union('E', 'F').union('F', 'G').union('G', 'H').union('H', 'I')

    expect(disjointSet.isSameSet('A', 'I')).toBe(false)
    expect(disjointSet.isSameSet('E', 'I')).toBe(true)

    disjointSet.union('I', 'C')

    expect(disjointSet.find('I')).toBe('E')
    expect(disjointSet.isSameSet('A', 'I')).toBe(true)
  })

  it('должен выполнить базовые манипуляции с системой непересекающихся множеств с кастомной функцией извлечения ключа', () => {
    const keyExtractor = (value) => value.key

    const disjointSet = new DisjointSet(keyExtractor)

    const itemA = { key: 'A', value: 1 }
    const itemB = { key: 'B', value: 2 }
    const itemC = { key: 'C', value: 3 }

    expect(disjointSet.find(itemA)).toBeNull()
    expect(disjointSet.find(itemB)).toBeNull()

    disjointSet.makeSet(itemA)

    expect(disjointSet.find(itemA)).toBe('A')
    expect(disjointSet.find(itemB)).toBeNull()

    disjointSet.makeSet(itemB)

    expect(disjointSet.find(itemA)).toBe('A')
    expect(disjointSet.find(itemB)).toBe('B')

    disjointSet.makeSet(itemC)

    expect(disjointSet.isSameSet(itemA, itemB)).toBe(false)

    disjointSet.union(itemA, itemB)

    expect(disjointSet.find(itemA)).toBe('A')
    expect(disjointSet.find(itemB)).toBe('A')
    expect(disjointSet.isSameSet(itemA, itemB)).toBe(true)
    expect(disjointSet.isSameSet(itemB, itemA)).toBe(true)
    expect(disjointSet.isSameSet(itemA, itemC)).toBe(false)

    disjointSet.union(itemA, itemC)

    expect(disjointSet.find(itemA)).toBe('A')
    expect(disjointSet.find(itemB)).toBe('A')
    expect(disjointSet.find(itemC)).toBe('A')

    expect(disjointSet.isSameSet(itemA, itemB)).toBe(true)
    expect(disjointSet.isSameSet(itemB, itemC)).toBe(true)
    expect(disjointSet.isSameSet(itemA, itemC)).toBe(true)
  })

  it('должен объединить меньшее множество с большим, сделав большее новым выделенным элементом', () => {
    const disjointSet = new DisjointSet()

    disjointSet
      .makeSet('A')
      .makeSet('B')
      .makeSet('C')
      .union('B', 'C')
      .union('A', 'C')

    expect(disjointSet.find('A')).toBe('B')
  })
})

We perform tests:

npm run test ./data-structures/disjoint-set/__tests__/disjoint-set

Results:

We check that the autonomous SNM code works as expected:

// data-structures/disjoint-set/__tests__/ad-hoc.test.js
import DisjointSetAdhoc from '../ad-hoc'

describe('DisjointSetAdhoc', () => {
  it('должен создать множества и найти соединенные элементы', () => {
    const set = new DisjointSetAdhoc(10)

    // 1-2-5-6-7 3-8-9 4
    set.union(1, 2)
    set.union(2, 5)
    set.union(5, 6)
    set.union(6, 7)

    set.union(3, 8)
    set.union(8, 9)

    expect(set.connected(1, 5)).toBe(true)
    expect(set.connected(5, 7)).toBe(true)
    expect(set.connected(3, 8)).toBe(true)

    expect(set.connected(4, 9)).toBe(false)
    expect(set.connected(4, 7)).toBe(false)

    // 1-2-5-6-7 3-8-9-4
    set.union(9, 4)

    expect(set.connected(4, 9)).toBe(true)
    expect(set.connected(4, 3)).toBe(true)
    expect(set.connected(8, 4)).toBe(true)

    expect(set.connected(8, 7)).toBe(false)
    expect(set.connected(2, 3)).toBe(false)
  })

  it('должен сохранять высоту дерева маленькой', () => {
    const set = new DisjointSetAdhoc(10)

    // 1-2-6-7-9 1 3 4 5
    set.union(7, 6)
    set.union(1, 2)
    set.union(2, 6)
    set.union(1, 7)
    set.union(9, 1)

    expect(set.connected(1, 7)).toBe(true)
    expect(set.connected(6, 9)).toBe(true)
    expect(set.connected(4, 9)).toBe(false)

    expect(Math.max(...set.heights)).toBe(3)
  })
})

We perform tests:

npm run test ./data-structures/disjoint-set/__tests__/ad-hoc

Results:

16. Bloom Filter

Description

A Bloom filter is a space-efficient probabilistic data structure for determining whether an element is in a set. On the one hand, this structure is very fast and uses minimal memory, on the other hand, false positive results are possible. False negative results are impossible. In other words, the filter returns either “the element MAY be in the set” or “the element is DEFINITELY NOT in the set”. A Bloom filter can use any amount of memory, but in general, the more memory, the lower the probability of false positive results.

Bloom proposed this technique for use in areas where the amount of input data would require an impractically large amount of memory if conditionally error-free hashing techniques were used.

Description of the algorithm

An empty Bloom filter is represented by an array of m bits set in 0. It must be determined k independent hash functions that map each element of the set to one of m array positions, generating a uniform random distribution. Typically k is given by a constant that is much smaller m and is proportional to the number of elements added. Precise selection k and constant proportionality m are determined by the level of false positives of the filter.

Here is an example of a Bloom filter representing a set {x, y, z}. The colored arrows show the positions (indices) of the bit array corresponding to the elements of the set. Element w is not contained in the set {x, y, z}because it is bound to an array position equal to 0. In this case m = 18A k = 3.

To add an element to the filter e it is necessary to write down 1 in every position h1(e)…, hk(e) (hash functions) of a bit array.

To determine the presence of an element e in a set it is necessary to check the state of bits h1(e)…, hk(e). If at least one bit has a value 0the element is missing. If all bits have a value 1the structure reports that the element is present. In this case, two situations can arise: either the element is actually contained in the set, or all the bits were set by accident when adding other elements. The second case is the source of false positive results.

Operations

Bloom filter allows two main operations: insertion and searching for an element. Searching can result in false positives. Deleting elements is possible, but problematic.

In other words, you can add elements to the filter, and when asked whether an element exists, the structure answers either “no” or “possibly”.

Insertion and search operations are performed in time O(1).

Creating a filter

When creating a filter, its size is specified. The size of our filter is 100. All elements are installed in false (essentially the same as 0).

During the insertion process, a hash of the element is generated using three hash functions. These functions return indices. The value at each index is updated by true.

The search process also generates a hash of the element. It then checks that all values at the indexes are trueIf so, then the element can be contained in the set.

However, we cannot be 100% sure of this, as the bits could have been set when other elements were added.

If at least one value is equal to falsethe element is definitely absent from the set.

False positive results

The probability of false positives is determined by three factors: filter size (m), the number of hash functions used (k) and the number of elements added to the filter (n).

The formula looks like this:

( 1 — e ^-kn/m ) ^k

The values of these variables are chosen based on the acceptability of false positive results.

Application

Bloom filter can be used in blogs. It is ideal for identifying articles that the user has not yet read by adding articles that he has read to the filter.

Some articles will be filtered out by mistake, but the price is acceptable: the user will not see several articles, but he will be shown new articles every time he visits the site.

You can read more about the Bloom filter in this article.

Implementation

Let's start implementing the Bloom filter:

// data-structures/bloom-filter.js
export default class BloomFilter {
  // Размер фильтра напрямую влияет на вероятность ложноположительных результатов.
  // Как правило, чем больше размер, тем такая вероятность ниже
  constructor(size = 100) {
    // Размер фильтра
    this.size = size
    // Хранилище (по сути, сам фильтр)
    this.storage = this.createStore(size)
  }
}

Method for adding an element to a filter:

// Добавляет элемент в фильтр
insert(item) {
  // Вычисляем хеш элемента (индексы массива в количестве 3 штук)
  const hashValues = this.getHashValues(item)

  // Устанавливаем значение по каждому индексу в `true`
  hashValues.forEach((v) => this.storage.setValue(v))
}

Method for determining the presence of an element in a filter:

// Определяет наличие элемента в фильтре
mayContain(item) {
  // Вычисляем хеш элемента
  const hashValues = this.getHashValues(item)

  // Перебираем индексы
  for (const v of hashValues) {
    // Если хотя бы одно значение равняется `false`
    if (!this.storage.getValue(v)) {
      // Элемент точно отсутствует
      return false
    }
  }

  // Элемент может присутствовать
  return true
}

Method for creating a repository (essentially a filter):

// Создает хранилище
createStore(size) {
  // Хранилище - массив указанного размера, заполненный `false`
  const storage = new Array(size).fill(false)

  // Возвращается объект с "геттером" и "сеттером"
  return {
    getValue(i) {
      return storage[i]
    },
    setValue(i) {
      storage[i] = true
    },
  }
}

Hash functions (rather primitive, I must say):

// Хеш-функции
hash1(item) {
  let hash = 0
  for (let i = 0; i < item.length; i++) {
    const char = item.charCodeAt(i)
    hash = (hash << 5) + hash + char
    hash &= hash // конвертируем значение в 32-битное целое число
    hash = Math.abs(hash)
  }
  return hash % this.size
}

hash2(item) {
  let hash = 5381
  for (let i = 0; i < item.length; i++) {
    const char = item.charCodeAt(i)
    hash = (hash << 5) + hash + char // hash * 33 + char
  }
  return Math.abs(hash % this.size)
}

hash3(item) {
  let hash = 0
  for (let i = 0; i < item.length; i++) {
    const char = item.charCodeAt(i)
    hash = (hash << 5) - hash + char
    hash &= hash // конвертируем значение в 32-битное целое число
  }
  return Math.abs(hash % this.size)
}

Finally, the hash generation method:

// Генерирует хеш элемента.
// Возвращает массив из трех индексов
getHashValues(item) {
  return [this.hash1(item), this.hash2(item), this.hash3(item)]
}

Full Bloom Filter code:

export default class BloomFilter {
  // Размер фильтра напрямую влияет на вероятность ложноположительных результатов.
  // Как правило, чем больше размер, тем такая вероятность ниже
  constructor(size = 100) {
    // Размер фильтра
    this.size = size
    // Хранилище (по сути, сам фильтр)
    this.storage = this.createStore(size)
  }

  // Добавляет элемент в фильтр
  insert(item) {
    // Вычисляем хеш элемента (индексы массива в количестве 3 штук)
    const hashValues = this.getHashValues(item)

    // Устанавливаем значение по каждому индексу в `true`
    hashValues.forEach((v) => this.storage.setValue(v))
  }

  // Определяет наличие элемента в фильтре
  mayContain(item) {
    // Вычисляем хеш элемента
    const hashValues = this.getHashValues(item)

    // Перебираем индексы
    for (const v of hashValues) {
      // Если хотя бы одно значение равняется `false`
      if (!this.storage.getValue(v)) {
        // Элемент точно отсутствует
        return false
      }
    }

    // Элемент может присутствовать
    return true
  }

  // Создает хранилище
  createStore(size) {
    // Хранилище - массив указанного размера, заполненный `false`
    const storage = new Array(size).fill(false)

    // Возвращается объект с "геттером" и "сеттером"
    return {
      getValue(i) {
        return storage[i]
      },
      setValue(i) {
        storage[i] = true
      },
    }
  }

  // Хеш-функции
  hash1(item) {
    let hash = 0
    for (let i = 0; i < item.length; i++) {
      const char = item.charCodeAt(i)
      hash = (hash << 5) + hash + char
      hash &= hash // конвертируем значение в 32-битное целое число
      hash = Math.abs(hash)
    }
    return hash % this.size
  }

  hash2(item) {
    let hash = 5381
    for (let i = 0; i < item.length; i++) {
      const char = item.charCodeAt(i)
      hash = (hash << 5) + hash + char // hash * 33 + char
    }
    return Math.abs(hash % this.size)
  }

  hash3(item) {
    let hash = 0
    for (let i = 0; i < item.length; i++) {
      const char = item.charCodeAt(i)
      hash = (hash << 5) - hash + char
      hash &= hash // конвертируем значение в 32-битное целое число
    }
    return Math.abs(hash % this.size)
  }

  // Генерирует хеш элемента.
  // Возвращает массив из трех индексов
  getHashValues(item) {
    return [this.hash1(item), this.hash2(item), this.hash3(item)]
  }
}

Testing

Let's check that the filter code works as expected:

// data-structures/__tests__/bloom-filter.test.js
import BloomFilter from '../bloom-filter'

describe('BloomFilter', () => {
  let bloomFilter
  const people = ['Bruce Wayne', 'Clark Kent', 'Barry Allen']

  beforeEach(() => {
    bloomFilter = new BloomFilter()
  })

  it('должен содержать методы "insert" и "mayContain"', () => {
    expect(typeof bloomFilter.insert).toBe('function')
    expect(typeof bloomFilter.mayContain).toBe('function')
  })

  it('должен создать хранилище с указанными методами', () => {
    const store = bloomFilter.createStore(18)
    expect(typeof store.getValue).toBe('function')
    expect(typeof store.setValue).toBe('function')
  })

  it('должен стабильно хешировать элементы с помощью трех хеш-функций', () => {
    const str1 = 'apple'

    expect(bloomFilter.hash1(str1)).toEqual(bloomFilter.hash1(str1))
    expect(bloomFilter.hash2(str1)).toEqual(bloomFilter.hash2(str1))
    expect(bloomFilter.hash3(str1)).toEqual(bloomFilter.hash3(str1))

    expect(bloomFilter.hash1(str1)).toBe(14)
    expect(bloomFilter.hash2(str1)).toBe(43)
    expect(bloomFilter.hash3(str1)).toBe(10)

    const str2 = 'orange'

    expect(bloomFilter.hash1(str2)).toEqual(bloomFilter.hash1(str2))
    expect(bloomFilter.hash2(str2)).toEqual(bloomFilter.hash2(str2))
    expect(bloomFilter.hash3(str2)).toEqual(bloomFilter.hash3(str2))

    expect(bloomFilter.hash1(str2)).toBe(0)
    expect(bloomFilter.hash2(str2)).toBe(61)
    expect(bloomFilter.hash3(str2)).toBe(10)
  })

  it('должен создать массив с тремя хешированными значениями', () => {
    expect(bloomFilter.getHashValues('abc').length).toBe(3)
    expect(bloomFilter.getHashValues('abc')).toEqual([66, 63, 54])
  })

  it('должен добавить строки и возвращать `true` при проверке их наличия', () => {
    people.forEach((person) => bloomFilter.insert(person))

    // expect(bloomFilter.mayContain('Bruce Wayne')).toBe(true)
    // expect(bloomFilter.mayContain('Clark Kent')).toBe(true)
    // expect(bloomFilter.mayContain('Barry Allen')).toBe(true)

    expect(bloomFilter.mayContain('Tony Stark')).toBe(false)
  })
})

Let's run the tests:

npm run test ./data-structures/__tests__/bloom-filter

Results:

17. Cache of actual data

Description

The least recently used (LRU) cache organizes (stores) items in such a way that we can easily determine which items are used frequently and which are used infrequently.

The implementation of the KAD includes:

constructor LRUCache(capacity: number)initializing a CAD of a certain size (capacitypositive integer)
method get(key: string)returning the value by key (key) or null
method set(key: string, value: any)updating or adding a value (value) by key (key). When exceeding capacity the oldest (least used) value is removed (evicted) from the cache

Functions get And set must be executed in constant time O(1).

Complexity

Temporary

Spatial

O(n)

Implementation

Doubly linked list and hash table

In the next implementation of the CAD, we will use a hash table (see Part 2, Section 5) for constant (on average) access to elements and a doubly linked list (see Part 1, Section 2) for constant updating and evicting of elements.

Let's start with the node implementation:

// data-structures/lru-cache.js
// Узел
class Node {
  constructor(key, val, prev = null, next = null) {
    // Ключ
    this.key = key
    // Значение
    this.val = val
    // Ссылка на предыдущий узел
    this.prev = prev
    // Ссылка на следующий узел
    this.next = next
  }
}

Now let's deal with the ring road:

// КАД
export default class LRUCache {
  // Конструктор принимает емкость кэша
  constructor(capacity) {
    // Максимальный размер кэша
    this.capacity = capacity
    // Кэшированные узлы
    this.nodesMap = {}
    // Текущий размер кэша
    this.size = 0
    // Головной узел
    this.head = new Node()
    // Хвостовой узел
    this.tail = new Node()
  }
}

Method for getting an element by key:

// Возвращает значение по ключу
get(key) {
  const node = this.nodesMap[key]
  if (!node) return null
  // Обновляем "приоритет" узла
  this.promote(node)
  return node.val
}

Method for adding an item to the cache:

// Добавляет узел в кэш
set(key, val) {
  if (this.nodesMap[key]) {
    // Обновляем значение существующего узла
    const node = this.nodesMap[key]
    node.val = val
    this.promote(node)
  } else {
    // Добавляем новый узел
    const node = new Node(key, val)
    this.append(node)
  }
}

Method to update node priority:

/**
 * Перемещает узел в конец связного списка.
 * Это означает, что узел используется наиболее часто.
 * Это также снижает вероятность удаления такого узла из кэша
 */
promote(node) {
  this.evict(node)
  this.append(node)
}

Method to add a node to the end of a linked list:

// Перемещает узел в конец связного списка
append(node) {
  this.nodesMap[node.key] = node

  if (!this.head.next) {
    // Первый узел
    this.head.next = node
    this.tail.prev = node
    node.prev = this.head
    node.next = this.tail
  } else {
    // Добавляем узел в конец
    const oldTail = this.tail.prev
    oldTail.next = node
    node.prev = oldTail
    node.next = this.tail
    this.tail.prev = node
  }

  // Увеличиваем текущий размер кэша
  this.size += 1
  // Если достигнут максимальный размер кэша,
  // то удаляем первый узел
  if (this.size > this.capacity) {
    this.evict(this.head.next)
  }
}

And the method for removing a node:

// Удаляет (вытесняет) узел из кэша
evict(node) {
  delete this.nodesMap[node.key]
  // Уменьшаем текущий размер кэша
  this.size -= 1

  const prev = node.prev
  const next = node.next

  // Имеется только один узел
  if (prev === this.head && next === this.tail) {
    this.head.next = null
    this.tail.prev = null
    this.size = 0
    return
  }

  // Это головной узел
  if (prev === this.head) {
    next.prev = this.head
    this.head.next = next
    return
  }

  // Это хвостовой узел
  if (next === this.tail) {
    prev.next = this.tail
    this.tail.prev = prev
    return
  }

  // Это узел в середине списка
  prev.next = next
  next.prev = prev
}

Full CAD code with the node:

// Узел
class Node {
  constructor(key, val, prev = null, next = null) {
    // Ключ
    this.key = key
    // Значение
    this.val = val
    // Ссылка на предыдущий узел
    this.prev = prev
    // Ссылка на следующий узел
    this.next = next
  }
}

// КАД
export default class LRUCache {
  // Конструктор принимает емкость кэша
  constructor(capacity) {
    // Максимальный размер кэша
    this.capacity = capacity
    // Кэшированные узлы
    this.nodesMap = {}
    // Текущий размер кэша
    this.size = 0
    // Головной узел
    this.head = new Node()
    // Хвостовой узел
    this.tail = new Node()
  }

  // Возвращает значение по ключу
  get(key) {
    const node = this.nodesMap[key]
    if (!node) return null
    // Обновляем "приоритет" узла
    this.promote(node)
    return node.val
  }

  // Добавляет узел в кэш
  set(key, val) {
    if (this.nodesMap[key]) {
      // Обновляем значение существующего узла
      const node = this.nodesMap[key]
      node.val = val
      this.promote(node)
    } else {
      // Добавляем новый узел
      const node = new Node(key, val)
      this.append(node)
    }
  }

  /**
   * Перемещает узел в конец связного списка.
   * Это означает, что узел используется наиболее часто.
   * Это также снижает вероятность удаления такого узла из кэша
   */
  promote(node) {
    this.evict(node)
    this.append(node)
  }

  // Перемещает узел в конец связного списка
  append(node) {
    this.nodesMap[node.key] = node

    if (!this.head.next) {
      // Первый узел
      this.head.next = node
      this.tail.prev = node
      node.prev = this.head
      node.next = this.tail
    } else {
      // Добавляем узел в конец
      const oldTail = this.tail.prev
      oldTail.next = node
      node.prev = oldTail
      node.next = this.tail
      this.tail.prev = node
    }

    // Увеличиваем текущий размер кэша
    this.size += 1
    // Если достигнут максимальный размер кэша,
    // то удаляем первый узел
    if (this.size > this.capacity) {
      this.evict(this.head.next)
    }
  }

  // Удаляет (вытесняет) узел из кэша
  evict(node) {
    delete this.nodesMap[node.key]
    // Уменьшаем текущий размер кэша
    this.size -= 1

    const prev = node.prev
    const next = node.next

    // Имеется только один узел
    if (prev === this.head && next === this.tail) {
      this.head.next = null
      this.tail.prev = null
      this.size = 0
      return
    }

    // Это головной узел
    if (prev === this.head) {
      next.prev = this.head
      this.head.next = next
      return
    }

    // Это хвостовой узел
    if (next === this.tail) {
      prev.next = this.tail
      this.tail.prev = prev
      return
    }

    // Это узел в середине списка
    prev.next = next
    next.prev = prev
  }
}

We check that the CAD works as expected:

import LRUCache from '../lru-cache'

describe('LRUCache', () => {
  it('должен добавить/извлечь значения в/из кэша', () => {
    const cache = new LRUCache(100)
    expect(cache.get('key-1')).toBeNull()

    cache.set('key-1', 15)
    cache.set('key-2', 16)
    cache.set('key-3', 17)
    expect(cache.get('key-1')).toBe(15)
    expect(cache.get('key-2')).toBe(16)
    expect(cache.get('key-3')).toBe(17)
    expect(cache.get('key-3')).toBe(17)
    expect(cache.get('key-2')).toBe(16)
    expect(cache.get('key-1')).toBe(15)

    cache.set('key-1', 5)
    cache.set('key-2', 6)
    cache.set('key-3', 7)
    expect(cache.get('key-1')).toBe(5)
    expect(cache.get('key-2')).toBe(6)
    expect(cache.get('key-3')).toBe(7)
  })

  it('должен вытеснить элементы из кэша размером 1', () => {
    const cache = new LRUCache(1)
    expect(cache.get('key-1')).toBeNull()

    cache.set('key-1', 15)
    expect(cache.get('key-1')).toBe(15)

    cache.set('key-2', 16)
    expect(cache.get('key-1')).toBeNull()
    expect(cache.get('key-2')).toBe(16)

    cache.set('key-2', 17)
    expect(cache.get('key-2')).toBe(17)

    cache.set('key-3', 18)
    cache.set('key-4', 19)
    expect(cache.get('key-2')).toBeNull()
    expect(cache.get('key-3')).toBeNull()
    expect(cache.get('key-4')).toBe(19)
  })

  it('должен вытеснить элементы из кэша размером 2', () => {
    const cache = new LRUCache(2)
    expect(cache.get('key-21')).toBeNull()

    cache.set('key-21', 15)
    expect(cache.get('key-21')).toBe(15)

    cache.set('key-22', 16)
    expect(cache.get('key-21')).toBe(15)
    expect(cache.get('key-22')).toBe(16)

    cache.set('key-22', 17)
    expect(cache.get('key-22')).toBe(17)

    cache.set('key-23', 18)
    expect(cache.size).toBe(2)
    expect(cache.get('key-21')).toBeNull()
    expect(cache.get('key-22')).toBe(17)
    expect(cache.get('key-23')).toBe(18)

    cache.set('key-24', 19)
    expect(cache.size).toBe(2)
    expect(cache.get('key-21')).toBeNull()
    expect(cache.get('key-22')).toBeNull()
    expect(cache.get('key-23')).toBe(18)
    expect(cache.get('key-24')).toBe(19)
  })

  it('должен вытеснить элемент из кэша размером 3', () => {
    const cache = new LRUCache(3)

    cache.set('key-1', 1)
    cache.set('key-2', 2)
    cache.set('key-3', 3)
    expect(cache.get('key-1')).toBe(1)
    expect(cache.get('key-2')).toBe(2)
    expect(cache.get('key-3')).toBe(3)

    cache.set('key-3', 4)
    expect(cache.get('key-1')).toBe(1)
    expect(cache.get('key-2')).toBe(2)
    expect(cache.get('key-3')).toBe(4)

    cache.set('key-4', 5)
    expect(cache.get('key-1')).toBeNull()
    expect(cache.get('key-2')).toBe(2)
    expect(cache.get('key-3')).toBe(4)
    expect(cache.get('key-4')).toBe(5)
  })

  it('должен обновить узел при вызове метода `set`', () => {
    const cache = new LRUCache(2)

    cache.set('2', 1)
    cache.set('1', 1)
    cache.set('2', 3)
    cache.set('4', 1)
    expect(cache.get('1')).toBeNull()
    expect(cache.get('2')).toBe(3)
  })

  it('должен обновить элементы в кэше размером 3', () => {
    const cache = new LRUCache(3)

    cache.set('key-1', 1)
    cache.set('key-2', 2)
    cache.set('key-3', 3)
    expect(cache.get('key-1')).toBe(1)

    cache.set('key-4', 4)
    expect(cache.get('key-1')).toBe(1)
    expect(cache.get('key-3')).toBe(3)
    expect(cache.get('key-4')).toBe(4)
    expect(cache.get('key-2')).toBeNull()
  })

  it('должен обновить элементы в кэше размером 4', () => {
    const cache = new LRUCache(4)

    cache.set('key-1', 1)
    cache.set('key-2', 2)
    cache.set('key-3', 3)
    cache.set('key-4', 4)
    expect(cache.get('key-4')).toBe(4)
    expect(cache.get('key-3')).toBe(3)
    expect(cache.get('key-2')).toBe(2)
    expect(cache.get('key-1')).toBe(1)

    cache.set('key-5', 5)
    expect(cache.get('key-1')).toBe(1)
    expect(cache.get('key-2')).toBe(2)
    expect(cache.get('key-3')).toBe(3)
    expect(cache.get('key-4')).toBeNull()
    expect(cache.get('key-5')).toBe(5)

    cache.set('key-6', 6)
    expect(cache.get('key-1')).toBeNull()
    expect(cache.get('key-2')).toBe(2)
    expect(cache.get('key-3')).toBe(3)
    expect(cache.get('key-4')).toBeNull()
    expect(cache.get('key-5')).toBe(5)
    expect(cache.get('key-6')).toBe(6)
  })
})

We perform tests:

npm run test ./data-structures/__tests__/lru-cache

Results:

Ordered map

An implementation that uses a linked list is useful for learning purposes as it allows one to understand how the execution time is achieved. O(1) operations get And set.

However, the CAD is easier to implement using an object Mapwhich stores key-value pairs and remembers the order in which keys were added. We can store the last used element at the end of the map by removing it and adding it again. The element at the beginning of the map is evicted from the cache first. To check the order of elements, we can use IteratorIterablesuch as map.keys().

// data-structure/lru-cache-on-map.js
export default class LruCacheOnMap {
  // Конструктор принимает размер кэша
  constructor(size) {
    // Размер кэша
    this.size = size
    // Хранилище (по сути, сам кэш)
    this.map = new Map()
  }

  // Возвращает значение по ключу
  get(key) {
    const val = this.map.get(key)
    if (!val) return null
    // Обновляем "приоритет" элемента
    this.map.delete(key)
    this.map.set(key, val)
    return val
  }

  // Добавляет элемент в кэш
  set(key, val) {
    // Обновляем "приоритет" элемента
    this.map.delete(key)
    this.map.set(key, val)
    // Если кэш переполнен, удаляем первый (самый редко используемый) элемент
    if (this.map.size > this.size) {
      for (const key of this.map.keys()) {
        this.map.delete(key)
        break
      }
    }
  }
}

We check that the CAD implemented using the map works as expected:

// data-structure/__tests__/lru-cache-on-map.test.js
import LRUCache from '../lru-cache-on-map'

describe('LRUCacheOnMap', () => {
  it('должен добавить/извлечь значения в/из кэша', () => {
    const cache = new LRUCache(100)
    expect(cache.get('key-1')).toBeNull()

    cache.set('key-1', 15)
    cache.set('key-2', 16)
    cache.set('key-3', 17)
    expect(cache.get('key-1')).toBe(15)
    expect(cache.get('key-2')).toBe(16)
    expect(cache.get('key-3')).toBe(17)
    expect(cache.get('key-3')).toBe(17)
    expect(cache.get('key-2')).toBe(16)
    expect(cache.get('key-1')).toBe(15)

    cache.set('key-1', 5)
    cache.set('key-2', 6)
    cache.set('key-3', 7)
    expect(cache.get('key-1')).toBe(5)
    expect(cache.get('key-2')).toBe(6)
    expect(cache.get('key-3')).toBe(7)
  })

  it('должен вытеснить элементы из кэша размером 1', () => {
    const cache = new LRUCache(1)
    expect(cache.get('key-1')).toBeNull()

    cache.set('key-1', 15)
    expect(cache.get('key-1')).toBe(15)

    cache.set('key-2', 16)
    expect(cache.get('key-1')).toBeNull()
    expect(cache.get('key-2')).toBe(16)

    cache.set('key-2', 17)
    expect(cache.get('key-2')).toBe(17)

    cache.set('key-3', 18)
    cache.set('key-4', 19)
    expect(cache.get('key-2')).toBeNull()
    expect(cache.get('key-3')).toBeNull()
    expect(cache.get('key-4')).toBe(19)
  })

  it('должен вытеснить элементы из кэша размером 2', () => {
    const cache = new LRUCache(2)
    expect(cache.get('key-21')).toBeNull()

    cache.set('key-21', 15)
    expect(cache.get('key-21')).toBe(15)

    cache.set('key-22', 16)
    expect(cache.get('key-21')).toBe(15)
    expect(cache.get('key-22')).toBe(16)

    cache.set('key-22', 17)
    expect(cache.get('key-22')).toBe(17)

    cache.set('key-23', 18)
    expect(cache.get('key-21')).toBeNull()
    expect(cache.get('key-22')).toBe(17)
    expect(cache.get('key-23')).toBe(18)

    cache.set('key-24', 19)
    expect(cache.get('key-21')).toBeNull()
    expect(cache.get('key-22')).toBeNull()
    expect(cache.get('key-23')).toBe(18)
    expect(cache.get('key-24')).toBe(19)
  })

  it('должен вытеснить элемент из кэша размером 3', () => {
    const cache = new LRUCache(3)

    cache.set('key-1', 1)
    cache.set('key-2', 2)
    cache.set('key-3', 3)
    expect(cache.get('key-1')).toBe(1)
    expect(cache.get('key-2')).toBe(2)
    expect(cache.get('key-3')).toBe(3)

    cache.set('key-3', 4)
    expect(cache.get('key-1')).toBe(1)
    expect(cache.get('key-2')).toBe(2)
    expect(cache.get('key-3')).toBe(4)

    cache.set('key-4', 5)
    expect(cache.get('key-1')).toBeNull()
    expect(cache.get('key-2')).toBe(2)
    expect(cache.get('key-3')).toBe(4)
    expect(cache.get('key-4')).toBe(5)
  })

  it('должен обновить узел при вызове метода `set`', () => {
    const cache = new LRUCache(2)

    cache.set('2', 1)
    cache.set('1', 1)
    cache.set('2', 3)
    cache.set('4', 1)
    expect(cache.get('1')).toBeNull()
    expect(cache.get('2')).toBe(3)
  })

  it('должен обновить элементы в кэше размером 3', () => {
    const cache = new LRUCache(3)

    cache.set('key-1', 1)
    cache.set('key-2', 2)
    cache.set('key-3', 3)
    expect(cache.get('key-1')).toBe(1)

    cache.set('key-4', 4)
    expect(cache.get('key-1')).toBe(1)
    expect(cache.get('key-3')).toBe(3)
    expect(cache.get('key-4')).toBe(4)
    expect(cache.get('key-2')).toBeNull()
  })

  it('должен обновить элементы в кэше размером 4', () => {
    const cache = new LRUCache(4)

    cache.set('key-1', 1)
    cache.set('key-2', 2)
    cache.set('key-3', 3)
    cache.set('key-4', 4)
    expect(cache.get('key-4')).toBe(4)
    expect(cache.get('key-3')).toBe(3)
    expect(cache.get('key-2')).toBe(2)
    expect(cache.get('key-1')).toBe(1)

    cache.set('key-5', 5)
    expect(cache.get('key-1')).toBe(1)
    expect(cache.get('key-2')).toBe(2)
    expect(cache.get('key-3')).toBe(3)
    expect(cache.get('key-4')).toBeNull()
    expect(cache.get('key-5')).toBe(5)

    cache.set('key-6', 6)
    expect(cache.get('key-1')).toBeNull()
    expect(cache.get('key-2')).toBe(2)
    expect(cache.get('key-3')).toBe(3)
    expect(cache.get('key-4')).toBeNull()
    expect(cache.get('key-5')).toBe(5)
    expect(cache.get('key-6')).toBe(6)
  })
})

We perform tests:

npm run test ./data-structures/__tests__/lru-cache-on-map

Results:

That's all for today, friends. See you in the next part.

News, product reviews and contests from the Timeweb.Cloud team — in our Telegram channel ↩

Data Structures and Algorithms. Part 5

15. System of disjoint sets

16. Bloom Filter

17. Cache of actual data

Flutter: flip animation

PANGIT: Developer Diary #7

The ghost of the 1C Development Integrator hovers over the Russian Federation

How we created a robotic assistant for information business

How to quickly remove duplicate rows in CSV, Excel and Markdown tables?

Behavioural Interview + Bonuses

Leave a Reply Cancel reply

15. System of disjoint sets

16. Bloom Filter

17. Cache of actual data

Similar Posts

Leave a Reply Cancel reply