mjdileep / Opencone

A Pinecone alternative written on top of OpenSearch (Open Source)

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Opencone

A Pinecone alternative built on top of OpenSearch that simplifies OpenSearch's features

Requirements

Python libraries

pip3 install opensearch-py
pip3 install opencone 

Usage

Create an OpenconeClient

from opencone import OpenconeClient
import random
import numpy as np
import time
from opensearchpy import OpenSearch


# Connection details for an OpenSearch node running on the local machine.
host = "localhost"
port = 9200
# Demo credentials for testing only -- never keep real credentials in source code.
auth = ("admin", "admin")

# Build the low-level OpenSearch client. SSL is enabled, while certificate
# verification, hostname assertion and the related warnings are switched off
# (see the keyword arguments below).
client = OpenSearch(
    hosts=[{"host": host, "port": port}],
    http_auth=auth,
    use_ssl=True,
    verify_certs=False,
    ssl_assert_hostname=False,
    ssl_show_warn=False,
)

# Wrap the OpenSearch client in an OpenconeClient; the remaining examples use `oc`.
oc = OpenconeClient(client=client)

Create an Index

# Create an index
dimensions = 4096  # vector dimensionality passed to create_index
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
t = time.perf_counter()
try:
    oc.create_index(index_name="test", dimensions=dimensions)
except Exception as ex:
    # The only failure handled here is "index already exists", recognised by
    # the error text. Anything else is re-raised unchanged.
    if "resource_already_exists_exception" not in str(ex):
        # A bare `raise` re-raises the active exception with its traceback intact.
        raise
    # NOTE: this removes the existing "test" index before creating it again.
    oc.delete_index(index_name="test")
    oc.create_index(index_name="test", dimensions=dimensions)
print("Index created in ", time.perf_counter()-t)

Upsert Vectors

# Upsert vectors
# Each entry is a tuple: (<id>, <embeddings>, <metadata>)
titles = ["pp", "qq", "rr", "ss", "tt"]
tags = ["p1", "p2", "p3", "p4", "p5"]
# Upserting 100 or fewer vectors per call is recommended
vectors = [
    (
        f"id:{i}",
        # Random integer embedding with values in [0, 5)
        np.random.randint(5, size=dimensions).tolist(),
        {
            "name": random.choice(titles),
            # Random slice of `tags`; it is empty whenever start >= end.
            "tags": tags[random.randint(0, 4):random.randint(0, 4)],
            "no": random.randint(2000, 3000),
        },
    )
    for i in range(100)
]
# perf_counter() is the monotonic clock intended for elapsed-time measurement.
t = time.perf_counter()
oc.upsert(index_name="test", vectors=vectors)
print("Upsert time:", time.perf_counter()-t)

Fetch Vector

# Fetch vector
# Look up the entry stored under id "id:1" in the "test" index and print the result.
fetched = oc.fetch(index_name="test", _id="id:1")
print(fetched)

Delete Vector

# Delete vector
# Remove the entry with id "id:1" from the "test" index and print the client's response.
deleted = oc.delete(index_name="test", _id="id:1")
print(deleted)

Search

# Search
# Metadata filters handed to the search call.
# NOTE(review): presumably all conditions are combined with AND -- confirm
# against the Opencone documentation.
# NOTE(review): "t3" is not one of the tag values generated in the upsert
# example ("p1".."p5"); possibly a typo for "p3" -- confirm.
filters = {
    "no": {"$gte":2200, "$lte":2800},
    "name": {"$eq": "pp"},
    "tags": {"$in": ["p1", "t3"]}
}
# Build the random query vector before starting the timer so that only the
# search call itself is measured.
query_vector = (np.random.randint(5, size=dimensions)/1.1).tolist()
# perf_counter() is the monotonic clock intended for elapsed-time measurement.
t = time.perf_counter()
rs = oc.search(index_name="test", vector=query_vector, filters=filters, metadata=False, limit=10000)

print("Search time:", time.perf_counter()-t)
# Print every returned hit, one per line.
for hit in rs:
    print(hit)

Delete an Index

# Delete index
# Clean-up step: drops the "test" index used by the preceding examples.
oc.delete_index(index_name="test")

About

A Pinecone alternative written on top of OpenSearch (Open Source)

License:MIT License


Languages

Language:Python 100.0%