{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility\n",
    "from towhee.dc2 import pipe, ops, DataCollection\n",
    "import pandas as pd\n",
    "import glob\n",
    "import cv2\n",
    "from towhee.types.image import Image\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!docker compose up -d "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Embedding model: every CLIP operator in the pipeline cell is built from this name.\n",
    "MODEL = 'clip_vit_base_patch16'\n",
    "DIM = 512  # dimension of embedding extracted by MODEL\n",
    "TOPK = 10  # number of nearest neighbours returned per query\n",
    "\n",
    "# Milvus connection and collection settings (single source of truth).\n",
    "HOST = 'localhost'\n",
    "PORT = '19530'\n",
    "COLLECTION_NAME = 'reverse_image_search'\n",
    "INDEX_TYPE = 'IVF_FLAT'\n",
    "METRIC_TYPE = 'L2'\n",
    "\n",
    "# INSERT_SRC: csv listing the indexed images (columns: id, path).\n",
    "# QUERY_SRC: glob pattern matching the images to embed and insert.\n",
    "INSERT_SRC = 'reverse_image_search.csv'\n",
    "IMAGES_PATH = '/home/ubuntu/data/dota/images'\n",
    "QUERY_SRC = IMAGES_PATH + '/*.png'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Connect with the same HOST/PORT that the insert and search operators use.\n",
    "connections.connect(\n",
    "    alias='default',\n",
    "    host=HOST,\n",
    "    port=PORT\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sorted so that the ids assigned below are stable from run to run.\n",
    "files = sorted(glob.glob(QUERY_SRC))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_image_metadata = pd.DataFrame(dict(path=files)).reset_index().rename(columns=dict(index='id'))\n",
    "# index=False: the 'id' column already holds the row number, so the csv is exactly (id, path).\n",
    "df_image_metadata.to_csv(INSERT_SRC, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_milvus_collection(collection_name, dim, metric_type=None, index_type=None, nlist=2048):\n",
    "    \"\"\"Drop any existing collection of this name, then create and index a new one.\n",
    "\n",
    "    WARNING: an existing collection called `collection_name` is dropped first,\n",
    "    so everything previously inserted into it is lost.\n",
    "\n",
    "    Args:\n",
    "        collection_name: name of the collection to (re)create.\n",
    "        dim: dimension of the vectors stored in the 'embedding' field.\n",
    "        metric_type: index metric; None means the notebook-level METRIC_TYPE.\n",
    "        index_type: index type; None means the notebook-level INDEX_TYPE.\n",
    "        nlist: value of the 'nlist' index parameter.\n",
    "\n",
    "    Returns:\n",
    "        The new `Collection`, with an index created on 'embedding'.\n",
    "    \"\"\"\n",
    "    if utility.has_collection(collection_name):\n",
    "        utility.drop_collection(collection_name)\n",
    "\n",
    "    fields = [\n",
    "        # The image path is the primary key (auto_id is off), so paths must be unique.\n",
    "        FieldSchema(\n",
    "            name='path', dtype=DataType.VARCHAR, description='path to image', max_length=500,\n",
    "            is_primary=True, auto_id=False\n",
    "        ),\n",
    "        FieldSchema(\n",
    "            name='embedding', dtype=DataType.FLOAT_VECTOR, description='image embedding vectors', dim=dim\n",
    "        )\n",
    "    ]\n",
    "    schema = CollectionSchema(fields=fields, description='reverse image search')\n",
    "    collection = Collection(name=collection_name, schema=schema)\n",
    "\n",
    "    # Globals are resolved at call time so that re-running the config cell takes effect.\n",
    "    index_params = {\n",
    "        'metric_type': METRIC_TYPE if metric_type is None else metric_type,\n",
    "        'index_type': INDEX_TYPE if index_type is None else index_type,\n",
    "        'params': {'nlist': nlist}\n",
    "    }\n",
    "    collection.create_index(field_name='embedding', index_params=index_params)\n",
    "    return collection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "A new collection created: reverse_image_search\n"
     ]
    }
   ],
   "source": [
    "collection = create_milvus_collection(COLLECTION_NAME, DIM)\n",
    "print(f'A new collection created: {COLLECTION_NAME}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Image -> embedding: decode the file at 'url' to RGB, then embed it with CLIP.\n",
    "img_pipe = (\n",
    "    pipe.input('url')\n",
    "    .map('url', 'img', ops.image_decode.cv2_rgb())\n",
    "    .map('img', 'vec', ops.image_text_embedding.clip(model_name=MODEL, modality='image'))\n",
    ")\n",
    "\n",
    "# Text -> embedding with the same CLIP model, so text vectors can be compared to image vectors.\n",
    "text_pipe = (\n",
    "    pipe.input('text')\n",
    "    .map('text', 'vec', ops.image_text_embedding.clip(model_name=MODEL, modality='text'))\n",
    ")\n"
   ]
  },
  { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: datasets[vision] in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (2.11.0)\n", "Requirement already satisfied: requests>=2.19.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (2.28.2)\n", "Requirement already satisfied: numpy>=1.17 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from
datasets[vision]) (1.24.2)\n", "Requirement already satisfied: multiprocess in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.70.14)\n", "Requirement already satisfied: pyyaml>=5.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (6.0)\n", "Requirement already satisfied: xxhash in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (3.2.0)\n", "Requirement already satisfied: pyarrow>=8.0.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (11.0.0)\n", "Requirement already satisfied: tqdm>=4.62.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (4.65.0)\n", "Requirement already satisfied: dill<0.3.7,>=0.3.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.3.6)\n", "Requirement already satisfied: fsspec[http]>=2021.11.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (2023.3.0)\n", "Requirement already satisfied: huggingface-hub<1.0.0,>=0.11.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.13.3)\n", "Requirement already satisfied: responses<0.19 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.18.0)\n", "Requirement already satisfied: pandas in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (2.0.0)\n", "Requirement already satisfied: aiohttp in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (3.8.4)\n", "Requirement already satisfied: packaging in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (23.0)\n", "Requirement already satisfied: Pillow>=6.2.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (9.5.0)\n", 
"Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (3.1.0)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (1.8.2)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (1.3.3)\n", "Requirement already satisfied: attrs>=17.3.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (22.2.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (6.0.4)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (4.0.2)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (1.3.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets[vision]) (4.5.0)\n", "Requirement already satisfied: filelock in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets[vision]) (3.10.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from requests>=2.19.0->datasets[vision]) (2022.12.7)\n", "Requirement already satisfied: idna<4,>=2.5 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from requests>=2.19.0->datasets[vision]) (3.4)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from 
requests>=2.19.0->datasets[vision]) (1.26.15)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from pandas->datasets[vision]) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from pandas->datasets[vision]) (2023.3)\n", "Requirement already satisfied: tzdata>=2022.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from pandas->datasets[vision]) (2023.3)\n", "Requirement already satisfied: six>=1.5 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas->datasets[vision]) (1.16.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING: You are using pip version 22.0.4; however, version 23.0.1 is available.\n", "You should consider upgrading via the '/home/ubuntu/dev/vector-searchers-benchmark/.venv/bin/python -m pip install --upgrade pip' command.\n", "/home/ubuntu/dev/vector-searchers-benchmark/.venv/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { "data": { "text/html": [ "
img vec
[-0.4224454, -0.82606244, 0.27163455, ...] shape=(512,)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "DataCollection(img_pipe.output('img', 'vec')('/home/ubuntu/data/dota/images/P0006.png')).show()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: datasets[vision] in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (2.11.0)\n", "Requirement already satisfied: multiprocess in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.70.14)\n", "Requirement already satisfied: requests>=2.19.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (2.28.2)\n", "Requirement already satisfied: xxhash in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (3.2.0)\n", "Requirement already satisfied: pyarrow>=8.0.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (11.0.0)\n", "Requirement already satisfied: pandas in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (2.0.0)\n", "Requirement already satisfied: numpy>=1.17 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (1.24.2)\n", "Requirement already satisfied: packaging in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (23.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (6.0)\n", "Requirement already satisfied: aiohttp in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (3.8.4)\n", "Requirement already satisfied: fsspec[http]>=2021.11.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (2023.3.0)\n", "Requirement already satisfied: tqdm>=4.62.1 in 
/home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (4.65.0)\n", "Requirement already satisfied: huggingface-hub<1.0.0,>=0.11.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.13.3)\n", "Requirement already satisfied: responses<0.19 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.18.0)\n", "Requirement already satisfied: dill<0.3.7,>=0.3.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.3.6)\n", "Requirement already satisfied: Pillow>=6.2.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (9.5.0)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (1.8.2)\n", "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (3.1.0)\n", "Requirement already satisfied: attrs>=17.3.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (22.2.0)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (1.3.1)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (6.0.4)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (1.3.3)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (4.0.2)\n", "Requirement already satisfied: filelock in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from 
huggingface-hub<1.0.0,>=0.11.0->datasets[vision]) (3.10.7)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets[vision]) (4.5.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from requests>=2.19.0->datasets[vision]) (2022.12.7)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from requests>=2.19.0->datasets[vision]) (1.26.15)\n", "Requirement already satisfied: idna<4,>=2.5 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from requests>=2.19.0->datasets[vision]) (3.4)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from pandas->datasets[vision]) (2.8.2)\n", "Requirement already satisfied: tzdata>=2022.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from pandas->datasets[vision]) (2023.3)\n", "Requirement already satisfied: pytz>=2020.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from pandas->datasets[vision]) (2023.3)\n", "Requirement already satisfied: six>=1.5 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas->datasets[vision]) (1.16.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING: You are using pip version 22.0.4; however, version 23.0.1 is available.\n", "You should consider upgrading via the '/home/ubuntu/dev/vector-searchers-benchmark/.venv/bin/python -m pip install --upgrade pip' command.\n" ] }, { "data": { "text/html": [ "
text vec
a greenm yard [-0.28066933, -0.7002137, -0.28758025, ...] shape=(512,)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "\n", "DataCollection(text_pipe.output('text', 'vec')('a greenm yard')).show()" ] },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_image(x):\n",
    "    \"\"\"Yield, one at a time, every file path matching the glob pattern `x`.\"\"\"\n",
    "    yield from glob.glob(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Embedding pipeline: expand the glob pattern given as 'src' into one row per\n",
    "# image path, decode each image to RGB, then embed it with CLIP.\n",
    "clip_image_op = ops.image_text_embedding.clip(model_name='clip_vit_base_patch16', modality='image')\n",
    "\n",
    "p_embed = (\n",
    "    pipe.input('src')\n",
    "    .flat_map('src', 'img_path', load_image)\n",
    "    .map('img_path', 'img', ops.image_decode.cv2_rgb())\n",
    "    .map('img', 'vec', clip_image_op)\n",
    ")"
   ]
  },
  { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: datasets[vision] in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (2.11.0)\n", "Requirement already satisfied: multiprocess in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.70.14)\n", "Requirement already satisfied: responses<0.19 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.18.0)\n", "Requirement already satisfied: xxhash in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (3.2.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (6.0)\n", "Requirement already satisfied: pyarrow>=8.0.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (11.0.0)\n", "Requirement already satisfied: huggingface-hub<1.0.0,>=0.11.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.13.3)\n", "Requirement already satisfied: dill<0.3.7,>=0.3.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from
datasets[vision]) (0.3.6)\n", "Requirement already satisfied: tqdm>=4.62.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (4.65.0)\n", "Requirement already satisfied: numpy>=1.17 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (1.24.2)\n", "Requirement already satisfied: requests>=2.19.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (2.28.2)\n", "Requirement already satisfied: fsspec[http]>=2021.11.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (2023.3.0)\n", "Requirement already satisfied: pandas in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (2.0.0)\n", "Requirement already satisfied: packaging in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (23.0)\n", "Requirement already satisfied: aiohttp in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (3.8.4)\n", "Requirement already satisfied: Pillow>=6.2.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (9.5.0)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (1.8.2)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (4.0.2)\n", "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (3.1.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (6.0.4)\n", "Requirement already satisfied: frozenlist>=1.1.1 in 
/home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (1.3.3)\n", "Requirement already satisfied: attrs>=17.3.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (22.2.0)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (1.3.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets[vision]) (4.5.0)\n", "Requirement already satisfied: filelock in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets[vision]) (3.10.7)\n", "Requirement already satisfied: idna<4,>=2.5 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from requests>=2.19.0->datasets[vision]) (3.4)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from requests>=2.19.0->datasets[vision]) (1.26.15)\n", "Requirement already satisfied: certifi>=2017.4.17 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from requests>=2.19.0->datasets[vision]) (2022.12.7)\n", "Requirement already satisfied: tzdata>=2022.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from pandas->datasets[vision]) (2023.3)\n", "Requirement already satisfied: pytz>=2020.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from pandas->datasets[vision]) (2023.3)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from pandas->datasets[vision]) (2.8.2)\n", "Requirement already satisfied: six>=1.5 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas->datasets[vision]) (1.16.0)\n" ] }, {
"name": "stderr", "output_type": "stream", "text": [ "WARNING: You are using pip version 22.0.4; however, version 23.0.1 is available.\n", "You should consider upgrading via the '/home/ubuntu/dev/vector-searchers-benchmark/.venv/bin/python -m pip install --upgrade pip' command.\n" ] } ],
   "source": [
    "# Insert pipeline: extend the embedding pipeline with a Milvus insert step.\n",
    "# The step receives each row's ('img_path', 'vec') pair and targets\n",
    "# COLLECTION_NAME on HOST:PORT; whatever the operator returns is exposed as 'mr'.\n",
    "insert_op = ops.ann_insert.milvus_client(\n",
    "    host=HOST,\n",
    "    port=PORT,\n",
    "    collection_name=COLLECTION_NAME\n",
    ")\n",
    "\n",
    "p_insert = (\n",
    "    p_embed\n",
    "    .map(input_schema=('img_path', 'vec'), output_schema='mr', fn=insert_op)\n",
    "    .output('mr')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ],
   "source": [
    "p_insert(QUERY_SRC)"
   ]
  },
  { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: datasets[vision] in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (2.11.0)\n", "Requirement already satisfied: aiohttp in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (3.8.4)\n", "Requirement already satisfied: huggingface-hub<1.0.0,>=0.11.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.13.3)\n", "Requirement already satisfied: packaging in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision])
(23.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (6.0)\n", "Requirement already satisfied: pandas in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (2.0.0)\n", "Requirement already satisfied: numpy>=1.17 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (1.24.2)\n", "Requirement already satisfied: dill<0.3.7,>=0.3.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.3.6)\n", "Requirement already satisfied: multiprocess in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.70.14)\n", "Requirement already satisfied: pyarrow>=8.0.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (11.0.0)\n", "Requirement already satisfied: tqdm>=4.62.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (4.65.0)\n", "Requirement already satisfied: fsspec[http]>=2021.11.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (2023.3.0)\n", "Requirement already satisfied: requests>=2.19.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (2.28.2)\n", "Requirement already satisfied: xxhash in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (3.2.0)\n", "Requirement already satisfied: responses<0.19 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.18.0)\n", "Requirement already satisfied: Pillow>=6.2.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (9.5.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (6.0.4)\n", "Requirement 
already satisfied: frozenlist>=1.1.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (1.3.3)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (4.0.2)\n", "Requirement already satisfied: attrs>=17.3.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (22.2.0)\n", "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (3.1.0)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (1.8.2)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (1.3.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets[vision]) (4.5.0)\n", "Requirement already satisfied: filelock in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets[vision]) (3.10.7)\n", "Requirement already satisfied: idna<4,>=2.5 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from requests>=2.19.0->datasets[vision]) (3.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from requests>=2.19.0->datasets[vision]) (2022.12.7)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from requests>=2.19.0->datasets[vision]) (1.26.15)\n", "Requirement already satisfied: pytz>=2020.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from 
pandas->datasets[vision]) (2023.3)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from pandas->datasets[vision]) (2.8.2)\n", "Requirement already satisfied: tzdata>=2022.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from pandas->datasets[vision]) (2023.3)\n", "Requirement already satisfied: six>=1.5 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas->datasets[vision]) (1.16.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING: You are using pip version 22.0.4; however, version 23.0.1 is available.\n", "You should consider upgrading via the '/home/ubuntu/dev/vector-searchers-benchmark/.venv/bin/python -m pip install --upgrade pip' command.\n" ] } ],
   "source": [
    "# Text search: embed the query text, ask Milvus for its TOPK nearest vectors,\n",
    "# then keep only the first element of every hit (the stored image path).\n",
    "search_op = ops.ann_search.milvus_client(\n",
    "    host=HOST, port=PORT, limit=TOPK,\n",
    "    collection_name=COLLECTION_NAME\n",
    ")\n",
    "\n",
    "p_search_pre = (\n",
    "    text_pipe\n",
    "    .map('vec', 'distance result', search_op)\n",
    "    .map('distance result', 'search_res', lambda hits: [hit[0] for hit in hits])\n",
    ")\n",
    "\n",
    "# Runnable variant that returns the query text together with the matched paths.\n",
    "p_search = p_search_pre.output('text', 'search_res')"
   ]
  },
  { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
text search_res
green yard
/home/ubuntu/data/dota/images/P2277.png

/home/ubuntu/data/dota/images/P1797.png

/home/ubuntu/data/dota/images/P2588.png

/home/ubuntu/data/dota/images/P2084.png

/home/ubuntu/data/dota/images/P0291.png

/home/ubuntu/data/dota/images/P1953.png

/home/ubuntu/data/dota/images/P1840.png

/home/ubuntu/data/dota/images/P1777.png

/home/ubuntu/data/dota/images/P2774.png

/home/ubuntu/data/dota/images/P2102.png
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "DataCollection(p_search('green yard')).show()" ] },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_images(img_paths):\n",
    "    \"\"\"Load every path in `img_paths` from disk as a towhee `Image` in BGR mode.\n",
    "\n",
    "    Args:\n",
    "        img_paths: iterable of image file paths.\n",
    "\n",
    "    Returns:\n",
    "        List of `Image` objects, in the same order as `img_paths`.\n",
    "\n",
    "    Raises:\n",
    "        FileNotFoundError: if a path is missing or cannot be decoded as an image.\n",
    "    \"\"\"\n",
    "    imgs = []\n",
    "    for p in img_paths:\n",
    "        pixels = cv2.imread(p)\n",
    "        # cv2.imread reports failure by returning None instead of raising;\n",
    "        # fail here with the offending path rather than later during display.\n",
    "        if pixels is None:\n",
    "            raise FileNotFoundError(f'cannot read image: {p}')\n",
    "        imgs.append(Image(pixels, 'BGR'))\n",
    "    return imgs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extend the text search with a step that loads the matched files for display.\n",
    "p_search_img = (\n",
    "    p_search_pre\n",
    "    .map('search_res', 'pred images', read_images)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "search_res = p_search_img.output('text', 'pred images')('parking lot with multiple vehicles')"
   ]
  },
  { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
text pred images
parking lot with multiple vehicles
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "DataCollection(search_res).show()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "DataCollection(p_search('green yard')).show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "search_res = p_search_img.output('text', 'pred images')('parking lot no vehicles')" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
text pred images
parking lot no vehicles
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "DataCollection(search_res).show()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "search_res = p_search_img.output('text', 'pred images')('absolutely empty parking lot')" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
text pred images
absolutely empty parking lot
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "DataCollection(search_res).show()" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.16" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "78e896e3cebdb5e8cfc83ce386ece667c3e4daa8b6aaa0d2113e592fc1301ab6" } } }, "nbformat": 4, "nbformat_minor": 2 }