{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility\n",
    "from towhee.dc2 import pipe, ops, DataCollection\n",
    "import pandas as pd\n",
    "import glob\n",
    "import cv2\n",
    "from towhee.types.image import Image\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!docker compose up -d "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Register the default pymilvus connection. The literals match HOST/PORT in the\n",
    "# configuration cell below, which has not run yet at this point.\n",
    "connections.connect(\n",
    "    alias=\"default\",\n",
    "    host='localhost',\n",
    "    port='19530'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): the pipelines below embed with CLIP_MODEL_NAME, not MODEL - confirm MODEL is still needed.\n",
    "MODEL = 'resnet50'\n",
    "CLIP_MODEL_NAME = 'clip_vit_base_patch16'  # CLIP checkpoint shared by the image and text pipelines\n",
    "TOPK = 10\n",
    "DIM = 512  # dimension of the CLIP embeddings (pipeline previews show shape=(512,))\n",
    "COLLECTION_NAME = 'reverse_image_search'\n",
    "INDEX_TYPE = 'IVF_FLAT'\n",
    "METRIC_TYPE = 'L2'\n",
    "HOST = 'localhost'\n",
    "PORT = '19530'\n",
    "\n",
    "# path to csv (column_1 indicates image path) OR a pattern of image paths\n",
    "INSERT_SRC = 'reverse_image_search.csv'\n",
    "IMAGES_PATH = '/home/ubuntu/data/dota/images'\n",
    "IAMGES_PATH = IMAGES_PATH  # misspelled legacy name, kept in case other cells still reference it\n",
    "QUERY_SRC = IMAGES_PATH + '/*.png'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# glob returns paths in arbitrary order; sort so the ids assigned below are reproducible.\n",
    "files = sorted(glob.glob(QUERY_SRC))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per image: 'id' is the row position, 'path' is the file path.\n",
    "df_image_metadata = pd.DataFrame(dict(path=files)).reset_index().rename(columns=dict(index='id'))\n",
    "# NOTE(review): to_csv also writes the DataFrame index as an unnamed first column, so the\n",
    "# file layout is (index, id, path) - confirm this matches whatever reads INSERT_SRC.\n",
    "df_image_metadata.to_csv(INSERT_SRC)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_milvus_collection(collection_name, dim, metric_type=None, index_type=None, nlist=2048):\n",
    "    \"\"\"(Re)create a Milvus collection keyed by image path with one embedding per row.\n",
    "\n",
    "    Any existing collection called ``collection_name`` is dropped first, so\n",
    "    previously inserted data is lost.\n",
    "\n",
    "    Args:\n",
    "        collection_name: name of the collection to create.\n",
    "        dim: dimension of the ``embedding`` float vectors.\n",
    "        metric_type: metric for the index; defaults to the notebook-level METRIC_TYPE.\n",
    "        index_type: index type; defaults to the notebook-level INDEX_TYPE.\n",
    "        nlist: ``nlist`` value passed in the index params.\n",
    "\n",
    "    Returns:\n",
    "        The new ``Collection`` with an index created on the ``embedding`` field.\n",
    "    \"\"\"\n",
    "    # Resolve defaults at call time so edits to the configuration cell take\n",
    "    # effect without re-running this definition.\n",
    "    if metric_type is None:\n",
    "        metric_type = METRIC_TYPE\n",
    "    if index_type is None:\n",
    "        index_type = INDEX_TYPE\n",
    "\n",
    "    if utility.has_collection(collection_name):\n",
    "        utility.drop_collection(collection_name)\n",
    "\n",
    "    fields = [\n",
    "        # The image path is the primary key, supplied by the caller (auto_id=False).\n",
    "        FieldSchema(\n",
    "            name='path', dtype=DataType.VARCHAR, description='path to image',\n",
    "            max_length=500,\n",
    "            is_primary=True, auto_id=False\n",
    "        ),\n",
    "        FieldSchema(\n",
    "            name='embedding', dtype=DataType.FLOAT_VECTOR, description='image embedding vectors', dim=dim\n",
    "        )\n",
    "    ]\n",
    "    schema = CollectionSchema(fields=fields, description='reverse image search')\n",
    "    collection = Collection(name=collection_name, schema=schema)\n",
    "\n",
    "    index_params = {\n",
    "        'metric_type': metric_type,\n",
    "        'index_type': index_type,\n",
    "        'params': {\"nlist\": nlist}\n",
    "    }\n",
    "    collection.create_index(field_name='embedding', index_params=index_params)\n",
    "    return collection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "A new collection created: reverse_image_search\n"
     ]
    }
   ],
   "source": [
    "collection = create_milvus_collection(COLLECTION_NAME, DIM)\n",
    "print(f'A new collection created: {COLLECTION_NAME}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Image path -> decoded RGB image -> CLIP image embedding.\n",
    "# No .output() stage is attached here.\n",
    "# NOTE(review): presumably later cells append their own .output(...) - confirm.\n",
    "img_pipe = (\n",
    "    pipe.input('url')\n",
    "    .map('url', 'img', ops.image_decode.cv2_rgb())\n",
    "    .map('img', 'vec', ops.image_text_embedding.clip(model_name=CLIP_MODEL_NAME, modality='image'))\n",
    ")\n",
    "\n",
    "# Text -> CLIP text embedding, using the same checkpoint as img_pipe.\n",
    "text_pipe = (\n",
    "    pipe.input('text')\n",
    "    .map('text', 'vec', ops.image_text_embedding.clip(model_name=CLIP_MODEL_NAME, modality='text'))\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: datasets[vision] in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (2.11.0)\n",
      "Requirement already satisfied: requests>=2.19.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (2.28.2)\n",
      "Requirement already satisfied: numpy>=1.17 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from 
datasets[vision]) (1.24.2)\n", "Requirement already satisfied: multiprocess in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.70.14)\n", "Requirement already satisfied: pyyaml>=5.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (6.0)\n", "Requirement already satisfied: xxhash in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (3.2.0)\n", "Requirement already satisfied: pyarrow>=8.0.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (11.0.0)\n", "Requirement already satisfied: tqdm>=4.62.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (4.65.0)\n", "Requirement already satisfied: dill<0.3.7,>=0.3.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.3.6)\n", "Requirement already satisfied: fsspec[http]>=2021.11.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (2023.3.0)\n", "Requirement already satisfied: huggingface-hub<1.0.0,>=0.11.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.13.3)\n", "Requirement already satisfied: responses<0.19 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (0.18.0)\n", "Requirement already satisfied: pandas in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (2.0.0)\n", "Requirement already satisfied: aiohttp in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (3.8.4)\n", "Requirement already satisfied: packaging in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (23.0)\n", "Requirement already satisfied: Pillow>=6.2.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from datasets[vision]) (9.5.0)\n", 
"Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (3.1.0)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (1.8.2)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (1.3.3)\n", "Requirement already satisfied: attrs>=17.3.0 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (22.2.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (6.0.4)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (4.0.2)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from aiohttp->datasets[vision]) (1.3.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets[vision]) (4.5.0)\n", "Requirement already satisfied: filelock in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets[vision]) (3.10.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from requests>=2.19.0->datasets[vision]) (2022.12.7)\n", "Requirement already satisfied: idna<4,>=2.5 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from requests>=2.19.0->datasets[vision]) (3.4)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from 
requests>=2.19.0->datasets[vision]) (1.26.15)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from pandas->datasets[vision]) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from pandas->datasets[vision]) (2023.3)\n", "Requirement already satisfied: tzdata>=2022.1 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from pandas->datasets[vision]) (2023.3)\n", "Requirement already satisfied: six>=1.5 in /home/ubuntu/.environments/marqovenv/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas->datasets[vision]) (1.16.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING: You are using pip version 22.0.4; however, version 23.0.1 is available.\n", "You should consider upgrading via the '/home/ubuntu/dev/vector-searchers-benchmark/.venv/bin/python -m pip install --upgrade pip' command.\n", "/home/ubuntu/dev/vector-searchers-benchmark/.venv/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { "data": { "text/html": [ "
img | vec |
---|---|
[-0.4224454, -0.82606244, 0.27163455, ...] shape=(512,) |
text | vec |
---|---|
a greenm yard | [-0.28066933, -0.7002137, -0.28758025, ...] shape=(512,) |
text | search_res |
---|---|
green yard | /home/ubuntu/data/dota/images/P2277.png /home/ubuntu/data/dota/images/P1797.png /home/ubuntu/data/dota/images/P2588.png /home/ubuntu/data/dota/images/P2084.png /home/ubuntu/data/dota/images/P0291.png /home/ubuntu/data/dota/images/P1953.png /home/ubuntu/data/dota/images/P1840.png /home/ubuntu/data/dota/images/P1777.png /home/ubuntu/data/dota/images/P2774.png /home/ubuntu/data/dota/images/P2102.png |
text | pred images |
---|---|
parking lot with multiple vehicles |