Marqo Clothing Apparel Web Application Demo
Getting Started
-
Download the Dataset from Clothing Dataset into the directory where the
streamlit_marqo_demo.py
script is found. -
Run this command inside the script directory to setup an HTTP server
This lets the Marqo Docker container read files from the local OS. For more info on this, please visit this link.
python3 -m http.server 8222
-
Make sure to run the Marqo docker container via the following command:
docker run --name marqo -it -p 8882:8882 --add-host host.docker.internal:host-gateway marqoai/marqo:2.0.0
-
Install Streamlit. This can be done via the following link.
-
Install Marqo
Note: if you are using Anaconda, make sure to install marqo in the Anaconda environment.
pip install marqo==3.0.0
-
Once Streamlit is installed, we can start the Streamlit application by running the following command inside the directory where the
streamlit_marqo_demo.py
script is located:
streamlit run streamlit_marqo_demo.py
For more information on Streamlit's functions and features, please visit the Streamlit Documentation Page.
Code
streamlit_marqo_demo.py
import os
import requests
import streamlit as st
import pandas as pd
from PIL import Image
import pprint
import marqo
# Page-level Streamlit settings, gathered in one mapping and applied in a single call.
_page_settings = dict(
    page_title="Marqo Demo App",
    page_icon="favicon.png",  # file name of the website favicon image
    layout="centered",
    initial_sidebar_state="collapsed",
    menu_items={},
)
st.set_page_config(**_page_settings)

# Client for the Marqo instance exposed by the Docker container on port 8882.
mq = marqo.Client(url='http://localhost:8882')

# Working directory at start-up; other functions in this script read it.
cwd = os.getcwd()
def load_index(number_data):
    """Create the ``demo-search-index`` index and fill it from the clothing dataset.

    Reads the first ``number_data`` rows of ``clothing-dataset/images.csv``,
    rewrites each row's ``image`` value into a URL under
    ``http://host.docker.internal:8222/clothing-dataset/images/``, creates the
    index and adds the rows as documents. Success or failure is reported in
    the Streamlit UI; no exception is propagated to the caller.

    Args:
        number_data: Number of dataset rows to index.
    """
    index_name = "demo-search-index"
    image_base_url = "http://host.docker.internal:8222/clothing-dataset/images/"
    try:
        shirt_data = (
            pd.read_csv('clothing-dataset/images.csv')
            .head(number_data)[['image', 'label', 'kids']]
            .to_dict('records')
        )
        for data in shirt_data:
            # The CSV stores a bare image id; turn it into a fetchable URL.
            data['image'] = f"{image_base_url}{data['image']}.jpg"

        settings = {
            "treatUrlsAndPointersAsImages": True,  # allows us to find an image file and index it
            "model": "ViT-B/16",
        }
        mq.create_index(index_name, settings_dict=settings)

        with st.spinner("Creating Index..."):
            mq.index(index_name).add_documents(
                shirt_data,
                # 'image' is the field that holds the image URL. The original
                # call passed tensor_fields twice (a SyntaxError), the second
                # time naming a field that no document contains.
                tensor_fields=['image'],
                client_batch_size=64,
            )
        st.success("Index successfully created.")
    except Exception as err:
        # An existing index is one possible cause, but a missing CSV or an
        # unreachable Marqo server ends up here too, so show the real error.
        st.error(f"Could not create index (it may already exist): {err}")
def delete_index():
    """Delete the ``demo-search-index`` index and report the outcome in the UI.

    Any failure is shown as a Streamlit error message together with the
    underlying error text; no exception is propagated to the caller.
    """
    try:
        mq.index("demo-search-index").delete()
    except Exception as err:
        # A missing index is one possible cause, but connection problems end
        # up here too, so include the actual error instead of guessing.
        st.error(f"Could not delete index (it may not exist): {err}")
    else:
        st.success("Index successfully deleted.")
def save_uploadedfile(uploadedfile):
    """Write an uploaded file into the working directory and return its file name.

    Args:
        uploadedfile: Object exposing ``name`` (the file name) and
            ``getbuffer()`` (the file contents), as returned by
            ``st.file_uploader``.

    Returns:
        The file name the data was saved under, relative to ``cwd``.
    """
    # The name comes from the client, so keep only its final component. This
    # stops names such as "../x.jpg" from writing outside the working
    # directory. Backslashes are normalised first so Windows-style paths are
    # reduced as well.
    safe_name = os.path.basename(uploadedfile.name.replace("\\", "/"))
    with open(os.path.join(cwd, safe_name), "wb") as f:
        f.write(uploadedfile.getbuffer())
    return safe_name
def reset_state():
    """Clear the stored search results and mark pagination as inactive (-1)."""
    state = st.session_state
    state['results'] = {}
    state['page'] = -1
def create_filter_str(filter_list):
    """Build the Marqo pre-filter string for the selected filter options.

    ``'Kids'`` toggles the ``kids`` flag; every other entry is treated as a
    clothing label. Several labels are OR-ed together inside one group,
    because each document carries a single ``label`` value and AND-ing
    different labels could never match anything.

    Args:
        filter_list: Selected option names. The list is not modified.

    Returns:
        The filter string, e.g. ``'kids:false AND label:(Dress)'``.
    """
    selected = list(filter_list or ())  # work on a copy; never mutate the caller's list
    kids_clause = 'kids:true' if 'Kids' in selected else 'kids:false'
    label_terms = [f"label:({field})" for field in selected if field != 'Kids']

    clauses = [kids_clause]
    if len(label_terms) == 1:
        clauses.append(label_terms[0])
    elif label_terms:
        clauses.append("(" + " OR ".join(label_terms) + ")")

    filter_string = " AND ".join(clauses)
    print(filter_string)
    return filter_string
def main():
    """Render the demo UI, run Marqo searches and page through the results.

    The sidebar offers index creation and deletion. The main area takes a
    text query, an image URL or an uploaded image, runs a search limited to
    30 hits against ``demo-search-index``, and shows the hits ten per page in
    a five-column grid.
    """
    page_size = 10  # hits shown per page (5 columns x 2 rows)

    # Streamlit state variables (this is to save the state of the session for
    # pagination of Marqo query results)
    if 'results' not in st.session_state:
        st.session_state['results'] = {}
    if 'page' not in st.session_state:
        st.session_state['page'] = -1

    # Index Settings Frontend
    with st.sidebar:
        st.write("Index Settings:")
        values = st.slider(
            label='Select a range of values',
            min_value=10.0,
            max_value=2000.0,
            value=1000.0,
            step=10.0,
        )

        create_col, _, delete_col = st.columns([1, 1, 1])
        with create_col:
            create_btn = st.button('Create Index')
        if create_btn:
            load_index(int(values))

        with delete_col:
            delete_btn = st.button('Delete Index')
        if delete_btn:
            delete_index()

    # Main application frontend
    # os.path.join keeps the logo path valid on every OS; the previous
    # hard-coded backslash only worked on Windows.
    logo = Image.open(os.path.join(cwd, "marqo-logo.jpg"))
    st.image(logo)

    search_text, search_image_url, search_image = None, None, None
    search_mode = st.radio("", ("Text", "Image"), horizontal=True, on_change=reset_state)

    if search_mode == "Text":
        box_col, search_mode_col = st.columns([6, 1])
        with box_col:
            search_text = st.text_input("Text Search")
        with search_mode_col:
            search_text_mode = st.radio("Search mode", ("Tensor", "Lexical"))
    else:
        image_input_col, image_type_col = st.columns([6, 1])
        with image_type_col:
            image_type = st.radio("Image type", ("Web", "Local"))
        with image_input_col:
            if image_type == "Web":
                search_image_url = st.text_input("Provide an Image URL")
            else:
                search_image = st.file_uploader('Upload an Image', type=['jpg'])

    with st.expander("Search Settings"):
        attr_col, filter_col = st.columns(2)
        with attr_col:
            # NOTE(review): this selection is collected but not passed to the
            # search call below - confirm whether it should be.
            searchable_attr = st.multiselect('Searchable Attributes', ['Image', 'Label'], default=['Label'])
        with filter_col:
            filtering = st.multiselect(
                'Pre-filtering Options',
                ['Dress', 'Hat', 'Longsleeve', 'Outwear', 'Pants', 'Shirt',
                 'Shoes', 'Shorts', 'Skirt', 'T-Shirt', 'Kids'],
                default=None,
            )

    search_btn = st.button('Search')

    # Marqo Results logic
    has_query = (search_image is not None) or bool(search_image_url) or bool(search_text)
    if has_query and search_btn:
        search_kwargs = {}
        if search_text:
            query = search_text
            # Only text search lets the user choose tensor vs lexical search.
            search_kwargs["search_method"] = search_text_mode.upper()
        elif search_image_url:
            query = search_image_url
        else:
            # Save the upload locally and hand Marqo a URL for it on the
            # host's port 8222.
            uploaded_img_name = save_uploadedfile(search_image)
            query = f"http://host.docker.internal:8222/{uploaded_img_name}"
            print(query)

        results = mq.index("demo-search-index").search(
            query,
            filter_string=create_filter_str(filtering),
            limit=30,
            **search_kwargs,
        )
        pprint.pprint(results)

        st.session_state['results'] = results
        st.session_state['page'] = 0 if results['hits'] else -1

    # Results Pagination Logic
    hits = st.session_state['results'].get('hits', []) if st.session_state['results'] else []
    if st.session_state['page'] > -1:
        # Derive the last page from the number of hits, so "Next" cannot move
        # onto an empty page when fewer than 30 hits were returned.
        last_page = max((len(hits) - 1) // page_size, 0)
        prev_col, page_col, next_col = st.columns([1, 9, 1])

        with prev_col:
            prev_btn = st.button("Prev")
            if prev_btn and st.session_state['page'] > 0:
                st.session_state['page'] -= 1

        with next_col:
            next_btn = st.button("Next")
            if next_btn and st.session_state['page'] < last_page:
                st.session_state['page'] += 1

        with page_col:
            st.markdown(
                '<div style="text-align: center"> {}</div>'.format(
                    "Page " + str(st.session_state['page'] + 1)
                ),
                unsafe_allow_html=True,
            )

    if st.session_state['results'] != {}:
        if hits:
            st.write("Results (Top 30):")
            col = st.columns(5)
            docker_prefix = "http://host.docker.internal:8222/"
            first = st.session_state['page'] * page_size

            for slot, hit in enumerate(hits[first:first + page_size]):
                pic = hit.get('image')
                if pic is None:
                    continue

                with col[slot // 2]:  # two hits per column
                    if pic.startswith(docker_prefix):
                        # The container-side host name does not resolve from
                        # the app, so fetch through localhost instead.
                        pic_url = 'http://localhost:8222/' + pic[len(docker_prefix):]
                    else:
                        # Previously pic_url was left unbound on this path.
                        pic_url = pic
                    response = requests.get(pic_url, stream=True, timeout=10)
                    image = Image.open(response.raw)
                    st.image(image, caption=hit.get('label'))
        else:
            st.write("No results")
# Entry point: build and render the app by calling main() when the script runs.
main()
Function References
-
load_index(number_data)
Name: number_data | Type: Integer | Description: number of data lines to parse from the dataset.
This function reads the data from the images.csv file in the dataset and creates an index in the Marqo client. By default, the created index name is
demo-search-index
. -
delete_index()
This function deletes the
demo-search-index
index if it exists, else an error message is displayed. -
save_uploadedfile(uploadedfile)
Name: uploadedfile | Type: uploaded file object (as returned by Streamlit's file uploader) | Description: image file uploaded onto Streamlit.
This function saves the uploaded image into the directory where the script is running and returns the file name.
-
create_filter_str(filter_list)
Name: filter_list | Type: List[String] | Description: list of pre-filtering string options.
This function returns the pre-filtering string passed when searching, based on the elements in
filter_list
.
Preview
An online version of the Web App is hosted via this link.
Usage
Feel free to check out the code to get a better understanding of how Marqo functions are used :).