Hi,
I want to use the File Search feature in Azure, equivalent to the one available in OpenAI. My use case involves having collection names and their field descriptions stored in a file — say five collections. Users may ask various queries, and I want to perform a vector search to find the best-matched collection details. The collection details and descriptions extracted by File Search (vector search) then need to be provided to the AI to generate code based on the descriptions in the matched collection.
I can achieve this with OpenAI's File Search, but I would like to know if Azure offers similar services. Maintaining context is also important for user queries, such as:
- give me the last location of vehicle ED3456?
- for AB1234?
Can you assist me with this?
I can provide the code I used with OpenAI, and I would like to implement the same use case in Azure. I need your assistance with the Azure configuration, and I would appreciate a sample in Python code.
import os
from openai import AssistantEventHandler, OpenAI
# Connection-string template passed verbatim into the prompt; the f-string
# placeholders ({username}, {host}, ...) are meant to be filled in by the
# generated code, not here. Fixed: the original literal ended with a stray
# extra double quote (…{port}""), which produced a malformed template.
query_str_2 = '''f"mongodb://{username}:{password}@{host}:{port}"'''

# System prompt for the assistant: emit runnable Python only, storing the
# final result in a "result_data" variable (as a DataFrame when the result
# contains more than one record).
default_prompt = f"""
call this user query {query_str_2} in MongoClient and give me the python code along with the db name and collection names.
At the end of the code instead of printing the results, store the final code result in "result_data" variable strictly,
in case final code result contains more than one record convert to dataframe and assigned to "result_data".
IMPORTANT: Do not provide any explanations in the response, provide python code only.
"""

# Set the OpenAI API key from environment variable
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
def file_upload():
    """Create a file_search assistant backed by a new vector store.

    Uploads the local collection-description file into a freshly created
    vector store, then attaches that store to a newly created assistant so
    the file_search tool can retrieve from it.

    Returns:
        tuple: ``(assistant_id, uploaded_file_id)``.

    Raises:
        Exception: re-raised after logging if any API call or the file
            upload fails. (Previously the error was swallowed and the
            function fell through to an implicit ``None`` return, which
            made the caller's 2-tuple unpack crash with an unrelated
            ``TypeError``.)
    """
    # Create the assistant with the file_search tool enabled.
    assistant1 = client.beta.assistants.create(
        name="You Are Mongo Query expert",
        instructions=default_prompt,
        model="gpt-4o",
        tools=[{"type": "file_search"}],
    )

    # Create a vector store to hold the collection-description file.
    vector_store1 = client.beta.vector_stores.create(name="Mongocollection_query")
    print("vector_store_id", vector_store1.id)

    # os.path.join instead of a raw backslash path, so the script also
    # works outside Windows.
    file_paths = [os.path.join("prompt_files", "test.txt")]
    try:
        # Context manager guarantees the file handle is closed.
        with open(file_paths[0], "rb") as file_stream:
            file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
                vector_store_id=vector_store1.id, files=[file_stream]
            )
        print("file batch status:", file_batch.status)
        print(file_batch.file_counts)

        uploaded_files = client.beta.vector_stores.files.list(vector_store_id=vector_store1.id)
        print(uploaded_files)
        uploaded_file_id = uploaded_files.data[0].id

        # Attach the vector store to the assistant so file_search can
        # retrieve from it on every run.
        assistant = client.beta.assistants.update(
            assistant_id=assistant1.id,
            tool_resources={"file_search": {"vector_store_ids": [vector_store1.id]}},
        )
        print("assistant_id:", assistant.id)
        print("Uploaded file_id:", uploaded_file_id)
        return assistant.id, uploaded_file_id
    except Exception as e:
        print(f"An error occurred: {e}")
        raise  # don't return None — the caller unpacks a 2-tuple
# First user turn: attach the uploaded file so file_search can ground the
# answer in the collection descriptions.
assistant_id, uploaded_file_id = file_upload()

thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "give me the last known location of vehicle ED3456",
            "attachments": [
                {"file_id": uploaded_file_id, "tools": [{"type": "file_search"}]}
            ],
        }
    ]
)
thread_id = thread.id

# Block until the assistant has finished processing the message.
run = client.beta.threads.runs.create_and_poll(
    thread_id=thread_id,
    assistant_id=assistant_id,
)

# Messages are listed newest-first; index 0 is the assistant's reply.
reply_messages = list(client.beta.threads.messages.list(thread_id=thread_id, run_id=run.id))
message_content = reply_messages[0].content[0].text

# Replace inline annotation markers with [n] indices and collect the
# corresponding cited file names.
citations = []
for idx, ann in enumerate(message_content.annotations):
    message_content.value = message_content.value.replace(ann.text, f"[{idx}]")
    cited = getattr(ann, "file_citation", None)
    if cited:
        citations.append(f"[{idx}] {client.files.retrieve(cited.file_id).filename}")

print(message_content.value)
print("\n".join(citations))
#######################################################################
# Follow-up turn: reuse the existing thread so the assistant keeps the
# conversation context (it should resolve "this vehicle" from history).
new_question = "for this vehicle AB1234"

# Append the follow-up question to the existing thread. (The original used
# bare triple-quoted strings as "comments" — those are no-op expression
# statements, not comments; replaced with real comments.)
new_message_response = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content=new_question
)

# Run the thread again to process the latest message.
run_response = client.beta.threads.runs.create_and_poll(
    thread_id=thread.id,
    assistant_id=assistant_id
)

# Messages are listed newest-first; index 0 is the assistant's reply to
# the follow-up question.
messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run_response.id))
message_content = messages[0].content[0].text
annotations = message_content.annotations

# Swap annotation markers for [n] indices and gather the cited file names.
citations = []
for index, annotation in enumerate(annotations):
    message_content.value = message_content.value.replace(annotation.text, f"[{index}]")
    if file_citation := getattr(annotation, "file_citation", None):
        cited_file = client.files.retrieve(file_citation.file_id)
        citations.append(f"[{index}] {cited_file.filename}")

# Output the response and any citations
print(message_content.value)
print("\n".join(citations))