How to use a File Search feature in Azure, similar to the one available in OpenAI

Anonymous
2024-09-28T06:26:59.77+00:00

Hi,

I want to use a File Search feature in Azure, like the one available in OpenAI. My use case involves having collection names and their field descriptions stored in a file—let’s say five collections. Users may ask various queries, and I want to perform a vector search to find the best-matched collection details. The collection details and description extracted by the file search (vector search) then need to be provided to the AI so that it can generate code based on the descriptions in the collection.

I can achieve this with OpenAI's File Search, but I would like to know if Azure offers similar services. Maintaining context is also important for user queries, such as:

  1. give me the last location of vehicle ED3456?
  2. for AB1234?

Can you assist me with this?

I can provide the code I used in OpenAI, and I would like to implement the same use case in Azure. I need your assistance with the configuration in Azure, and I would appreciate sample Python code.

import os
from openai import AssistantEventHandler, OpenAI


# Connection-string template that is pasted verbatim into the prompt below.
# This is a plain (non-f) triple-quoted string, so the {username}/{password}/
# {host}/{port} placeholders are NOT substituted here; the literal text,
# including the leading `f"`, is what ends up in the prompt.
# NOTE(review): the text ends with two double quotes (""), which looks like a
# typo — confirm before changing, since it alters the prompt sent to the model.
query_str_2 = '''f"mongodb://{username}:{password}@{host}:{port}""'''

# Instructions given to the assistant. The f-string interpolates query_str_2
# once, at import time.
default_prompt = f"""
call this user query {query_str_2} in MongoClient and give me the python code along with the db name and collection names. 
At the end of the code instead of printing the results, store the final code result in "result_data" variable strictly, 
in case final code result contains more than one record convert to dataframe and assigned to "result_data". 
IMPORTANT: Do not provide any explanations in the response, provide python code only.
"""
# Set the OpenAI API key from environment variable
# (os.getenv returns None when OPENAI_API_KEY is unset.)

client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

def file_upload():
    """Create the assistant, index the prompt file, and attach the vector store.

    Creates an assistant with the ``file_search`` tool and a vector store,
    uploads the collection-description file into that store, and then updates
    the assistant so that it searches the store.

    Returns:
        tuple: ``(assistant_id, uploaded_file_id)``.

    Raises:
        RuntimeError: if the vector store lists no files after the upload.
        Exception: any error raised while opening the file or calling the API
            is printed and then re-raised. Returning ``None`` instead would
            only make the caller's tuple unpacking fail with an unrelated
            ``TypeError``.
    """
    # Create the assistant
    assistant1 = client.beta.assistants.create(
        name="You Are Mongo Query expert",
        instructions=default_prompt,
        model="gpt-4o",
        tools=[{"type": "file_search"}],
    )

    # Create a vector store
    vector_store1 = client.beta.vector_stores.create(name="Mongocollection_query")
    print("vector_store_id",vector_store1.id)
    # os.path.join keeps the path valid on non-Windows hosts as well.
    file_paths = [os.path.join("prompt_files", "test.txt")]

    try:
        # The file only needs to stay open for the duration of the upload.
        with open(file_paths[0], "rb") as file_stream:
            file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
                vector_store_id=vector_store1.id, files=[file_stream]
            )
        print("file batch status:", file_batch.status)
        print(file_batch.file_counts)

        uploaded_files = client.beta.vector_stores.files.list(vector_store_id=vector_store1.id)
        print(uploaded_files)
        if not uploaded_files.data:
            raise RuntimeError(
                f"Vector store {vector_store1.id} contains no files after upload "
                f"(batch status: {file_batch.status})"
            )
        uploaded_file_id = uploaded_files.data[0].id

        # Update the assistant with the vector store
        assistant = client.beta.assistants.update(
            assistant_id=assistant1.id,
            tool_resources={"file_search": {"vector_store_ids": [vector_store1.id]}},
        )

        print("assistant_id:", assistant.id)
        print("Uploaded file_id:", uploaded_file_id)
        return assistant.id, uploaded_file_id
    except Exception as e:
        print(f"An error occurred: {e}")
        # Propagate the failure so the caller sees the real cause.
        raise


# file_upload() returns the assistant id and the id of the indexed file.
assistant_id, uploaded_file_id = file_upload()

# Start a thread whose first user message attaches the uploaded file for the
# file_search tool.
thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "give me the last known location of vehicle ED3456",
            "attachments": [
                {"file_id": uploaded_file_id, "tools": [{"type": "file_search"}]}
            ],
        }
    ]
)

thread_id = thread.id

run = client.beta.threads.runs.create_and_poll(
    thread_id=thread_id, assistant_id=assistant_id
)
messages = list(client.beta.threads.messages.list(thread_id=thread_id, run_id=run.id))

# A run that did not complete (failed, expired, cancelled, ...) may leave no
# assistant message behind; report the run status instead of failing with a
# bare IndexError on messages[0].
if not messages:
    raise RuntimeError(
        f"Run {run.id} produced no messages "
        f"(status: {run.status}, last_error: {run.last_error})"
    )

message_content = messages[0].content[0].text
annotations = message_content.annotations
citations = []
# Replace each annotation marker in the answer with a numbered reference and
# collect the cited file names.
for index, annotation in enumerate(annotations):
    message_content.value = message_content.value.replace(annotation.text, f"[{index}]")
    if file_citation := getattr(annotation, "file_citation", None):
        cited_file = client.files.retrieve(file_citation.file_id)
        citations.append(f"[{index}] {cited_file.filename}")


print(message_content.value)
print("\n".join(citations))

#######################################################################

# Send a new question to the existing thread to maintain conversation context.
new_question = "for this vehicle AB1234"

# Create a new message in the existing thread
new_message_response = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content=new_question
)

# Run the thread to process the latest message.
run_response = client.beta.threads.runs.create_and_poll(
    thread_id=thread.id,
    assistant_id=assistant_id
)

messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run_response.id))

# A run that did not complete may leave no assistant message behind; report
# the run status instead of failing with a bare IndexError on messages[0].
if not messages:
    raise RuntimeError(
        f"Run {run_response.id} produced no messages "
        f"(status: {run_response.status}, last_error: {run_response.last_error})"
    )

# Handle annotations and citations as for the first question
message_content = messages[0].content[0].text
annotations = message_content.annotations

citations = []
for index, annotation in enumerate(annotations):
    message_content.value = message_content.value.replace(annotation.text, f"[{index}]")
    if file_citation := getattr(annotation, "file_citation", None):
        cited_file = client.files.retrieve(file_citation.file_id)
        citations.append(f"[{index}] {cited_file.filename}")

# Output the response and any citations
print(message_content.value)
print("\n".join(citations))




Azure AI Search
Azure AI Search
An Azure search service with built-in artificial intelligence capabilities that enrich information to help identify and explore relevant content at scale.
1,080 questions
{count} votes

1 answer

Sort by: Most helpful
  1. Amira Bedhiafi 26,656 Reputation points
    2024-10-02T20:25:00.0233333+00:00

    You will need an Azure Storage account to store and manage your collection description files. Once uploaded, these files can be indexed in Azure Cognitive Search.

    Create an Azure Cognitive Search Service

    1. Go to the Azure portal and create a Cognitive Search resource.
    2. Create an Index in Cognitive Search for your collection descriptions. Each file will be an indexed document, where each collection description is indexed as a searchable field.
    3. Use the Azure Search SDK or Azure portal to configure the index schema (field descriptions, searchable text).
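    1. Enable semantic ranking (semantic search) in Azure Cognitive Search to improve relevance. Note that semantic ranking re-ranks text results and is not the same as vector search; for true vector search, the index also needs a vector field populated with embeddings.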
    2. In your query, use the built-in semantic search API to perform searches based on user input (like vehicle location queries).

    Call Azure OpenAI for Code Generation

    1. Once the best-matched collection is found using Cognitive Search, extract the relevant details.
    2. Pass these details to Azure OpenAI (use a chat model deployment such as gpt-4; the older davinci-codex models have been retired) to generate code snippets based on the collection description.
    
    import openai
    
    from azure.core.credentials import AzureKeyCredential
    
    from azure.search.documents import SearchClient
    
    from azure.search.documents.indexes import SearchIndexClient
    
    from azure.search.documents.models import QueryType
    
    # Configure the API key used by the openai module
    openai.api_key = "your-openai-api-key"

    # Build the Azure Search client: credential first, then the client itself
    search_credential = AzureKeyCredential("your-api-key")
    search_client = SearchClient(
        endpoint="your-search-endpoint",
        index_name="your-index-name",
        credential=search_credential,
    )
    
    # Run a semantic query for the user's question and return the top hit.
    # (This is a semantic-ranked text query, not a vector query.)
    def search_collections(query):
        """Return the ``collection_description`` field of the best-matching document.

        Args:
            query: The user's natural-language question.

        Returns:
            The value stored under ``'collection_description'`` in the first
            result returned by the search.

        Raises:
            LookupError: if the search returns no documents.
        """
        # NOTE(review): a semantic query normally requires a semantic
        # configuration on the index (semantic_configuration_name) — confirm
        # the index defines a default one.
        results = search_client.search(query, query_type=QueryType.SEMANTIC)

        # Use a default so that an empty result set raises a clear error
        # instead of leaking StopIteration to the caller.
        best_match = next(iter(results), None)
        if best_match is None:
            raise LookupError(f"No collection matched the query: {query!r}")

        return best_match['collection_description']
    
    # Generate Python code from the collection description with a chat model
    def generate_code(collection_description):
        """Ask the model to generate Python code for a collection description.

        Uses the Chat Completions endpoint: ``gpt-4`` is a chat model and is
        not served by the legacy ``openai.Completion`` endpoint, which was
        also removed in openai>=1.0.

        Args:
            collection_description: Text describing the matched collection.

        Returns:
            str: The generated text with surrounding whitespace stripped, or
            an empty string if the model returned no content.
        """
        # NOTE(review): when targeting Azure OpenAI, `model` must be the
        # deployment name and the openai module must also be configured with
        # the Azure endpoint and API version — confirm against your resource.
        response = openai.chat.completions.create(
            model="gpt-4",
            messages=[
                {
                    "role": "user",
                    "content": f"Generate Python code for {collection_description}",
                },
            ],
            max_tokens=150,
        )

        # message.content can be None, so guard before stripping.
        content = response.choices[0].message.content
        return (content or "").strip()
    
    # Example query for last vehicle location
    
    query = "give me the last known location of vehicle ED3456"
    
    # Look up the best-matching collection description for the query.
    collection_description = search_collections(query)
    
    # Ask the model to generate code from that description.
    generated_code = generate_code(collection_description)
    
    print(generated_code)
    
    

    If you need more specific help, feel free to ask!

    0 comments No comments

Your answer

Answers can be marked as Accepted Answers by the question author, which helps users to know the answer solved the author's problem.