import json

import requests


# Generate a response for a given prompt with a provided model. This is a
# streaming endpoint, so the reply arrives as a series of JSON chunks. The
# final response object will include statistics and additional data from the
# request. Pass a callback function to override the default handler.
def generate(model_name, prompt, system=None, callback=None):
    try:
        url = "http://localhost:11434/api/generate"
        payload = {"model": model_name, "prompt": prompt, "system": system}

        # Remove keys with None values so optional fields are omitted
        payload = {k: v for k, v in payload.items() if v is not None}

        with requests.post(url, json=payload, stream=True) as response:
            response.raise_for_status()

            # Holds the context history from the final chunk
            final_context = None

            # Holds the concatenated response text if no callback is provided
            full_response = ""

            # Iterate over the response line by line; each line is a JSON chunk
            for line in response.iter_lines():
                if line:
                    chunk = json.loads(line)

                    if callback:
                        callback(chunk)
                    elif not chunk.get("done"):
                        response_piece = chunk.get("response", "")
                        full_response += response_piece
                        # print(response_piece, end="", flush=True)

                    # The last chunk (done is true) carries the final context
                    if chunk.get("done"):
                        final_context = chunk.get("context")

            # Return the full response and the final context
            return full_response, final_context
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None, None
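
# Follow-up turns: Ollama's /api/generate also accepts a "context" field,
# the context returned by a previous call, to keep a short conversational
# memory. A minimal sketch under that assumption; generate_with_context is
# a hypothetical helper, not part of the original example.
def generate_with_context(model_name, prompt, context):
    payload = {"model": model_name, "prompt": prompt, "context": context}
    full_response, final_context = "", None
    with requests.post("http://localhost:11434/api/generate", json=payload, stream=True) as response:
        response.raise_for_status()
        for line in response.iter_lines():
            if line:
                chunk = json.loads(line)
                if not chunk.get("done"):
                    full_response += chunk.get("response", "")
                else:
                    final_context = chunk.get("context")
    return full_response, final_context
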
if __name__ == '__main__':
    model = 'name-of-your-ollama-model'
    SYS_PROMPT = 'hello'
    USER_PROMPT = "What is your favorite movie?"

    response1, _ = generate(model_name=model, system=SYS_PROMPT, prompt=USER_PROMPT)
    print(response1)
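
    # Streaming variant: a per-chunk callback prints tokens as they arrive
    # instead of accumulating them; print_chunk is illustrative, not part of
    # the original example.
    def print_chunk(chunk):
        print(chunk.get("response", ""), end="", flush=True)

    generate(model_name=model, prompt=USER_PROMPT, callback=print_chunk)
    print()

    # A follow-up turn could reuse the returned context, e.g.:
    # _, ctx = generate(model_name=model, prompt=USER_PROMPT)
    # response2, _ = generate_with_context(model, "Tell me more.", ctx)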