"""Manual streaming smoke test against a local chat endpoint."""

import json
import time

import httpx


def test_performance():
    """Stream one chat completion and report how long it took.

    Sends a single "Hi." user message to the chat endpoint on
    ``localhost:11434`` with streaming enabled, echoes each content
    fragment to stdout as it arrives, and prints the total elapsed time
    once the stream finishes.

    Returns:
        None. All results are written to stdout.

    Any exception raised while connecting, streaming, or decoding is
    caught and reported on stdout rather than propagated.
    """
    url = "http://localhost:11434/api/chat"
    model = "qwen3.5:9b"
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": "Hi."}],
        "stream": True,
    }

    print(f"Starting streaming test for {model}...")
    # perf_counter() is monotonic, so the measured interval cannot be
    # skewed by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    try:
        with httpx.Client(timeout=300.0) as client:
            with client.stream("POST", url, json=payload) as response:
                response.raise_for_status()
                print("\n--- Response ---")
                # Each non-empty line of the body is parsed as one JSON object.
                for line in response.iter_lines():
                    if not line:
                        continue
                    chunk = json.loads(line)
                    content = chunk.get("message", {}).get("content", "")
                    # Flush per fragment so the output appears live.
                    print(content, end="", flush=True)
                    if chunk.get("done"):
                        break
        print("\n----------------")
        duration = time.perf_counter() - start_time
        print(f"Duration: {duration:.2f} seconds")
    except Exception as e:
        # Top-level boundary of a manual test script: report the failure
        # instead of crashing with a traceback.
        print(f"Error during test: {e}")


if __name__ == "__main__":
    test_performance()