# test_qwen_perf.py (1.2 KB)

import json
import time

import httpx
  4. def test_performance():
  5. url = "http://localhost:11434/api/chat"
  6. payload = {
  7. "model": "qwen3.5:9b",
  8. "messages": [{"role": "user", "content": "Hi."}],
  9. "stream": True
  10. }
  11. print(f"Starting streaming test for qwen3.5:9b...")
  12. start_time = time.time()
  13. try:
  14. with httpx.Client(timeout=300.0) as client:
  15. with client.stream("POST", url, json=payload) as response:
  16. response.raise_for_status()
  17. print("\n--- Response ---")
  18. for line in response.iter_lines():
  19. if line:
  20. chunk = json.loads(line)
  21. content = chunk.get("message", {}).get("content", "")
  22. print(content, end="", flush=True)
  23. if chunk.get("done"):
  24. break
  25. print("\n----------------")
  26. end_time = time.time()
  27. duration = end_time - start_time
  28. print(f"Duration: {duration:.2f} seconds")
  29. except Exception as e:
  30. print(f"Error during test: {e}")
  31. if __name__ == "__main__":
  32. test_performance()