Rox-Turbo committed on
Commit
f2f8b4a
·
verified ·
1 Parent(s): c5e13e1

Upload 13 files

Browse files
Files changed (3) hide show
  1. README.md +1 -27
  2. python-version +1 -0
  3. server.py +6 -98
README.md CHANGED
@@ -35,22 +35,9 @@ curl -X POST https://Rox-Turbo-API.hf.space/chat \
35
  -d '{"messages":[{"role":"user","content":"Hello"}]}'
36
  ```
37
 
38
- ### With Web Search (NEW!)
39
-
40
- Get real-time information from the web:
41
-
42
- ```bash
43
- curl -X POST https://Rox-Turbo-API.hf.space/chat \
44
- -H "Content-Type: application/json" \
45
- -d '{
46
- "messages":[{"role":"user","content":"Latest AI news?"}],
47
- "websearch": true
48
- }'
49
- ```
50
-
51
  ## Usage
52
 
53
- ### Basic Request
54
 
55
  ```json
56
  {
@@ -62,18 +49,6 @@ curl -X POST https://Rox-Turbo-API.hf.space/chat \
62
  }
63
  ```
64
 
65
- ### With Web Search
66
-
67
- ```json
68
- {
69
- "messages": [
70
- {"role": "user", "content": "What's new in AI?"}
71
- ],
72
- "websearch": true,
73
- "websearch_results": 5
74
- }
75
- ```
76
-
77
  ### Response
78
 
79
  ```json
@@ -136,7 +111,6 @@ curl -X POST https://Rox-Turbo-API.hf.space/turbo45 \
136
 
137
  ## Documentation
138
 
139
- - [Web Search Guide](docs/WEBSEARCH.md) - Real-time web search
140
  - [Code Examples](docs/CODE.md) - Copy-paste code
141
  - [Developer Guide](docs/DEVELOPER_GUIDE.md) - Integration guide
142
  - [Models Guide](docs/MODELS.md) - Model details
 
35
  -d '{"messages":[{"role":"user","content":"Hello"}]}'
36
  ```
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  ## Usage
39
 
40
+ ### Request
41
 
42
  ```json
43
  {
 
49
  }
50
  ```
51
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  ### Response
53
 
54
  ```json
 
111
 
112
  ## Documentation
113
 
 
114
  - [Code Examples](docs/CODE.md) - Copy-paste code
115
  - [Developer Guide](docs/DEVELOPER_GUIDE.md) - Integration guide
116
  - [Models Guide](docs/MODELS.md) - Model details
python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.14
server.py CHANGED
@@ -16,7 +16,6 @@ from pydantic import BaseModel, Field
16
  from openai import AsyncOpenAI
17
  import httpx
18
  import json
19
- from websearch import perform_web_search
20
 
21
 
22
  # Load environment variables
@@ -71,10 +70,10 @@ SYSTEM_PROMPT_MODE = os.getenv("SYSTEM_PROMPT_MODE", "full").strip().lower()
71
 
72
  # Model configurations
73
  ROX_CORE_MODEL = "z-ai/glm4.7"
74
- ROX_TURBO_MODEL = "deepseek-ai/deepseek-v3.2"
75
  ROX_CODER_MODEL = "z-ai/glm5"
76
  ROX_TURBO_45_MODEL = "qwen/qwen3-coder-480b-a35b-instruct"
77
- ROX_ULTRA_MODEL = "deepseek-ai/deepseek-v4-pro"
78
  ROX_DYNO_MODEL = "moonshotai/kimi-k2.5"
79
  ROX_CODER_7_MODEL = "z-ai/glm-5.1"
80
  ROX_VISION_MODEL = "google/gemma-3-27b-it"
@@ -285,70 +284,6 @@ def _sse_headers() -> dict:
285
  }
286
 
287
 
288
- async def _handle_websearch(
289
- app_: FastAPI,
290
- messages: list,
291
- websearch_enabled: bool,
292
- num_results: int
293
- ) -> list:
294
- """
295
- Handle web search if enabled and inject results into messages
296
-
297
- Args:
298
- app_: FastAPI app instance
299
- messages: Current message list
300
- websearch_enabled: Whether web search is enabled
301
- num_results: Number of search results to fetch
302
-
303
- Returns:
304
- Updated messages list with search results if applicable
305
- """
306
- if not websearch_enabled or not messages:
307
- return messages
308
-
309
- # Get the last user message as search query
310
- last_user_msg = None
311
- for msg in reversed(messages):
312
- if msg.get("role") == "user":
313
- last_user_msg = msg.get("content", "")
314
- break
315
-
316
- if not last_user_msg:
317
- return messages
318
-
319
- # Perform web search
320
- http_client = getattr(app_.state, "http_client", None)
321
- if not http_client:
322
- logger.warning("HTTP client not available for web search")
323
- return messages
324
-
325
- try:
326
- success, search_results, error = await perform_web_search(
327
- http_client,
328
- last_user_msg,
329
- num_results
330
- )
331
-
332
- if success and search_results:
333
- # Inject search results before the last user message
334
- search_context = {
335
- "role": "system",
336
- "content": f"[Web Search Results]\n{search_results}\n\nUse these search results to provide an accurate, up-to-date answer to the user's question."
337
- }
338
-
339
- # Insert search results before last user message
340
- messages_copy = messages[:-1] + [search_context, messages[-1]]
341
- logger.info("Web search completed: %d results", num_results)
342
- return messages_copy
343
- else:
344
- if error:
345
- logger.warning("Web search failed: %s", error)
346
- except Exception as e:
347
- logger.error("Web search error: %s", str(e))
348
-
349
- return messages
350
-
351
-
352
  # Helper function for streaming responses
353
  async def stream_response(
354
  app_: FastAPI,
@@ -476,8 +411,6 @@ class ChatRequest(BaseModel):
476
  top_p: Optional[float] = None
477
  max_tokens: Optional[int] = None
478
  stream: Optional[bool] = False
479
- websearch: Optional[bool] = False
480
- websearch_results: Optional[int] = 5
481
 
482
 
483
  class ChatResponse(BaseModel):
@@ -501,22 +434,12 @@ class HFResponseItem(BaseModel):
501
 
502
  @app.post("/chat")
503
  async def chat(req: ChatRequest):
504
- """Rox Core - Main conversational model with streaming support and web search"""
505
  messages: list = []
506
  system_prompt = _system_prompt_for("core")
507
  if system_prompt:
508
  messages.append({"role": "system", "content": system_prompt})
509
  messages.extend([m.model_dump() for m in req.messages])
510
-
511
- # Handle web search if enabled
512
- if req.websearch:
513
- messages = await _handle_websearch(
514
- app,
515
- messages,
516
- req.websearch,
517
- req.websearch_results or 5
518
- )
519
-
520
  temperature = _effective_temperature(req.temperature)
521
  top_p = _effective_top_p(req.top_p)
522
  max_tokens = _effective_max_tokens(req.max_tokens, 8192)
@@ -545,17 +468,12 @@ async def chat(req: ChatRequest):
545
 
546
  @app.post("/turbo")
547
  async def turbo(req: ChatRequest):
548
- """Rox 2.1 Turbo - Fast and efficient with streaming and web search"""
549
  messages: list = []
550
  system_prompt = _system_prompt_for("turbo")
551
  if system_prompt:
552
  messages.append({"role": "system", "content": system_prompt})
553
  messages.extend([m.model_dump() for m in req.messages])
554
-
555
- # Handle web search if enabled
556
- if req.websearch:
557
- messages = await _handle_websearch(app, messages, req.websearch, req.websearch_results or 5)
558
-
559
  temperature = _effective_temperature(req.temperature)
560
  top_p = _effective_top_p(req.top_p)
561
  max_tokens = _effective_max_tokens(req.max_tokens, 8192)
@@ -584,17 +502,12 @@ async def turbo(req: ChatRequest):
584
 
585
  @app.post("/coder")
586
  async def coder(req: ChatRequest):
587
- """Rox 3.5 Coder - Specialized coding with streaming and web search"""
588
  messages: list = []
589
  system_prompt = _system_prompt_for("coder")
590
  if system_prompt:
591
  messages.append({"role": "system", "content": system_prompt})
592
  messages.extend([m.model_dump() for m in req.messages])
593
-
594
- # Handle web search if enabled
595
- if req.websearch:
596
- messages = await _handle_websearch(app, messages, req.websearch, req.websearch_results or 5)
597
-
598
  temperature = _effective_temperature(req.temperature)
599
  top_p = _effective_top_p(req.top_p)
600
  max_tokens = _effective_max_tokens(req.max_tokens, 16384)
@@ -668,17 +581,12 @@ async def turbo45(req: ChatRequest):
668
 
669
  @app.post("/ultra")
670
  async def ultra(req: ChatRequest):
671
- """Rox 5 Ultra - Most advanced with streaming and web search"""
672
  messages: list = []
673
  system_prompt = _system_prompt_for("ultra")
674
  if system_prompt:
675
  messages.append({"role": "system", "content": system_prompt})
676
  messages.extend([m.model_dump() for m in req.messages])
677
-
678
- # Handle web search if enabled
679
- if req.websearch:
680
- messages = await _handle_websearch(app, messages, req.websearch, req.websearch_results or 5)
681
-
682
  temperature = _effective_temperature(req.temperature)
683
  top_p = _effective_top_p(req.top_p)
684
  max_tokens = _effective_max_tokens(req.max_tokens, 8192)
 
16
  from openai import AsyncOpenAI
17
  import httpx
18
  import json
 
19
 
20
 
21
  # Load environment variables
 
70
 
71
  # Model configurations
72
  ROX_CORE_MODEL = "z-ai/glm4.7"
73
+ ROX_TURBO_MODEL = "deepseek-ai/deepseek-v3.1-terminus"
74
  ROX_CODER_MODEL = "z-ai/glm5"
75
  ROX_TURBO_45_MODEL = "qwen/qwen3-coder-480b-a35b-instruct"
76
+ ROX_ULTRA_MODEL = "deepseek-ai/deepseek-v3.2"
77
  ROX_DYNO_MODEL = "moonshotai/kimi-k2.5"
78
  ROX_CODER_7_MODEL = "z-ai/glm-5.1"
79
  ROX_VISION_MODEL = "google/gemma-3-27b-it"
 
284
  }
285
 
286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  # Helper function for streaming responses
288
  async def stream_response(
289
  app_: FastAPI,
 
411
  top_p: Optional[float] = None
412
  max_tokens: Optional[int] = None
413
  stream: Optional[bool] = False
 
 
414
 
415
 
416
  class ChatResponse(BaseModel):
 
434
 
435
  @app.post("/chat")
436
  async def chat(req: ChatRequest):
437
+ """Rox Core - Main conversational model with streaming support"""
438
  messages: list = []
439
  system_prompt = _system_prompt_for("core")
440
  if system_prompt:
441
  messages.append({"role": "system", "content": system_prompt})
442
  messages.extend([m.model_dump() for m in req.messages])
 
 
 
 
 
 
 
 
 
 
443
  temperature = _effective_temperature(req.temperature)
444
  top_p = _effective_top_p(req.top_p)
445
  max_tokens = _effective_max_tokens(req.max_tokens, 8192)
 
468
 
469
  @app.post("/turbo")
470
  async def turbo(req: ChatRequest):
471
+ """Rox 2.1 Turbo - Fast and efficient with streaming"""
472
  messages: list = []
473
  system_prompt = _system_prompt_for("turbo")
474
  if system_prompt:
475
  messages.append({"role": "system", "content": system_prompt})
476
  messages.extend([m.model_dump() for m in req.messages])
 
 
 
 
 
477
  temperature = _effective_temperature(req.temperature)
478
  top_p = _effective_top_p(req.top_p)
479
  max_tokens = _effective_max_tokens(req.max_tokens, 8192)
 
502
 
503
  @app.post("/coder")
504
  async def coder(req: ChatRequest):
505
+ """Rox 3.5 Coder - Specialized coding with streaming"""
506
  messages: list = []
507
  system_prompt = _system_prompt_for("coder")
508
  if system_prompt:
509
  messages.append({"role": "system", "content": system_prompt})
510
  messages.extend([m.model_dump() for m in req.messages])
 
 
 
 
 
511
  temperature = _effective_temperature(req.temperature)
512
  top_p = _effective_top_p(req.top_p)
513
  max_tokens = _effective_max_tokens(req.max_tokens, 16384)
 
581
 
582
  @app.post("/ultra")
583
  async def ultra(req: ChatRequest):
584
+ """Rox 5 Ultra - Most advanced with streaming"""
585
  messages: list = []
586
  system_prompt = _system_prompt_for("ultra")
587
  if system_prompt:
588
  messages.append({"role": "system", "content": system_prompt})
589
  messages.extend([m.model_dump() for m in req.messages])
 
 
 
 
 
590
  temperature = _effective_temperature(req.temperature)
591
  top_p = _effective_top_p(req.top_p)
592
  max_tokens = _effective_max_tokens(req.max_tokens, 8192)