Upload 13 files
Browse files
- README.md +1 -27
- python-version +1 -0
- server.py +6 -98
README.md
CHANGED
|
@@ -35,22 +35,9 @@ curl -X POST https://Rox-Turbo-API.hf.space/chat \
|
|
| 35 |
-d '{"messages":[{"role":"user","content":"Hello"}]}'
|
| 36 |
```
|
| 37 |
|
| 38 |
-
### With Web Search (NEW!)
|
| 39 |
-
|
| 40 |
-
Get real-time information from the web:
|
| 41 |
-
|
| 42 |
-
```bash
|
| 43 |
-
curl -X POST https://Rox-Turbo-API.hf.space/chat \
|
| 44 |
-
-H "Content-Type: application/json" \
|
| 45 |
-
-d '{
|
| 46 |
-
"messages":[{"role":"user","content":"Latest AI news?"}],
|
| 47 |
-
"websearch": true
|
| 48 |
-
}'
|
| 49 |
-
```
|
| 50 |
-
|
| 51 |
## Usage
|
| 52 |
|
| 53 |
-
###
|
| 54 |
|
| 55 |
```json
|
| 56 |
{
|
|
@@ -62,18 +49,6 @@ curl -X POST https://Rox-Turbo-API.hf.space/chat \
|
|
| 62 |
}
|
| 63 |
```
|
| 64 |
|
| 65 |
-
### With Web Search
|
| 66 |
-
|
| 67 |
-
```json
|
| 68 |
-
{
|
| 69 |
-
"messages": [
|
| 70 |
-
{"role": "user", "content": "What's new in AI?"}
|
| 71 |
-
],
|
| 72 |
-
"websearch": true,
|
| 73 |
-
"websearch_results": 5
|
| 74 |
-
}
|
| 75 |
-
```
|
| 76 |
-
|
| 77 |
### Response
|
| 78 |
|
| 79 |
```json
|
|
@@ -136,7 +111,6 @@ curl -X POST https://Rox-Turbo-API.hf.space/turbo45 \
|
|
| 136 |
|
| 137 |
## Documentation
|
| 138 |
|
| 139 |
-
- [Web Search Guide](docs/WEBSEARCH.md) - Real-time web search
|
| 140 |
- [Code Examples](docs/CODE.md) - Copy-paste code
|
| 141 |
- [Developer Guide](docs/DEVELOPER_GUIDE.md) - Integration guide
|
| 142 |
- [Models Guide](docs/MODELS.md) - Model details
|
|
|
|
| 35 |
-d '{"messages":[{"role":"user","content":"Hello"}]}'
|
| 36 |
```
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
## Usage
|
| 39 |
|
| 40 |
+
### Request
|
| 41 |
|
| 42 |
```json
|
| 43 |
{
|
|
|
|
| 49 |
}
|
| 50 |
```
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
### Response
|
| 53 |
|
| 54 |
```json
|
|
|
|
| 111 |
|
| 112 |
## Documentation
|
| 113 |
|
|
|
|
| 114 |
- [Code Examples](docs/CODE.md) - Copy-paste code
|
| 115 |
- [Developer Guide](docs/DEVELOPER_GUIDE.md) - Integration guide
|
| 116 |
- [Models Guide](docs/MODELS.md) - Model details
|
python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.14
|
server.py
CHANGED
|
@@ -16,7 +16,6 @@ from pydantic import BaseModel, Field
|
|
| 16 |
from openai import AsyncOpenAI
|
| 17 |
import httpx
|
| 18 |
import json
|
| 19 |
-
from websearch import perform_web_search
|
| 20 |
|
| 21 |
|
| 22 |
# Load environment variables
|
|
@@ -71,10 +70,10 @@ SYSTEM_PROMPT_MODE = os.getenv("SYSTEM_PROMPT_MODE", "full").strip().lower()
|
|
| 71 |
|
| 72 |
# Model configurations
|
| 73 |
ROX_CORE_MODEL = "z-ai/glm4.7"
|
| 74 |
-
ROX_TURBO_MODEL = "deepseek-ai/deepseek-v3.
|
| 75 |
ROX_CODER_MODEL = "z-ai/glm5"
|
| 76 |
ROX_TURBO_45_MODEL = "qwen/qwen3-coder-480b-a35b-instruct"
|
| 77 |
-
ROX_ULTRA_MODEL = "deepseek-ai/deepseek-
|
| 78 |
ROX_DYNO_MODEL = "moonshotai/kimi-k2.5"
|
| 79 |
ROX_CODER_7_MODEL = "z-ai/glm-5.1"
|
| 80 |
ROX_VISION_MODEL = "google/gemma-3-27b-it"
|
|
@@ -285,70 +284,6 @@ def _sse_headers() -> dict:
|
|
| 285 |
}
|
| 286 |
|
| 287 |
|
| 288 |
-
async def _handle_websearch(
|
| 289 |
-
app_: FastAPI,
|
| 290 |
-
messages: list,
|
| 291 |
-
websearch_enabled: bool,
|
| 292 |
-
num_results: int
|
| 293 |
-
) -> list:
|
| 294 |
-
"""
|
| 295 |
-
Handle web search if enabled and inject results into messages
|
| 296 |
-
|
| 297 |
-
Args:
|
| 298 |
-
app_: FastAPI app instance
|
| 299 |
-
messages: Current message list
|
| 300 |
-
websearch_enabled: Whether web search is enabled
|
| 301 |
-
num_results: Number of search results to fetch
|
| 302 |
-
|
| 303 |
-
Returns:
|
| 304 |
-
Updated messages list with search results if applicable
|
| 305 |
-
"""
|
| 306 |
-
if not websearch_enabled or not messages:
|
| 307 |
-
return messages
|
| 308 |
-
|
| 309 |
-
# Get the last user message as search query
|
| 310 |
-
last_user_msg = None
|
| 311 |
-
for msg in reversed(messages):
|
| 312 |
-
if msg.get("role") == "user":
|
| 313 |
-
last_user_msg = msg.get("content", "")
|
| 314 |
-
break
|
| 315 |
-
|
| 316 |
-
if not last_user_msg:
|
| 317 |
-
return messages
|
| 318 |
-
|
| 319 |
-
# Perform web search
|
| 320 |
-
http_client = getattr(app_.state, "http_client", None)
|
| 321 |
-
if not http_client:
|
| 322 |
-
logger.warning("HTTP client not available for web search")
|
| 323 |
-
return messages
|
| 324 |
-
|
| 325 |
-
try:
|
| 326 |
-
success, search_results, error = await perform_web_search(
|
| 327 |
-
http_client,
|
| 328 |
-
last_user_msg,
|
| 329 |
-
num_results
|
| 330 |
-
)
|
| 331 |
-
|
| 332 |
-
if success and search_results:
|
| 333 |
-
# Inject search results before the last user message
|
| 334 |
-
search_context = {
|
| 335 |
-
"role": "system",
|
| 336 |
-
"content": f"[Web Search Results]\n{search_results}\n\nUse these search results to provide an accurate, up-to-date answer to the user's question."
|
| 337 |
-
}
|
| 338 |
-
|
| 339 |
-
# Insert search results before last user message
|
| 340 |
-
messages_copy = messages[:-1] + [search_context, messages[-1]]
|
| 341 |
-
logger.info("Web search completed: %d results", num_results)
|
| 342 |
-
return messages_copy
|
| 343 |
-
else:
|
| 344 |
-
if error:
|
| 345 |
-
logger.warning("Web search failed: %s", error)
|
| 346 |
-
except Exception as e:
|
| 347 |
-
logger.error("Web search error: %s", str(e))
|
| 348 |
-
|
| 349 |
-
return messages
|
| 350 |
-
|
| 351 |
-
|
| 352 |
# Helper function for streaming responses
|
| 353 |
async def stream_response(
|
| 354 |
app_: FastAPI,
|
|
@@ -476,8 +411,6 @@ class ChatRequest(BaseModel):
|
|
| 476 |
top_p: Optional[float] = None
|
| 477 |
max_tokens: Optional[int] = None
|
| 478 |
stream: Optional[bool] = False
|
| 479 |
-
websearch: Optional[bool] = False
|
| 480 |
-
websearch_results: Optional[int] = 5
|
| 481 |
|
| 482 |
|
| 483 |
class ChatResponse(BaseModel):
|
|
@@ -501,22 +434,12 @@ class HFResponseItem(BaseModel):
|
|
| 501 |
|
| 502 |
@app.post("/chat")
|
| 503 |
async def chat(req: ChatRequest):
|
| 504 |
-
"""Rox Core - Main conversational model with streaming support
|
| 505 |
messages: list = []
|
| 506 |
system_prompt = _system_prompt_for("core")
|
| 507 |
if system_prompt:
|
| 508 |
messages.append({"role": "system", "content": system_prompt})
|
| 509 |
messages.extend([m.model_dump() for m in req.messages])
|
| 510 |
-
|
| 511 |
-
# Handle web search if enabled
|
| 512 |
-
if req.websearch:
|
| 513 |
-
messages = await _handle_websearch(
|
| 514 |
-
app,
|
| 515 |
-
messages,
|
| 516 |
-
req.websearch,
|
| 517 |
-
req.websearch_results or 5
|
| 518 |
-
)
|
| 519 |
-
|
| 520 |
temperature = _effective_temperature(req.temperature)
|
| 521 |
top_p = _effective_top_p(req.top_p)
|
| 522 |
max_tokens = _effective_max_tokens(req.max_tokens, 8192)
|
|
@@ -545,17 +468,12 @@ async def chat(req: ChatRequest):
|
|
| 545 |
|
| 546 |
@app.post("/turbo")
|
| 547 |
async def turbo(req: ChatRequest):
|
| 548 |
-
"""Rox 2.1 Turbo - Fast and efficient with streaming
|
| 549 |
messages: list = []
|
| 550 |
system_prompt = _system_prompt_for("turbo")
|
| 551 |
if system_prompt:
|
| 552 |
messages.append({"role": "system", "content": system_prompt})
|
| 553 |
messages.extend([m.model_dump() for m in req.messages])
|
| 554 |
-
|
| 555 |
-
# Handle web search if enabled
|
| 556 |
-
if req.websearch:
|
| 557 |
-
messages = await _handle_websearch(app, messages, req.websearch, req.websearch_results or 5)
|
| 558 |
-
|
| 559 |
temperature = _effective_temperature(req.temperature)
|
| 560 |
top_p = _effective_top_p(req.top_p)
|
| 561 |
max_tokens = _effective_max_tokens(req.max_tokens, 8192)
|
|
@@ -584,17 +502,12 @@ async def turbo(req: ChatRequest):
|
|
| 584 |
|
| 585 |
@app.post("/coder")
|
| 586 |
async def coder(req: ChatRequest):
|
| 587 |
-
"""Rox 3.5 Coder - Specialized coding with streaming
|
| 588 |
messages: list = []
|
| 589 |
system_prompt = _system_prompt_for("coder")
|
| 590 |
if system_prompt:
|
| 591 |
messages.append({"role": "system", "content": system_prompt})
|
| 592 |
messages.extend([m.model_dump() for m in req.messages])
|
| 593 |
-
|
| 594 |
-
# Handle web search if enabled
|
| 595 |
-
if req.websearch:
|
| 596 |
-
messages = await _handle_websearch(app, messages, req.websearch, req.websearch_results or 5)
|
| 597 |
-
|
| 598 |
temperature = _effective_temperature(req.temperature)
|
| 599 |
top_p = _effective_top_p(req.top_p)
|
| 600 |
max_tokens = _effective_max_tokens(req.max_tokens, 16384)
|
|
@@ -668,17 +581,12 @@ async def turbo45(req: ChatRequest):
|
|
| 668 |
|
| 669 |
@app.post("/ultra")
|
| 670 |
async def ultra(req: ChatRequest):
|
| 671 |
-
"""Rox 5 Ultra - Most advanced with streaming
|
| 672 |
messages: list = []
|
| 673 |
system_prompt = _system_prompt_for("ultra")
|
| 674 |
if system_prompt:
|
| 675 |
messages.append({"role": "system", "content": system_prompt})
|
| 676 |
messages.extend([m.model_dump() for m in req.messages])
|
| 677 |
-
|
| 678 |
-
# Handle web search if enabled
|
| 679 |
-
if req.websearch:
|
| 680 |
-
messages = await _handle_websearch(app, messages, req.websearch, req.websearch_results or 5)
|
| 681 |
-
|
| 682 |
temperature = _effective_temperature(req.temperature)
|
| 683 |
top_p = _effective_top_p(req.top_p)
|
| 684 |
max_tokens = _effective_max_tokens(req.max_tokens, 8192)
|
|
|
|
| 16 |
from openai import AsyncOpenAI
|
| 17 |
import httpx
|
| 18 |
import json
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
# Load environment variables
|
|
|
|
| 70 |
|
| 71 |
# Model configurations
|
| 72 |
ROX_CORE_MODEL = "z-ai/glm4.7"
|
| 73 |
+
ROX_TURBO_MODEL = "deepseek-ai/deepseek-v3.1-terminus"
|
| 74 |
ROX_CODER_MODEL = "z-ai/glm5"
|
| 75 |
ROX_TURBO_45_MODEL = "qwen/qwen3-coder-480b-a35b-instruct"
|
| 76 |
+
ROX_ULTRA_MODEL = "deepseek-ai/deepseek-v3.2"
|
| 77 |
ROX_DYNO_MODEL = "moonshotai/kimi-k2.5"
|
| 78 |
ROX_CODER_7_MODEL = "z-ai/glm-5.1"
|
| 79 |
ROX_VISION_MODEL = "google/gemma-3-27b-it"
|
|
|
|
| 284 |
}
|
| 285 |
|
| 286 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
# Helper function for streaming responses
|
| 288 |
async def stream_response(
|
| 289 |
app_: FastAPI,
|
|
|
|
| 411 |
top_p: Optional[float] = None
|
| 412 |
max_tokens: Optional[int] = None
|
| 413 |
stream: Optional[bool] = False
|
|
|
|
|
|
|
| 414 |
|
| 415 |
|
| 416 |
class ChatResponse(BaseModel):
|
|
|
|
| 434 |
|
| 435 |
@app.post("/chat")
|
| 436 |
async def chat(req: ChatRequest):
|
| 437 |
+
"""Rox Core - Main conversational model with streaming support"""
|
| 438 |
messages: list = []
|
| 439 |
system_prompt = _system_prompt_for("core")
|
| 440 |
if system_prompt:
|
| 441 |
messages.append({"role": "system", "content": system_prompt})
|
| 442 |
messages.extend([m.model_dump() for m in req.messages])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 443 |
temperature = _effective_temperature(req.temperature)
|
| 444 |
top_p = _effective_top_p(req.top_p)
|
| 445 |
max_tokens = _effective_max_tokens(req.max_tokens, 8192)
|
|
|
|
| 468 |
|
| 469 |
@app.post("/turbo")
|
| 470 |
async def turbo(req: ChatRequest):
|
| 471 |
+
"""Rox 2.1 Turbo - Fast and efficient with streaming"""
|
| 472 |
messages: list = []
|
| 473 |
system_prompt = _system_prompt_for("turbo")
|
| 474 |
if system_prompt:
|
| 475 |
messages.append({"role": "system", "content": system_prompt})
|
| 476 |
messages.extend([m.model_dump() for m in req.messages])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 477 |
temperature = _effective_temperature(req.temperature)
|
| 478 |
top_p = _effective_top_p(req.top_p)
|
| 479 |
max_tokens = _effective_max_tokens(req.max_tokens, 8192)
|
|
|
|
| 502 |
|
| 503 |
@app.post("/coder")
|
| 504 |
async def coder(req: ChatRequest):
|
| 505 |
+
"""Rox 3.5 Coder - Specialized coding with streaming"""
|
| 506 |
messages: list = []
|
| 507 |
system_prompt = _system_prompt_for("coder")
|
| 508 |
if system_prompt:
|
| 509 |
messages.append({"role": "system", "content": system_prompt})
|
| 510 |
messages.extend([m.model_dump() for m in req.messages])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 511 |
temperature = _effective_temperature(req.temperature)
|
| 512 |
top_p = _effective_top_p(req.top_p)
|
| 513 |
max_tokens = _effective_max_tokens(req.max_tokens, 16384)
|
|
|
|
| 581 |
|
| 582 |
@app.post("/ultra")
|
| 583 |
async def ultra(req: ChatRequest):
|
| 584 |
+
"""Rox 5 Ultra - Most advanced with streaming"""
|
| 585 |
messages: list = []
|
| 586 |
system_prompt = _system_prompt_for("ultra")
|
| 587 |
if system_prompt:
|
| 588 |
messages.append({"role": "system", "content": system_prompt})
|
| 589 |
messages.extend([m.model_dump() for m in req.messages])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 590 |
temperature = _effective_temperature(req.temperature)
|
| 591 |
top_p = _effective_top_p(req.top_p)
|
| 592 |
max_tokens = _effective_max_tokens(req.max_tokens, 8192)
|