Skip to content

Commit d25b6f9

Browse files
committed
Fix sampling params
1 parent c8ee100 commit d25b6f9

File tree

4 files changed

+7
-37
lines changed

4 files changed

+7
-37
lines changed

src/engine.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from constants import DEFAULT_MAX_CONCURRENCY, DEFAULT_BATCH_SIZE, DEFAULT_BATCH_SIZE_GROWTH_FACTOR, DEFAULT_MIN_BATCH_SIZE
1616
from tokenizer import TokenizerWrapper
1717
from config import EngineConfig
18-
from sampling_params import validate_sampling_params
1918

2019
class vLLMEngine:
2120
def __init__(self, engine = None):
@@ -33,10 +32,9 @@ def dynamic_batch_size(self, current_batch_size, batch_size_growth_factor):
3332

3433
async def generate(self, job_input: JobInput):
3534
try:
36-
validated_sampling_params = validate_sampling_params(job_input.input_sampling_params)
3735
async for batch in self._generate_vllm(
3836
llm_input=job_input.llm_input,
39-
validated_sampling_params=validated_sampling_params,
37+
validated_sampling_params=job_input.sampling_params,
4038
batch_size=job_input.max_batch_size,
4139
stream=job_input.stream,
4240
apply_chat_template=job_input.apply_chat_template,

src/sampling_params.py

Lines changed: 0 additions & 31 deletions
This file was deleted.

src/utils.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import Any, Dict
44
from vllm.utils import random_uuid
55
from vllm.entrypoints.openai.protocol import ErrorResponse
6+
from vllm import SamplingParams
67

78
logging.basicConfig(level=logging.INFO)
89

@@ -31,7 +32,7 @@ def __init__(self, job):
3132
self.max_batch_size = job.get("max_batch_size")
3233
self.apply_chat_template = job.get("apply_chat_template", False)
3334
self.use_openai_format = job.get("use_openai_format", False)
34-
self.input_sampling_params = job.get("sampling_params", {})
35+
self.sampling_params = SamplingParams(**job.get("sampling_params", {}))
3536
self.request_id = random_uuid()
3637
batch_size_growth_factor = job.get("batch_size_growth_factor")
3738
self.batch_size_growth_factor = float(batch_size_growth_factor) if batch_size_growth_factor else None
@@ -62,4 +63,6 @@ def update(self):
6263
def create_error_response(message: str, err_type: str = "BadRequestError", status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> ErrorResponse:
6364
return ErrorResponse(message=message,
6465
type=err_type,
65-
code=status_code.value)
66+
code=status_code.value)
67+
68+

vllm-base-image/vllm

Submodule vllm updated from 9cb4acd to c46d230

0 commit comments

Comments (0)