@app.post("/v1/embeddings", response_model=EmbeddingResponse)
async def get_embeddings(request: EmbeddingRequest):
    """Return embeddings for a single string or a list of strings.

    Fix: the original usage accounting iterated ``request.input`` directly, so
    a plain-string input was consumed character by character (each char counted
    as a "text").  The input is now normalized to a list once and that list is
    reused for both the embedding pass and the token accounting.
    """
    # Normalize: a bare string is ONE input, not a sequence of characters.
    if isinstance(request.input, str):
        texts = [request.input]
    else:
        texts = request.input

    embeddings = [embedding_model.encode(text) for text in texts]
    embeddings = [embedding.tolist() for embedding in embeddings]

    def num_tokens_from_string(string: str) -> int:
        """Return the number of tokens in *string* using the cl100k_base tokenizer."""
        encoding = tiktoken.get_encoding('cl100k_base')
        return len(encoding.encode(string))

    response = {
        "data": [
            {
                "object": "embedding",
                "embedding": embedding,
                "index": index,
            }
            for index, embedding in enumerate(embeddings)
        ],
        "model": request.model,
        "object": "list",
        "usage": CompletionUsage(
            # NOTE(review): prompt_tokens keeps the original whitespace-split
            # count while total_tokens uses the tiktoken count — two different
            # tokenizers.  Preserved as-is; confirm which count clients expect
            # before unifying them.
            prompt_tokens=sum(len(text.split()) for text in texts),
            completion_tokens=0,
            total_tokens=sum(num_tokens_from_string(text) for text in texts),
        ),
    }
    return response
# NOTE(review): this fragment is the tail of a streaming-generation generator
# whose `def` and token loop are outside this chunk — the `if/break` belongs to
# the per-token loop, and everything below it runs after the loop; the exact
# nesting is not visible here and should be confirmed against the full file.
if stop_found:
    break  # last token produced — leave the loop and emit the final chunk

# Only last stream result contains finish_reason, we set finish_reason as stop
ret = {
    "text": response,
    "usage": {
        # prompt length plus generated length; completion is the difference
        "prompt_tokens": input_echo_len,
        "completion_tokens": total_len - input_echo_len,
        "total_tokens": total_len,
    },
    "finish_reason": "stop",
}
yield ret

# Reclaim Python-heap and CUDA memory once the stream has finished.
gc.collect()
torch.cuda.empty_cache()
def process_chatglm_messages(messages, tools=None):
    """Convert OpenAI-style chat messages into ChatGLM3's conversation format.

    When *tools* are supplied, a tool-describing system message is prepended
    and the first caller-provided system message is dropped in its place.
    ``function`` messages are relayed back as ``observation`` entries, and
    assistant messages carrying a function_call are split on the
    ``<|assistant|>`` marker into metadata/content pairs.
    """
    converted = []
    drop_next_system = False
    if tools:
        converted.append({
            "role": "system",
            "content": "Answer the following questions as best as you can. You have access to the following tools:",
            "tools": tools,
        })
        drop_next_system = True

    for msg in messages:
        role, content, func_call = msg.role, msg.content, msg.function_call

        if role == "function":
            # Tool results go back to the model as observations.
            converted.append({"role": "observation", "content": content})
            continue

        if role == "assistant" and func_call is not None:
            # A tool-calling turn may contain several segments separated by
            # the "<|assistant|>" marker; each segment is "metadata\nbody".
            for segment in content.split("<|assistant|>"):
                metadata, body = segment.split("\n", maxsplit=1)
                converted.append({
                    "role": role,
                    "metadata": metadata,
                    "content": body.strip(),
                })
            continue

        if role == "system" and drop_next_system:
            # The injected tool prompt replaces the first original system message.
            drop_next_system = False
            continue

        converted.append({"role": role, "content": content})

    return converted
<|system|> You are ChatGLM3, a large language model trained by Zhipu.AI. Follow the user's instructions carefully. Respond using markdown. <|user|> Hello <|assistant|> Hello, I'm ChatGLM3. How can I assist you today?