Hare-1.1B-Chat

Category: ai, mistral
Repository: https://modelscope.cn/models/LiteAITeam/Hare-1.1B-Chat
License: apache-2.0

Project Details

Lite-AI

Hare-1.1B-Chat is fine-tuned from Hare-1.1B-base and supports multi-turn dialogue. For detailed information about the model, please refer to Hare-1.1B-base.

Inference
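The `chat` helper below assembles a round-based conversation template. For a single turn with the default system prompt, the rendered prompt (as constructed by the code in this section) looks like:

<round_start>system
You are a helpful assistant.<round_end>
<round_start>user
Hello!<round_end>
<round_start>assistant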

import torch

from transformers import GenerationConfig
from transformers import AutoTokenizer, AutoModelForCausalLM

def chat(
    messages,
    model,
    tokenizer,
    generate_config=None,
    max_length=512,
    max_new_tokens=256,
):
    if generate_config is None:
        generate_config = GenerationConfig(
            do_sample=False,
            max_length=max_length,
            max_new_tokens=max_new_tokens,
            eos_token_id=32001,  # stop at the chat template's round-end token
        )

    if messages[0]["role"] == "system":
        system = messages[0]["content"]
        messages = messages[0:]
    else:
        system = "You are a helpful assistant."

    # Token budget: reserve room for the system prompt first.
    n_token = max_length
    system = "<round_start>system\n{}<round_end>\n".format(system)
    system_token = tokenizer.encode(system, add_special_tokens=False)
    n_token -= len(system_token)

    # The latest user query always goes in, ending with the assistant cue.
    query = messages[-1]["content"]
    query = "<round_start>user\n{}<round_end>\n<round_start>assistant\n".format(query)
    query_token = tokenizer.encode(query, add_special_tokens=False)
    n_token -= len(query_token)

    # Walk the remaining history backwards in (user, assistant) pairs and
    # prepend rounds while they still fit in the token budget.
    messages = messages[:-1]
    conversations = []
    for ids in range(len(messages) - 1, 0, -2):
        user = messages[ids - 1]["content"]
        assistant = messages[ids]["content"]

        round_text = "<round_start>user\n{}<round_end>\n<round_start>assistant\n{}<round_end>\n".format(user, assistant)
        round_token = tokenizer.encode(round_text, add_special_tokens=False)

        if n_token - len(round_token) > 0:
            n_token -= len(round_token)  # consume budget so later rounds are counted too
            conversations = [round_text] + conversations
        else:
            break

    # Final prompt: system + as much history as fits + the current query.
    prompt = system + "".join(conversations) + query
    prompt_token = tokenizer(prompt, add_special_tokens=False, return_tensors="pt")
    prompt_token = prompt_token.to(model.device)

    response = model.generate(
        generation_config=generate_config,
        **prompt_token
    )
    # Keep only the newly generated tokens after the prompt.
    output_tokens = response[0].cpu().numpy()[prompt_token.input_ids.size(1):]
    output_string = tokenizer.decode(output_tokens, skip_special_tokens=True).replace("<round_end>", "")
    return output_string, prompt

# ======================
#       main
# ======================
if __name__ == "__main__":

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_path = "LiteAI-Team/Hare-1.1B-chat"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path)
    model.to(device)

    query = "Hello!"
    messages = [
        # {"role": "system", "content": "You are an AI assistant, aiming to always upholding high standards of performance and quality."},
        {"role": "user", "content": query}
    ]
    response, input_prompt = chat(messages=messages, model=model, tokenizer=tokenizer)

    print("=" * 25, " User ", "=" * 25,)
    print(query)
    print("=" * 25, " Assistant ", "=" * 25,)
    print(response)
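
For multi-turn chat, earlier user/assistant exchanges are passed in `messages` ahead of the new query. A minimal continuation sketch reusing the `chat` helper above (the follow-up question is a made-up example):

    # Reuse the previous exchange as history and ask a hypothetical follow-up.
    messages = [
        {"role": "user", "content": query},
        {"role": "assistant", "content": response},
        {"role": "user", "content": "Can you introduce yourself?"},
    ]
    follow_up, _ = chat(messages=messages, model=model, tokenizer=tokenizer)
    print("=" * 25, " Assistant ", "=" * 25)
    print(follow_up)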