一、安装基础环境
%%capture
# IPython cell magic above: capture (hide) the output produced by this cell.
# Install the released Unsloth package first so its dependencies are pulled in,
# then overwrite only the unsloth package itself with the current GitHub source
# (--no-deps leaves the already-installed dependencies untouched).
!pip install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
安装 wandb,登录 wandb 与 Hugging Face
from huggingface_hub import login
from google.colab import userdata
hf_token = userdata.get('HUGGINGFACE_TOKEN')
login(hf_token)
!pip install wandb
import wandb
wb_token = userdata.get("wandb")
wandb.login(key=wb_token)
run = wandb.init(
project='Fine-tune-DeepSeek-R1-Distill-Llama-8B on Medical COT Dataset',
job_type="training",
anonymous="allow"
)
二、下载模型,引入unsloth框架
from unsloth import FastLanguageModel

# Loading options, kept as module-level names because `max_seq_length` is
# passed to the trainer again further down.
max_seq_length = 2048
dtype = None  # NOTE(review): presumably lets Unsloth choose the dtype - confirm
load_in_4bit = True

# Fetch the base model and its tokenizer; `hf_token` comes from the
# Hugging Face login step earlier in the notebook.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/DeepSeek-R1-Distill-Llama-8B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    token=hf_token,
)
三、准备训练文本,脚本
# Prompt template used for inference.
# It has two positional `{}` placeholders: the first receives the medical
# question, the second is placed directly after the opening `<think>` tag
# (the inference cells in this file pass an empty string there, so the model
# continues generating from `<think>`).
# The template text itself is runtime data sent to the model - do not edit it
# for cosmetic reasons.
prompt_style = """下面是一个描述任务的指令,与提供进一步上下文的输入配对。编写一个适当地完成请求的响应。
在回答之前,仔细思考这个问题,建立一个循序渐进的思维链,以确保一个合乎逻辑和准确的回答
### Instruction:
你是一位在临床推理、诊断和治疗计划方面拥有先进知识的医学专家。
请回答以下医学问题。
### Question:
{}
### Response:
<think>{}"""
# Sample question; the same question is asked before and after fine-tuning so
# the two answers can be compared.
question = "一个66岁女性,一位长期在咳嗽或打喷嚏等活动中出现非自主性尿失禁但夜间无尿失禁的女性接受了妇科检查和棉签测试。根据这些检查结果,膀胱测压最有可能揭示她的残余尿量和逼尿肌收缩情况如何?"
# Baseline: ask the sample question BEFORE any fine-tuning.
FastLanguageModel.for_inference(model)

# Fill the prompt template (empty string after `<think>`), tokenize it as a
# batch of one and move the tensors to the GPU.
inputs = tokenizer(
    [prompt_style.format(question, "")],
    return_tensors="pt",
).to("cuda")

# Generate at most 1200 new tokens for the prompt.
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)

# Decode the batch and print only the text that follows the first
# "### Response:" marker of the first (and only) sequence.
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])
下载公共医疗数据集
# Prompt template used to build the training texts.
# It has three positional `{}` placeholders, filled by
# `formatting_prompts_func` with (question, chain-of-thought, response):
# the chain-of-thought goes between `<think>` and `</think>`, and the final
# response follows the closing tag.
# NOTE(review): the wording and the `<think>` line layout differ from the
# inference template `prompt_style` - confirm this is intentional.
# The template text itself is runtime data - do not edit it for cosmetic reasons.
train_prompt_style = """以下是描述一项任务的指令,以及提供进一步背景信息的输入内容。
请给出恰当的回答以完成请求。
在回答之前,请仔细思考问题,并构建一个逐步的思维链条,以确保回答合乎逻辑且准确无误。
### Instruction:
您是一位医学专家,在临床推理、诊断和治疗规划方面拥有高深的知识。
请回答以下医学问题
### Question:
{}
### Response:
<think>
{}
</think>
{}"""
# End-of-sequence token of the loaded tokenizer; it is appended to every
# training text (presumably so the model learns where to stop - confirm).
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    """Build the training text for every example in a batch.

    Intended for ``Dataset.map(..., batched=True)``: each value in
    ``examples`` is a list with one entry per example.

    Args:
        examples: Mapping with the columns ``"Question"``, ``"Complex_CoT"``
            and ``"Response"``, each a list of strings of equal length.

    Returns:
        A dict ``{"text": [...]}`` with one formatted string per example:
        the module-level ``train_prompt_style`` filled with
        (question, chain-of-thought, response) followed by ``EOS_TOKEN``.

    Raises:
        KeyError: If one of the three expected columns is missing.
    """
    questions = examples["Question"]
    cots = examples["Complex_CoT"]
    responses = examples["Response"]
    # Descriptive loop names on purpose: the previous loop variable was
    # called `input`, which shadowed the builtin of the same name.
    texts = [
        train_prompt_style.format(question_text, cot_text, response_text) + EOS_TOKEN
        for question_text, cot_text, response_text in zip(questions, cots, responses)
    ]
    return {"text": texts}
from datasets import load_dataset

# Load the first 5000 rows of the "train" split from the "zh" configuration
# of the public medical reasoning dataset.
# NOTE(review): trust_remote_code=True permits running code shipped with the
# dataset repository - confirm it is actually required for this dataset.
dataset = load_dataset(
    "FreedomIntelligence/medical-o1-reasoning-SFT",
    "zh",
    split="train[0:5000]",
    trust_remote_code=True,
)

# Add a "text" column holding the fully formatted training prompt.
dataset = dataset.map(formatting_prompts_func, batched=True)

# Bare expression: shows the first formatted example when this is the last
# statement of a notebook cell.
dataset["text"][0]
加载训练脚本,启动微调
# Wrap the loaded model with LoRA adapters; `model` is rebound to the
# PEFT-wrapped model that the trainer receives below.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    # Module names that receive adapters.
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    # True or "unsloth" for very long context
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Supervised fine-tuning on the "text" column produced by
# `formatting_prompts_func`.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=TrainingArguments(
        # 2 samples per device x 4 accumulation steps = 8 samples per
        # optimizer step on each device.
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        # Use num_train_epochs = 1, warmup_ratio for full training runs!
        warmup_steps=5,
        max_steps=60,
        learning_rate=2e-4,
        # Exactly one of fp16 / bf16 is enabled, depending on bf16 support.
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=10,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)

# Start training; the value returned by train() is kept in `trainer_stats`.
trainer_stats = trainer.train()
四、使用训练后的模型进行问题回答
# Ask the same sample question again, now with the fine-tuned model.
FastLanguageModel.for_inference(model)  # Unsloth has 2x faster inference!

# Fill the prompt template (empty string after `<think>`), tokenize it as a
# batch of one and move the tensors to the GPU.
inputs = tokenizer(
    [prompt_style.format(question, "")],
    return_tensors="pt",
).to("cuda")

# Generate at most 1200 new tokens for the prompt.
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)

# Decode the batch and print only the text that follows the first
# "### Response:" marker of the first (and only) sequence.
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])
合并微调后的模型进行发布
# Hub repository that receives the published model.
new_model_online = "zhigang1237/DeepSeek-R1-Medical-COT"

# Alternative: publish only the LoRA adapter files instead of a merged model.
# model.push_to_hub(new_model_online)
# tokenizer.push_to_hub(new_model_online)

# Publish the merged model together with the tokenizer, using the
# "merged_16bit" save method.
model.push_to_hub_merged(
    new_model_online,
    tokenizer,
    save_method="merged_16bit",
)