垃圾分类 Agent — Qwen3-VL + OpenVINO
基于 modelscope-workshop 项目,使用 OpenVINO 运行 Qwen3-VL,对垃圾图片进行自动分类并给出处理建议。
前置条件
- 虚拟环境已激活:
  `source ~/modelscope-workshop/ov_workshop/bin/activate`
- 模型目录存在:
  `~/modelscope-workshop/lab1-multimodal-vlm/Qwen3-VL-4B-Instruct-int4-ov/`
加载模型
import os
from optimum.intel.openvino import OVModelForVisualCausalLM
from transformers import AutoProcessor
# Pixel-count bounds forwarded to the processor below.
min_pixels = 256 * 28 * 28
max_pixels = 1280 * 28 * 28

# Local directory of the OpenVINO int4 model, resolved under the user's home.
model_dir = os.path.expanduser(
    "~/modelscope-workshop/lab1-multimodal-vlm/Qwen3-VL-4B-Instruct-int4-ov"
)

# Load the vision-language model first and report it, then the processor.
model = OVModelForVisualCausalLM.from_pretrained(model_dir, device="AUTO")
print("✅ 模型加载完成")

processor = AutoProcessor.from_pretrained(
    model_dir,
    min_pixels=min_pixels,
    max_pixels=max_pixels,
    fix_mistral_regex=True,
)
若模型目录不存在,请在加载模型之前先运行以下代码下载:
from pathlib import Path

from modelscope import snapshot_download

# Fetch the int4 OpenVINO model only when the local directory is missing.
model_dir = Path(
    "~/modelscope-workshop/lab1-multimodal-vlm/Qwen3-VL-4B-Instruct-int4-ov"
).expanduser()
if not model_dir.exists():
    snapshot_download(
        "snake7gun/Qwen3-VL-4B-Instruct-int4-ov",
        local_dir=str(model_dir),
    )
垃圾分类推理
核心函数
import json
import re
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Dict
from PIL import Image
# The four standard waste categories; the prompt and the label
# normalization both draw from this list.
CATEGORIES = [
    "可回收物",
    "有害垃圾",
    "厨余垃圾",
    "其他垃圾",
]
def _extract_json(text: str) -> Dict:
"""优先解析模型输出中的 JSON;失败时返回空字典。"""
text = text.strip()
candidate = text
match = re.search(r"\{[\s\S]*\}", text)
if match:
candidate = match.group(0)
try:
return json.loads(candidate)
except Exception:
return {}
def _normalize_category(raw: str) -> str:
    """Map a free-form category label onto one of the four standard labels.

    A standard label contained in ``raw`` wins (checked in ``CATEGORIES``
    order). Otherwise the first alias keyword found in ``raw`` decides.
    Anything unrecognized, including empty input, becomes "其他垃圾".
    """
    label = (raw or "").strip()

    exact = next((name for name in CATEGORIES if name in label), None)
    if exact is not None:
        return exact

    # Alias keyword -> standard label; order matters, first hit wins.
    aliases = (
        ("可回收", "可回收物"),
        ("有害", "有害垃圾"),
        ("厨余", "厨余垃圾"),
        ("湿垃圾", "厨余垃圾"),
        ("干垃圾", "其他垃圾"),
        ("其他", "其他垃圾"),
    )
    return next(
        (target for keyword, target in aliases if keyword in label),
        "其他垃圾",
    )
def classify_waste(image_path: Path, user_note: str = "") -> Dict:
    """Classify the waste shown in an image and return a structured result.

    Builds a single-turn chat message (image + instruction prompt), runs it
    through the module-level ``processor`` and ``model``, and parses the
    reply as JSON.

    Args:
        image_path: Path of the image file to classify.
        user_note: Optional free-text hint from the user; it is embedded in
            the prompt ("无" is used when empty).

    Returns:
        A dict with the keys ``category`` (one of ``CATEGORIES``),
        ``reason``, ``confidence`` (float clamped to [0.0, 1.0]),
        ``advice`` and ``raw_output`` (the decoded model reply). When the
        reply contains no usable JSON object the category falls back to
        "其他垃圾", the confidence to 0.5 and the texts to their defaults.
    """
    import math  # local import: only needed for the NaN check below

    prompt = f"""
你是一个垃圾分类助手。请根据图片内容判断主要垃圾类别。
分类只能从以下四类中选择一项:{', '.join(CATEGORIES)}。
用户补充信息:{user_note or '无'}
请严格输出 JSON(不要额外文字),格式如下:
{{
"category": "可回收物|有害垃圾|厨余垃圾|其他垃圾",
"reason": "简要理由,不超过40字",
"confidence": 0.0,
"advice": "处理建议,不超过40字"
}}
""".strip()

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": str(image_path)},
                {"type": "text", "text": prompt},
            ],
        }
    ]
    inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    )
    output_ids = model.generate(**inputs, max_new_tokens=160, do_sample=False)

    # Drop the echoed prompt tokens so only the newly generated part is decoded.
    generated_trimmed = [
        out[len(inp):] for inp, out in zip(inputs.input_ids, output_ids)
    ]
    text = processor.batch_decode(
        generated_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0].strip()

    data = _extract_json(text)
    if not isinstance(data, dict):
        # Valid JSON that is not an object carries no usable fields.
        data = {}

    def _text_field(key: str, default: str) -> str:
        # JSON null or a blank value falls back to the default instead of
        # surfacing as the literal string "None" or an empty line.
        value = data.get(key)
        if value is None:
            return default
        return str(value).strip() or default

    category = _normalize_category(str(data.get("category") or ""))
    reason = _text_field("reason", "未提供理由")
    advice = _text_field("advice", "请按当地垃圾分类规范投放。")

    try:
        confidence = float(data.get("confidence", 0.5))
    except (TypeError, ValueError, OverflowError):
        confidence = 0.5
    if math.isnan(confidence):
        # json.loads accepts NaN; without this check the clamp below would
        # turn it into 1.0 because every comparison with NaN is False.
        confidence = 0.5
    confidence = max(0.0, min(1.0, confidence))

    return {
        "category": category,
        "reason": reason,
        "confidence": confidence,
        "advice": advice,
        "raw_output": text,
    }
def classify_waste_image(image: Image.Image, user_note: str = "") -> Dict:
    """Classify a PIL image by saving it to a temporary PNG first.

    Args:
        image: The image to classify, or ``None`` when nothing was uploaded.
        user_note: Optional free-text hint forwarded to ``classify_waste``.

    Returns:
        The result dict of ``classify_waste``. When ``image`` is ``None`` a
        placeholder result is returned (category "其他垃圾", confidence 0.0)
        without running the model.
    """
    if image is None:
        return {
            "category": "其他垃圾",
            "reason": "未上传图片",
            "confidence": 0.0,
            "advice": "请先上传图片再分类。",
            "raw_output": "",
        }

    # Reserve a temp path and close the handle immediately; the file is
    # written and read by name afterwards, which some platforms do not
    # allow while the original handle is still open.
    with NamedTemporaryFile(suffix=".png", delete=False) as handle:
        tmp_path = Path(handle.name)

    try:
        # Saving happens inside the try block so that a failed save still
        # removes the temp file created with delete=False.
        image.save(tmp_path)
        return classify_waste(tmp_path, user_note)
    finally:
        try:
            tmp_path.unlink()
        except FileNotFoundError:
            # Already gone: nothing left to clean up.
            pass
使用示例
# Classify a local image and pretty-print the structured result.
sample_image = Path("waste.jpg")
result = classify_waste(sample_image)
print(json.dumps(result, ensure_ascii=False, indent=2))
# Example output:
# {
#   "category": "可回收物",
#   "reason": "塑料瓶为可回收材料,无有害成分",
#   "confidence": 0.98,
#   "advice": "投放至可回收物垃圾桶",
#   "raw_output": "{ ... }"
# }

# Classify again, this time passing a user hint along with the image.
result = classify_waste(sample_image, user_note="好像是电池")
# -> category is expected to come back as "有害垃圾"
Gradio 交互式演示
import gradio as gr
def waste_agent_demo(image, user_note):
    """Gradio callback: classify the uploaded image and format the result.

    Returns a tuple of (human-readable summary text, result dict) matching
    the two output components the callback is wired to.
    """
    result = classify_waste_image(image, user_note)
    summary_lines = [
        f"分类结果:{result['category']}",
        f"置信度:{result['confidence']:.2f}",
        f"判断理由:{result['reason']}",
        f"处理建议:{result['advice']}",
    ]
    return "\n".join(summary_lines), result
# Demo UI: an image upload and an optional note as inputs, a text summary
# and a JSON view as outputs.
# NOTE(review): the indentation of this snippet was lost; the nesting of the
# components under gr.Row()/gr.Column() must be restored from the original
# notebook before running it.
with gr.Blocks(title="垃圾分类 Agent") as demo:
gr.Markdown("# 垃圾分类 Agent\n上传图片,自动判断垃圾类别并给出处理建议。")
with gr.Row():
image_input = gr.Image(type="pil", label="上传垃圾图片")
with gr.Column():
note_input = gr.Textbox(label="补充描述(可选)", placeholder="例如:电池、塑料瓶、果皮等")
run_button = gr.Button("开始分类")
text_output = gr.Textbox(label="分类结果")
json_output = gr.JSON(label="结构化输出")
# On click, waste_agent_demo receives (image, note) and its two return
# values fill the text box and the JSON view.
run_button.click(waste_agent_demo, [image_input, note_input], [text_output, json_output])
# share=False: no public share link is requested.
demo.launch(share=False) # open http://127.0.0.1:7860 in a browser
输出说明
分类结果为结构化 JSON,包含以下字段:
| 字段 | 类型 | 说明 |
| ------------ | ------ | ----------------------------------------------------------- |
| category | string | 分类结果:可回收物 / 有害垃圾 / 厨余垃圾 / 其他垃圾 |
| reason | string | 判断理由(不超过 40 字) |
| confidence | float | 置信度(0.0 ~ 1.0) |
| advice | string | 处理建议(不超过 40 字) |
| raw_output | string | 模型原始输出文本 |
常见错误排查
| 错误 | 原因 | 解决方法 |
| ------------------------------ | ------------------ | ------------------------------------------------------------ |
| FileNotFoundError: model_dir | 模型未下载 | 确认模型目录路径正确,或运行上方下载代码 |
| JSON 解析失败返回空字典 | 模型输出格式不稳定 | 此时 category 回退为 其他垃圾,confidence 取默认值 0.5;可调大 max_new_tokens 后重试 |
| Gradio 端口占用 | 7860 端口被占用 | demo.launch(server_port=7861) |
Scan to join WeChat group