{
  "episode_id": "rider-delivery-02_w3SZVPiNxDU_60_120",
  "task_id": 9002,
  "task_name": "outdoor_food_delivery_ego_pov_continuation",
  "task_text_en": "First-person delivery cruise: residential block ride + four intersection crossings with NYC yellow taxis, school bus, and crosswalk yields (continuation of clip 01).",
  "task_text_zh": "第一视角送餐巡航：住宅街区行驶 + 通过 4 个含纽约黄色出租车、校车和斑马线让行的交叉口（接续 clip 01）。",
  "init_text_en": "Rider continues from clip 01 on the scooter, post-pickup, heading into a wider Manhattan avenue grid with steady cruise behavior.",
  "init_text_zh": "骑手延续 clip 01，已取餐，正驾驶电动车驶入更宽的曼哈顿大道路网，进入稳定巡航。",
  "robot": "Human Delivery Rider (Ego-POV Camera)",
  "robot_kinematics": "human + e-scooter · chest/helmet-mounted camera",
  "robot_sn": "rider-cam-001",
  "scene": "Outdoor",
  "sub_scene": "urban_delivery_avenue_grid_manhattan",
  "data_gen_mode": "real_machine",
  "fps": 30,
  "duration_seconds": 60,
  "frame_count_video": 1800,
  "frame_count_annotated": 36,
  "annotation_sampling": "0.5fps base + 6 event keyframes",
  "videos": {
    "ego": {
      "src": "ego.mp4",
      "label": "EGO POV · CHEST CAM",
      "resolution": "568×320",
      "role": "primary"
    }
  },
  "annotation_layers": [
    "L1: 2D detection + tracking · 21 track IDs, 139 bbox total",
    "L3: ego action + state (mode · speed · brake · gaze)",
    "L5: bilingual captions (7 clip-level · 13 event-level · 36 keyframe-level)"
  ],
  "available_streams": [
    "RGB × 1 (ego POV, 60 s · 30 fps)",
    "L1 detection: 21 track IDs across 36 frames",
    "L3 ego state: mode / speed_bucket / brake_intensity / gaze",
    "L4 risk events: 5× T3 flagged (4 taxi-cross + 1 ped-crosswalk yield)",
    "L5 bilingual captions: 56 total entries (clip + event + keyframe)",
    "Dashboard speedometer values transcribed in object notes (13-23 mph range)"
  ],
  "spans_source": "L5 keyframe + event captions, grouped by clip"
}