Guides · 6 min read
إعداد توسيم إدارة المحتوى
تكوين Potato لاكتشاف السمية وتصنيف خطاب الكراهية وتوسيم المحتوى الحساس مع مراعاة رفاهية المُوسِّمين.
Potato Team
إعداد توسيم إدارة المحتوى
يتطلب توسيم إدارة المحتوى اعتباراً دقيقاً لرفاهية المُوسِّمين وإرشادات واضحة للمحتوى الشخصي وضبط جودة قوي. يغطي هذا الدليل أفضل الممارسات لتصنيف المحتوى السام.
رفاهية المُوسِّمين
التعرض للمحتوى الضار يمكن أن يؤثر على الصحة النفسية للمُوسِّمين. نفّذ تدابير وقائية:
تكوين الرفاهية
yaml
wellbeing:
# Content warnings
warnings:
enabled: true
show_before_session: true
message: |
This task involves reviewing potentially offensive content including
hate speech, harassment, and explicit material. Take breaks as needed.
# Break reminders
breaks:
enabled: true
reminder_interval: 30 # minutes
break_duration: 5 # suggested minutes
message: "Consider taking a short break. Your wellbeing matters."
# Session limits
limits:
max_session_duration: 120 # minutes
max_items_per_session: 100
cooldown_between_sessions: 60 # minutes
# Easy exit
exit:
allow_immediate_exit: true
no_penalty_exit: true
exit_button_prominent: true
exit_message: "No problem. Take care of yourself."
# Support resources
resources:
show_support_link: true
support_url: "https://yourorg.com/support"
hotline_number: "1-800-XXX-XXXX"طمس المحتوى
yaml
display:
  # Blur images by default
  image_display:
    blur_by_default: true
    blur_amount: 20
    click_to_reveal: true
    reveal_duration: 10  # auto-blur after 10 seconds

  # Text content warnings
  text_display:
    show_severity_indicator: true
    expandable_content: true
    default_collapsed: true

تصنيف السمية
السمية متعددة المستويات
yaml
annotation_schemes:
  - annotation_type: radio
    name: toxicity_level
    question: "Rate the toxicity level of this content"
    options:
      - name: none
        label: "Not Toxic"
        description: "No harmful content"
      - name: mild
        label: "Mildly Toxic"
        description: "Rude or insensitive but not severe"
      - name: moderate
        label: "Moderately Toxic"
        description: "Clearly offensive or harmful"
      - name: severe
        label: "Severely Toxic"
        description: "Extremely offensive, threatening, or dangerous"

فئات السمية
yaml
annotation_schemes:
  - annotation_type: multiselect
    name: toxicity_types
    question: "Select all types of toxicity present"
    options:
      - name: profanity
        label: "Profanity/Obscenity"
        description: "Swear words, vulgar language"
      - name: insult
        label: "Insults"
        description: "Personal attacks, name-calling"
      - name: threat
        label: "Threats"
        description: "Threats of violence or harm"
      - name: hate_speech
        label: "Hate Speech"
        description: "Targeting protected groups"
      - name: harassment
        label: "Harassment"
        description: "Targeted, persistent hostility"
      - name: sexual
        label: "Sexual Content"
        description: "Explicit or suggestive content"
      - name: self_harm
        label: "Self-Harm/Suicide"
        description: "Promoting or glorifying self-harm"
      - name: misinformation
        label: "Misinformation"
        description: "Demonstrably false claims"
      - name: spam
        label: "Spam/Scam"
        description: "Unwanted promotional content"

اكتشاف خطاب الكراهية
المجموعات المستهدفة
yaml
annotation_schemes:
  - annotation_type: multiselect
    name: target_groups
    question: "Which groups are targeted? (if hate speech detected)"
    # Only shown when hate_speech was selected in toxicity_types
    depends_on:
      field: toxicity_types
      contains: hate_speech
    options:
      - name: race_ethnicity
        label: "Race/Ethnicity"
      - name: religion
        label: "Religion"
      - name: gender
        label: "Gender"
      - name: sexual_orientation
        label: "Sexual Orientation"
      - name: disability
        label: "Disability"
      - name: nationality
        label: "Nationality/Origin"
      - name: age
        label: "Age"
      - name: other
        label: "Other Protected Group"

شدة خطاب الكراهية
yaml
annotation_schemes:
  - annotation_type: radio
    name: hate_severity
    question: "Severity of hate speech"
    # Only shown when hate_speech was selected in toxicity_types
    depends_on:
      field: toxicity_types
      contains: hate_speech
    options:
      - name: implicit
        label: "Implicit"
        description: "Coded language, dog whistles"
      - name: explicit_mild
        label: "Explicit - Mild"
        description: "Clear but not threatening"
      - name: explicit_severe
        label: "Explicit - Severe"
        description: "Dehumanizing, threatening, or violent"

الإدارة السياقية
قواعد خاصة بالمنصة
yaml
# Context affects what's acceptable
annotation_schemes:
  - annotation_type: radio
    name: context_appropriate
    question: "Is this content appropriate for the platform context?"
    # Surface item metadata to the annotator alongside the question
    context_info:
      platform: "{{metadata.platform}}"
      community: "{{metadata.community}}"
      audience: "{{metadata.audience}}"
    options:
      - name: appropriate
        label: "Appropriate for Context"
      - name: borderline
        label: "Borderline"
      - name: inappropriate
        label: "Inappropriate for Context"

  - annotation_type: text
    name: context_notes
    question: "Explain your contextual reasoning"
    # Require an explanation only for borderline judgments
    depends_on:
      field: context_appropriate
      value: borderline

اكتشاف النية
yaml
annotation_schemes:
  - annotation_type: radio
    name: intent
    question: "What is the apparent intent?"
    options:
      - name: genuine_attack
        label: "Genuine Attack"
        description: "Intent to harm or offend"
      - name: satire
        label: "Satire/Parody"
        description: "Mocking toxic behavior"
      - name: quote
        label: "Quote/Report"
        description: "Reporting or discussing toxic content"
      - name: reclaimed
        label: "Reclaimed Language"
        description: "In-group use of slurs"
      - name: unclear
        label: "Unclear Intent"

إدارة محتوى الصور
تصنيف المحتوى المرئي
yaml
annotation_schemes:
  - annotation_type: multiselect
    name: image_violations
    question: "Select all policy violations"
    options:
      - name: nudity
        label: "Nudity/Sexual Content"
      - name: violence_graphic
        label: "Graphic Violence"
      - name: gore
        label: "Gore/Disturbing Content"
      - name: hate_symbols
        label: "Hate Symbols"
      - name: dangerous_acts
        label: "Dangerous Acts"
      - name: child_safety
        label: "Child Safety Concern"
        # Child-safety selections are flagged for immediate escalation
        priority: critical
        escalate: true
      - name: none
        label: "No Violations"

  - annotation_type: radio
    name: action_recommendation
    question: "Recommended action"
    options:
      - name: approve
        label: "Approve"
      - name: age_restrict
        label: "Age-Restrict"
      - name: warning_label
        label: "Add Warning Label"
      - name: remove
        label: "Remove"
      - name: escalate
        label: "Escalate to Specialist"

ضبط الجودة
yaml
quality_control:
  # Calibration for subjective content
  calibration:
    enabled: true
    frequency: 20  # Every 20 items
    items: calibration/moderation_gold.json
    feedback: true
    recalibrate_on_drift: true

  # High redundancy for borderline cases
  redundancy:
    annotations_per_item: 3
    increase_for_borderline: 5
    agreement_threshold: 0.67

  # Expert escalation
  escalation:
    enabled: true
    triggers:
      - field: toxicity_level
        value: severe
      - field: image_violations
        contains: child_safety
    escalate_to: trust_safety_team

  # Distribution monitoring
  monitoring:
    track_distribution: true
    alert_on_skew: true
    expected_distribution:
      none: 0.4
      mild: 0.3
      moderate: 0.2
      severe: 0.1

التكوين الكامل
yaml
annotation_task_name: "Content Moderation"

# Wellbeing first
wellbeing:
  warnings:
    enabled: true
    message: "This task contains potentially offensive content."
  breaks:
    reminder_interval: 30
    message: "Remember to take breaks."
  limits:
    max_session_duration: 90
    max_items_per_session: 75

display:
  # Blur sensitive content
  image_display:
    blur_by_default: true
    click_to_reveal: true
  # Show platform context
  metadata_display:
    show_fields: [platform, community, report_reason]

annotation_schemes:
  # Toxicity level
  - annotation_type: radio
    name: toxicity
    question: "Toxicity level"
    options:
      - name: none
        label: "None"
      - name: mild
        label: "Mild"
      - name: moderate
        label: "Moderate"
      - name: severe
        label: "Severe"

  # Categories
  - annotation_type: multiselect
    name: categories
    question: "Types of harmful content (select all)"
    options:
      - name: hate
        label: "Hate Speech"
      - name: harassment
        label: "Harassment"
      - name: violence
        label: "Violence/Threats"
      - name: sexual
        label: "Sexual Content"
      - name: self_harm
        label: "Self-Harm"
      - name: spam
        label: "Spam"
      - name: none
        label: "None"

  # Confidence
  - annotation_type: likert
    name: confidence
    question: "How confident are you?"
    size: 5
    min_label: "Uncertain"
    max_label: "Very Confident"

  # Notes
  - annotation_type: text
    name: notes
    question: "Additional notes (optional)"
    multiline: true

quality_control:
  redundancy:
    annotations_per_item: 3
  calibration:
    enabled: true
    frequency: 25
  escalation:
    enabled: true
    triggers:
      - field: toxicity
        value: severe

output_annotation_dir: annotations/
output_annotation_format: jsonl

أفضل ممارسات الإرشادات
عند كتابة إرشادات الإدارة:
- حدد عتبات واضحة: ما الذي يجعل شيئاً "خفيفاً" مقابل "متوسطاً"؟
- قدم أمثلة: أظهر الحالات الحدية مع التوضيحات
- عالج السياق: كيف تؤثر المنصة/الجمهور على القرارات؟
- تعامل مع الغموض: ماذا تفعل عندما تكون النية غير واضحة
- حدّث بانتظام: الإرشادات تتطور مع أنواع المحتوى الجديدة
دعم المُوسِّمين
- التناوب: لا تعين نفس الشخص للمحتوى السام باستمرار
- الوصول للدعم: وصول سهل لموارد الصحة النفسية
- قنوات التغذية الراجعة: اسمح للمُوسِّمين بالإبلاغ عن المخاوف
- التقدير: اعترف بصعوبة العمل
الوثائق الكاملة في /docs/core-concepts/annotation-types.