Guides · 6 min read
إعداد توسيم إدارة المحتوى
تكوين Potato لاكتشاف السمية وتصنيف خطاب الكراهية وتوسيم المحتوى الحساس مع مراعاة رفاهية المُوسِّمين.
Potato Team
إعداد توسيم إدارة المحتوى
يتطلب توسيم إدارة المحتوى اعتباراً دقيقاً لرفاهية المُوسِّمين وإرشادات واضحة للمحتوى الشخصي وضبط جودة قوي. يغطي هذا الدليل أفضل الممارسات لتصنيف المحتوى السام.
رفاهية المُوسِّمين
التعرض للمحتوى الضار يمكن أن يؤثر على الصحة النفسية للمُوسِّمين. نفّذ تدابير وقائية:
تكوين الرفاهية
yaml
wellbeing:
# Content warnings
warnings:
enabled: true
show_before_session: true
message: |
This task involves reviewing potentially offensive content including
hate speech, harassment, and explicit material. Take breaks as needed.
# Break reminders
breaks:
enabled: true
reminder_interval: 30 # minutes
break_duration: 5 # suggested minutes
message: "Consider taking a short break. Your wellbeing matters."
# Session limits
limits:
max_session_duration: 120 # minutes
max_items_per_session: 100
cooldown_between_sessions: 60 # minutes
# Easy exit
exit:
allow_immediate_exit: true
no_penalty_exit: true
exit_button_prominent: true
exit_message: "No problem. Take care of yourself."
# Support resources
resources:
show_support_link: true
support_url: "https://yourorg.com/support"
hotline_number: "1-800-XXX-XXXX"طمس المحتوى
yaml
display:
  # Blur images by default
  image_display:
    blur_by_default: true
    blur_amount: 20
    click_to_reveal: true
    reveal_duration: 10  # auto-blur after 10 seconds

  # Text content warnings
  text_display:
    show_severity_indicator: true
    expandable_content: true
    default_collapsed: true

تصنيف السمية
السمية متعددة المستويات
yaml
annotation_schemes:
  - annotation_type: radio
    name: toxicity_level
    question: "Rate the toxicity level of this content"
    options:
      - name: none
        label: "Not Toxic"
        description: "No harmful content"
      - name: mild
        label: "Mildly Toxic"
        description: "Rude or insensitive but not severe"
      - name: moderate
        label: "Moderately Toxic"
        description: "Clearly offensive or harmful"
      - name: severe
        label: "Severely Toxic"
        description: "Extremely offensive, threatening, or dangerous"

فئات السمية
yaml
annotation_schemes:
  - annotation_type: multiselect
    name: toxicity_types
    question: "Select all types of toxicity present"
    options:
      - name: profanity
        label: "Profanity/Obscenity"
        description: "Swear words, vulgar language"
      - name: insult
        label: "Insults"
        description: "Personal attacks, name-calling"
      - name: threat
        label: "Threats"
        description: "Threats of violence or harm"
      - name: hate_speech
        label: "Hate Speech"
        description: "Targeting protected groups"
      - name: harassment
        label: "Harassment"
        description: "Targeted, persistent hostility"
      - name: sexual
        label: "Sexual Content"
        description: "Explicit or suggestive content"
      - name: self_harm
        label: "Self-Harm/Suicide"
        description: "Promoting or glorifying self-harm"
      - name: misinformation
        label: "Misinformation"
        description: "Demonstrably false claims"
      - name: spam
        label: "Spam/Scam"
        description: "Unwanted promotional content"

اكتشاف خطاب الكراهية
المجموعات المستهدفة
yaml
annotation_schemes:
  - annotation_type: multiselect
    name: target_groups
    question: "Which groups are targeted? (if hate speech detected)"
    # Only shown when hate_speech was selected in toxicity_types
    depends_on:
      field: toxicity_types
      contains: hate_speech
    options:
      - name: race_ethnicity
        label: "Race/Ethnicity"
      - name: religion
        label: "Religion"
      - name: gender
        label: "Gender"
      - name: sexual_orientation
        label: "Sexual Orientation"
      - name: disability
        label: "Disability"
      - name: nationality
        label: "Nationality/Origin"
      - name: age
        label: "Age"
      - name: other
        label: "Other Protected Group"

شدة خطاب الكراهية
yaml
annotation_schemes:
  - annotation_type: radio
    name: hate_severity
    question: "Severity of hate speech"
    # Only shown when hate_speech was selected in toxicity_types
    depends_on:
      field: toxicity_types
      contains: hate_speech
    options:
      - name: implicit
        label: "Implicit"
        description: "Coded language, dog whistles"
      - name: explicit_mild
        label: "Explicit - Mild"
        description: "Clear but not threatening"
      - name: explicit_severe
        label: "Explicit - Severe"
        description: "Dehumanizing, threatening, or violent"

الإدارة السياقية
قواعد خاصة بالمنصة
yaml
# Context affects what's acceptable
annotation_schemes:
  - annotation_type: radio
    name: context_appropriate
    question: "Is this content appropriate for the platform context?"
    # Surface item metadata to the annotator alongside the question
    context_info:
      platform: "{{metadata.platform}}"
      community: "{{metadata.community}}"
      audience: "{{metadata.audience}}"
    options:
      - name: appropriate
        label: "Appropriate for Context"
      - name: borderline
        label: "Borderline"
      - name: inappropriate
        label: "Inappropriate for Context"

  - annotation_type: text
    name: context_notes
    question: "Explain your contextual reasoning"
    # Require an explanation only for borderline judgments
    depends_on:
      field: context_appropriate
      value: borderline

اكتشاف النية
yaml
annotation_schemes:
  - annotation_type: radio
    name: intent
    question: "What is the apparent intent?"
    options:
      - name: genuine_attack
        label: "Genuine Attack"
        description: "Intent to harm or offend"
      - name: satire
        label: "Satire/Parody"
        description: "Mocking toxic behavior"
      - name: quote
        label: "Quote/Report"
        description: "Reporting or discussing toxic content"
      - name: reclaimed
        label: "Reclaimed Language"
        description: "In-group use of slurs"
      - name: unclear
        label: "Unclear Intent"

إدارة محتوى الصور
تصنيف المحتوى المرئي
yaml
annotation_schemes:
  - annotation_type: multiselect
    name: image_violations
    question: "Select all policy violations"
    options:
      - name: nudity
        label: "Nudity/Sexual Content"
      - name: violence_graphic
        label: "Graphic Violence"
      - name: gore
        label: "Gore/Disturbing Content"
      - name: hate_symbols
        label: "Hate Symbols"
      - name: dangerous_acts
        label: "Dangerous Acts"
      - name: child_safety
        label: "Child Safety Concern"
        # Child-safety selections are flagged for immediate escalation
        priority: critical
        escalate: true
      - name: none
        label: "No Violations"

  - annotation_type: radio
    name: action_recommendation
    question: "Recommended action"
    options:
      - name: approve
        label: "Approve"
      - name: age_restrict
        label: "Age-Restrict"
      - name: warning_label
        label: "Add Warning Label"
      - name: remove
        label: "Remove"
      - name: escalate
        label: "Escalate to Specialist"

ضبط الجودة
yaml
quality_control:
  # Calibration for subjective content
  calibration:
    enabled: true
    frequency: 20  # Every 20 items
    items: calibration/moderation_gold.json
    feedback: true
    recalibrate_on_drift: true

  # High redundancy for borderline cases
  redundancy:
    annotations_per_item: 3
    increase_for_borderline: 5
    agreement_threshold: 0.67

  # Expert escalation
  escalation:
    enabled: true
    triggers:
      - field: toxicity_level
        value: severe
      - field: image_violations
        contains: child_safety
    escalate_to: trust_safety_team

  # Distribution monitoring
  monitoring:
    track_distribution: true
    alert_on_skew: true
    expected_distribution:
      none: 0.4
      mild: 0.3
      moderate: 0.2
      severe: 0.1

التكوين الكامل
yaml
annotation_task_name: "Content Moderation"

# Wellbeing first
wellbeing:
  warnings:
    enabled: true
    message: "This task contains potentially offensive content."
  breaks:
    reminder_interval: 30
    message: "Remember to take breaks."
  limits:
    max_session_duration: 90
    max_items_per_session: 75

display:
  # Blur sensitive content
  image_display:
    blur_by_default: true
    click_to_reveal: true
  # Show platform context
  metadata_display:
    show_fields: [platform, community, report_reason]

annotation_schemes:
  # Toxicity level
  - annotation_type: radio
    name: toxicity
    question: "Toxicity level"
    options:
      - name: none
        label: "None"
      - name: mild
        label: "Mild"
      - name: moderate
        label: "Moderate"
      - name: severe
        label: "Severe"

  # Categories
  - annotation_type: multiselect
    name: categories
    question: "Types of harmful content (select all)"
    options:
      - name: hate
        label: "Hate Speech"
      - name: harassment
        label: "Harassment"
      - name: violence
        label: "Violence/Threats"
      - name: sexual
        label: "Sexual Content"
      - name: self_harm
        label: "Self-Harm"
      - name: spam
        label: "Spam"
      - name: none
        label: "None"

  # Confidence
  - annotation_type: likert
    name: confidence
    question: "How confident are you?"
    size: 5
    min_label: "Uncertain"
    max_label: "Very Confident"

  # Notes
  - annotation_type: text
    name: notes
    question: "Additional notes (optional)"
    multiline: true

quality_control:
  redundancy:
    annotations_per_item: 3
  calibration:
    enabled: true
    frequency: 25
  escalation:
    enabled: true
    triggers:
      - field: toxicity
        value: severe

output_annotation_dir: annotations/
output_annotation_format: jsonl

أفضل ممارسات الإرشادات
عند كتابة إرشادات الإدارة:
- حدد عتبات واضحة: ما الذي يجعل شيئاً "خفيفاً" مقابل "متوسطاً"؟
- قدم أمثلة: أظهر الحالات الحدية مع التوضيحات
- عالج السياق: كيف تؤثر المنصة/الجمهور على القرارات؟
- تعامل مع الغموض: ماذا تفعل عندما تكون النية غير واضحة
- حدّث بانتظام: الإرشادات تتطور مع أنواع المحتوى الجديدة
دعم المُوسِّمين
- التناوب: لا تعين نفس الشخص للمحتوى السام باستمرار
- الوصول للدعم: وصول سهل لموارد الصحة النفسية
- قنوات التغذية الراجعة: اسمح للمُوسِّمين بالإبلاغ عن المخاوف
- التقدير: اعترف بصعوبة العمل
الوثائق الكاملة في /docs/core-concepts/annotation-types.