This example demonstrates Crowdom's data labeling workflow for annotation tasks.
In annotation tasks, unlike classification tasks, the number of possible solutions is "unlimited" (compared to a fixed set of classification labels).
Quality control therefore works differently – instead of control tasks as in classification, we ask other workers to check the received solutions (annotations).
As an example of the annotation workflow, we chose an audio transcription task: workers write down the words they hear in each audio.
If this is your first time with the Crowdom workflow structure, visit the image classification workflow example first.
%pip install crowdom
from datetime import timedelta
import os
import pandas as pd
import toloka.client as toloka
from crowdom import base, datasource, client, objects, pricing, params as labeling_params
import yaml
import logging.config
with open('logging.yaml') as f:
    logging.config.dictConfig(yaml.full_load(f.read()))
from IPython.display import clear_output, display
token = os.getenv('TOLOKA_TOKEN') or input('Enter your token: ')
clear_output()
toloka_client = client.create_toloka_client(token=token)  # production environment
# or, while experimenting, use the sandbox environment:
toloka_client = client.create_toloka_client(token=token, environment=toloka.TolokaClient.Environment.SANDBOX)
We are dealing with an annotation task – we transcribe Audio into Text:
annotation_function = base.AnnotationFunction(
    inputs=(objects.Audio,),
    outputs=(objects.Text,),
)
example_url = 'https://tlk.s3.yandex.net/ext_dataset/noisy_speech/noisy_tested_wav/p232_299.wav'
example_audio = (objects.Audio(url=example_url),)
client.TaskPreview(example_audio, task_function=annotation_function, lang='EN').display_link()
instruction = {
    'RU': 'Запишите звучащие на аудио слова, без знаков препинания и заглавных букв.',
    'EN': 'Transcribe the audio, without any punctuation or capitalization.'}
task_spec = base.TaskSpec(
    id='audio-transcription',
    function=annotation_function,
    name={'EN': 'Audio transcription', 'RU': 'Расшифровка аудио'},
    description={'EN': 'Transcribe short audios', 'RU': 'Расшифровка коротких аудио'},
    instruction=instruction)
Workers will see your task for the EN language in their task feed like this, depending on where they are doing the tasks:
(task feed screenshots: Browser | Mobile app)
Language of your data:
lang = 'EN'
Localized version of the annotation task spec:
task_spec_en = client.AnnotationTaskSpec(task_spec, lang)
Expected file format is a JSON list, each object having keys from name, with values typed according to type. As in the image classification example, for media types such as Audio we expect URLs.
datasource.file_format(task_spec_en.task_mapping)
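The exact schema is what the call above prints; as a rough illustration only (the key name and file contents below are assumptions, not the authoritative format), a tasks file could look like this:
import json
# Hypothetical example of tasks.json contents: one entry per audio, keyed by the
# input object name with a URL value. Verify key names against the
# datasource.file_format() output above before preparing your own file.
sample_tasks = [
    {'audio': 'https://tlk.s3.yandex.net/ext_dataset/noisy_speech/noisy_tested_wav/p232_299.wav'},
]
with open('tasks.example.json', 'w') as f:
    json.dump(sample_tasks, f, ensure_ascii=False, indent=2)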
input_objects = datasource.read_tasks('tasks.json', task_spec_en.task_mapping)
control_objects = None  # remains None if you have no reference labeling
In addition to the source data, a reference labeling is expected in this file. For our task, the reference labeling is the correct transcription, located in the text field.
datasource.file_format(task_spec_en.task_mapping, has_solutions=True)
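As before, the authoritative schema is what the call above prints; a hypothetical control tasks file (key names and the transcription below are illustrative assumptions) might look like:
import json
# Hypothetical example of control_tasks.json contents: the source audio plus the
# reference transcription in the text field. Verify key names against the
# datasource.file_format(..., has_solutions=True) output above.
sample_control_tasks = [
    {
        'audio': 'https://tlk.s3.yandex.net/ext_dataset/noisy_speech/noisy_tested_wav/p232_299.wav',
        'text': 'example reference transcription',
    },
]
with open('control_tasks.example.json', 'w') as f:
    json.dump(sample_control_tasks, f, ensure_ascii=False, indent=2)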
control_objects = datasource.read_tasks(
    'control_tasks.json',
    task_spec_en.task_mapping,
    has_solutions=True,
)
Define the task duration hint:
# audios are 3-10 seconds each, and workers need time to transcribe them
task_duration_hint = timedelta(seconds=20)
Define the estimated task duration hint for experts:
task_duration_hint = timedelta(seconds=30)
from crowdom import experts, project
scenario = project.Scenario.EXPERT_LABELING_OF_TASKS
experts_task_spec = client.AnnotationTaskSpec(task_spec, lang, scenario)
if control_objects:
    objects = control_objects
    experts_task_spec = experts_task_spec.check
else:
    objects = input_objects
avg_price_per_hour = None
avg_price_per_hour = 3.5 # USD
pricing_options = pricing.get_expert_pricing_options(
    task_duration_hint, experts_task_spec.task_mapping, avg_price_per_hour)
pricing_config = pricing.choose_default_expert_option(pricing_options, avg_price_per_hour)
client.define_task(experts_task_spec, toloka_client)
raw_feedback = client.launch_experts(
    experts_task_spec,
    client.ExpertParams(
        task_duration_hint=task_duration_hint,
        pricing_config=pricing_config,
    ),
    objects[:10],
    experts.ExpertCase.TASK_VERIFICATION,
    toloka_client,
    interactive=True)
worker_id_to_name = {'fd060a4d57b00f9bba4421fe4c7c22f3': 'bob'} # {'< hex 32-digit id >': '< username >'}
feedback = client.ExpertLabelingResults(raw_feedback, experts_task_spec, worker_id_to_name)
feedback_df = feedback.get_results()
with pd.option_context("max_colwidth", 100):
    display(feedback_df)
task_duration_hint = feedback_df['duration'].mean().to_pytimedelta() # with reference labeling
# task_duration_hint = timedelta(seconds=experts_proposed_value) # without reference labeling
task_duration_hint
During the annotation process, as a quality control measure, we show the gathered annotations to other workers and ask them to evaluate them – we refer to this process as the annotation check. This process, however, needs its own quality control measures – so we can create control objects and a training for the annotation check, as well as a training for the main annotation process.
control_objects, _ = feedback.get_correct_objects(client.ExpertLabelingApplication.CONTROL_TASKS)
training_objects, comments = feedback.get_correct_objects(application=client.ExpertLabelingApplication.TRAINING)
training_config = pricing.choose_default_training_option(
    pricing.get_training_options(task_duration_hint, len(training_objects), training_time=timedelta(minutes=2)))
client.define_task(task_spec_en, toloka_client)
client.create_training(
    task_spec_en,
    training_objects,
    comments,
    toloka_client,
    training_config)
check_training_objects, check_comments = feedback.get_correct_objects(application=client.ExpertLabelingApplication.ANNOTATION_CHECK_TRAINING)
training_config = pricing.choose_default_training_option(
    pricing.get_training_options(task_duration_hint, len(check_training_objects), training_time=timedelta(minutes=2)))
client.define_task(task_spec_en, toloka_client)
client.create_training(
    task_spec_en.check,
    check_training_objects,
    check_comments,
    toloka_client,
    training_config)
You can skip any customization in this section and use the default options, which we consider suitable for a wide range of typical tasks, or tune the parameters to your liking.
For general information about labeling efficiency optimization and about customization for classification tasks, see the image classification example.
The annotation labeling process consists of two distinct subprocesses – the annotation and check steps. You can interactively customize parameters for each of these steps independently.
Most of the parameters for the annotation step are the same as for classification. There is a new addition – Assignment check sample.
With this option enabled, only a portion of the tasks from each assignment is checked – you can change this number with the Max tasks to check option. If enough of these tasks were done correctly, the whole assignment is finalized – all tasks from it are considered checked, and no more checks are created for them. You can change the required share of correctly done tasks with the Accuracy threshold option.
The Assignment check sample can reduce the cost and time of the labeling process, but low check coverage cannot guarantee high quality for unchecked solutions.
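To make the rule concrete, here is a small illustrative sketch (not the Crowdom API) of the finalization decision, using the Max tasks to check = 15 and Accuracy threshold = 0.85 values configured later in this example:
# Illustrative sketch of the assignment check sample rule, not Crowdom internals:
# an assignment is finalized once the checked portion of its tasks is accurate enough.
def assignment_finalized(correct_checked: int, checked: int, accuracy_threshold: float = 0.85) -> bool:
    return checked > 0 and correct_checked / checked >= accuracy_threshold
assert assignment_finalized(correct_checked=13, checked=15)      # 13/15 >= 0.85: finalized
assert not assignment_finalized(correct_checked=12, checked=15)  # 12/15 < 0.85: not finalized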
You can specify different task_duration_hint values for the main process and the check step if they require significantly different time to complete.
params_form = labeling_params.get_annotation_interface(
    task_spec=task_spec_en,
    check_task_duration_hint=task_duration_hint,
    annotation_task_duration_hint=task_duration_hint,
    toloka_client=toloka_client)
check_params, annotation_params = params_form.get_params()
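For illustration only (not used further in this example): if checking a transcription took, say, half the time of producing one, you could build the form with distinct hints; the 1/2 ratio below is an assumption:
# Hedged sketch: same call as above, but with a shorter hint for the check step.
params_form_split = labeling_params.get_annotation_interface(
    task_spec=task_spec_en,
    check_task_duration_hint=task_duration_hint / 2,  # assumed: checks ~2x faster than annotation
    annotation_task_duration_hint=task_duration_hint,
    toloka_client=toloka_client)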
You can define your own pricing config for the labeling. However, you can only specify real_tasks_count and assignment_price for it – we cannot use control tasks directly for labeling quality control.
from crowdom import classification, classification_loop, control, evaluation, worker
pricing_config = pricing.PoolPricingConfig(assignment_price=0.05, real_tasks_count=20, control_tasks_count=0)
assert pricing_config.control_tasks_count == 0
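A quick sanity check on what this config implies per annotation attempt (using the values above):
# Implied cost of a single annotation attempt, before overlap and checks are taken into account.
price_per_task = pricing_config.assignment_price / pricing_config.real_tasks_count
price_per_task  # 0.05 / 20 = 0.0025 USD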
Define quality and control params:
assignment_check_sample = evaluation.AssignmentCheckSample(
    max_tasks_to_check=15,
    assignment_accuracy_finalization_threshold=0.85,
)
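With 20 real tasks per assignment (from the pricing config above), this sample checks at most 15 of them; a quick calculation of the maximum check coverage:
# At most this share of tasks in each assignment will be checked.
max_check_coverage = assignment_check_sample.max_tasks_to_check / pricing_config.real_tasks_count
max_check_coverage  # 15 / 20 = 0.75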
You can specify a custom overlap; the minimum number of attempts for annotation should always be 1:
correct_done_task_ratio_for_acceptance = 0.5
control_params = control.Control(
    rules=control.RuleBuilder().add_static_reward(
        threshold=correct_done_task_ratio_for_acceptance).add_speed_control(
        # if a worker completes the assignment in less than 10% of the expected time, we reject it,
        # assuming fraud, scripts, or random clicking; specify 0 to disable this control option
        ratio_rand=.1,
        # if a worker completes the assignment in less than 30% of the expected time, we block them
        # for a while, suspecting poor performance; specify 0 to disable this control option
        ratio_poor=.3,
    ).build())
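For intuition, assuming the expected assignment time is roughly the task duration hint times the number of real tasks (a simplification), the speed-control ratios translate into absolute times like this:
# Rough absolute thresholds implied by the speed-control ratios above
# (assumes expected assignment time = hint * real task count).
expected_assignment_time = task_duration_hint * pricing_config.real_tasks_count
rejected_if_faster_than = expected_assignment_time * 0.1  # ratio_rand
blocked_if_faster_than = expected_assignment_time * 0.3   # ratio_poor
expected_assignment_time, rejected_if_faster_than, blocked_if_faster_than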
annotation_params = client.AnnotationParams(
    task_duration_hint=task_duration_hint,
    pricing_config=pricing_config,
    overlap=classification_loop.DynamicOverlap(min_overlap=1, max_overlap=3, confidence=0.85),
    control=control_params,
    assignment_check_sample=assignment_check_sample,
    worker_filter=worker.WorkerFilter(
        filters=[
            worker.WorkerFilter.Params(
                langs={worker.LanguageRequirement(lang=lang)},
                regions=worker.lang_to_default_regions.get(lang, {}),
                age_range=(18, None),
            ),
        ],
        training_score=None,
    ),
)
assert isinstance(annotation_params.overlap, classification_loop.DynamicOverlap)
client.define_task(task_spec_en, toloka_client)
assert control_objects, 'No control objects supplied'
assert isinstance(control_objects[0], tuple)
try:
    # control objects may already be in the check task format (task + solution, evaluation)
    task_spec_en.check.task_mapping.validate_objects(control_objects[0][0])
except Exception:
    # otherwise, convert (task, solution) pairs into check control objects marked as correct
    control_objects = [(task + solution, (base.BinaryEvaluation(ok=True),)) for (task, solution) in control_objects]
artifacts = client.launch_annotation(
    task_spec_en,
    annotation_params,
    check_params,
    input_objects,
    control_objects,
    toloka_client)
results = artifacts.results
Ground truth (most probable option):
with pd.option_context("max_colwidth", 100):
    display(results.predict())
All gathered annotations with respective confidence values:
with pd.option_context("max_colwidth", 100):
    display(results.predict_proba())
Detailed information about each annotation and each check for it:
with pd.option_context('max_colwidth', 150), pd.option_context('display.max_rows', 100):
    display(results.worker_labels())
Quality verification closely resembles task verification with reference labeling, but it differs slightly in options. You can run verification on a random sample of labeled objects:
import random
scenario = project.Scenario.EXPERT_LABELING_OF_SOLVED_TASKS
experts_task_spec = client.AnnotationTaskSpec(task_spec, lang, scenario)
sample_size = min(20, int(0.1 * len(input_objects)))
objects = random.sample(client.select_control_tasks(input_objects, results.raw, min_confidence=.0), sample_size)
client.define_task(experts_task_spec, toloka_client)
raw_feedback = client.launch_experts(
    experts_task_spec,
    client.ExpertParams(
        task_duration_hint=task_duration_hint,
        pricing_config=pricing_config,
    ),
    objects,
    experts.ExpertCase.LABELING_QUALITY_VERIFICATION,
    toloka_client,
    interactive=True)
test_results = client.ExpertLabelingResults(raw_feedback, experts_task_spec)
test_results.get_accuracy()