This example illustrates how to evaluate the quality of audio synthesis with Crowdom by collecting answers to quality questions about audio recordings.
In our example, we ask workers to answer three quality questions about each given audio.
We use a conditional view for objects, so that for each question only the corresponding answers are offered as options.
Moreover, this workflow shows how to use the `Markdown` view for `Text` objects.
%pip install crowdom
from datetime import timedelta
import logging.config
import markdown2
import yaml
import os
from typing import Dict
from crowdom import base, datasource, client, objects, pricing, control, classification, classification_loop, worker
# Configure logging from the YAML config that is expected next to this notebook.
with open('logging.yaml') as config_file:
    logging_config = yaml.full_load(config_file)
logging.config.dictConfig(logging_config)

# Take the Toloka token from the environment; ask for it interactively
# when the variable is unset or empty.
toloka_token = os.getenv('TOLOKA_TOKEN')
if not toloka_token:
    toloka_token = input('Enter your token: ')
toloka_client = client.create_toloka_client(token=toloka_token)
Define your questions and corresponding answers:
- `question_label` contains the question you want to ask
- `Enum`
# members:
class Noise(objects.Question):
    """Question about extraneous noise and signal corruption in the audio.

    Each member is one answer option for this question.
    """

    NOISE = 'noise'
    CORRUPTION = 'corruption'
    BOTH = 'both'
    NONE = 'none'

    @classmethod
    def question_label(cls) -> Dict[str, str]:
        """Return the Markdown-formatted question text, keyed by language code."""
        return {
            'EN': '**Are there any noises or signal corruption?**',
            'RU': '**Встречаются ли лишние шумы или искажения сигнала?**',
        }

    @classmethod
    def labels(cls) -> Dict['Noise', Dict[str, str]]:
        """Return the localized answer label for every member, keyed by language code."""
        return {
            cls.NOISE: {'EN': 'There is noise', 'RU': 'На аудио есть лишний шум'},
            # The EN label describes corruption of the signal, matching the RU label and the question text.
            cls.CORRUPTION: {'EN': 'There is signal corruption', 'RU': 'Фрагменты сигнала искажены'},
            cls.BOTH: {'EN': 'Both problems present', 'RU': 'Есть и 1, и 2 проблема'},
            cls.NONE: {'EN': 'These problems are not present', 'RU': 'Подобных проблем нет'},
        }
class Acoustics(objects.Question):
    """Question about the environment in which the audio was recorded.

    Each member is one answer option for this question.
    """

    INSIDE = 'inside'
    OUTDOORS = 'outdoors'
    STUDIO = 'studio'

    @classmethod
    def question_label(cls) -> Dict[str, str]:
        """Return the Markdown-formatted question text, keyed by language code."""
        question_text = {
            'RU': '**Где записывалось аудио?**',
            'EN': '**Where was this audio recorded?**',
        }
        return question_text

    @classmethod
    def labels(cls) -> Dict['Acoustics', Dict[str, str]]:
        """Return the localized answer label for every member, keyed by language code."""
        inside_label = {'EN': 'In living or industrial space', 'RU': 'В жилом / промышленном помещении'}
        outdoors_label = {'EN': 'Outdoors', 'RU': 'На улице'}
        studio_label = {'EN': 'In recording studio', 'RU': 'В профессиональной студии'}
        return {
            cls.INSIDE: inside_label,
            cls.OUTDOORS: outdoors_label,
            cls.STUDIO: studio_label,
        }
class Intonation(objects.Question):
    """Question about whether the intonation matches the meaning of the written text.

    Each member is one answer option for this question.
    """

    YES = 'yes'
    NO = 'no'

    @classmethod
    def question_label(cls) -> Dict[str, str]:
        """Return the Markdown-formatted question text, keyed by language code."""
        question_text = {
            'EN': '**Does intonation convey correct meaning of the text?**',
            'RU': '**Верно ли интонация на аудио передаёт смысл написанного текста?**',
        }
        return question_text

    @classmethod
    def labels(cls) -> Dict['Intonation', Dict[str, str]]:
        """Return the localized answer label for every member, keyed by language code."""
        yes_label = {
            'EN': 'Intonation conveys text meaning correctly',
            'RU': 'Смысл текста передан интонацией верно',
        }
        no_label = {
            'EN': 'Intonation does not convey text meaning',
            'RU': 'Смысл текста передан интонацией неверно',
        }
        return {cls.YES: yes_label, cls.NO: no_label}
Create combined question and answer classes, which will contain all possible options for your task:
# Build the combined question and answer classes out of the three question enums.
combined_classes = objects.get_combined_classes([Noise, Acoustics, Intonation])
Question, Answer, question_answers_list = combined_classes

# Available answer labels, conditioned on the input named 'question'.
question_answers = base.create_available_labels_if('question', question_answers_list)
In this example, there are a couple of customization options specified:
- `LabelsDisplayType.MONO` changes your question view from a radio-button list to a text version
- `TextFormat.MARKDOWN` enables Markdown rendering for question text, title and `Text`
- `available_labels` limits the answer options to the ones that correspond to the displayed question

For some questions (e.g. `Intonation`), the `Text` that is being spoken in the audio matters, so it is also included in the `inputs`:
# Input: the question, shown with the MONO display type and Markdown formatting.
question_meta = base.ClassMeta(
    type=Question,
    name='question',
    input_display_type=base.LabelsDisplayType.MONO,
    title=base.Title(text={'EN': 'Question', 'RU': 'Вопрос'}, format=base.TextFormat.MARKDOWN),
    text_format=base.TextFormat.MARKDOWN,
)

# Input: the audio the question is asked about.
audio_meta = base.ObjectMeta(type=objects.Audio, name='audio')

# Input: the text accompanying the audio, in Markdown format.
text_meta = objects.TextMeta(type=objects.Text, name='text', format=base.TextFormat.MARKDOWN)

# Output: the answer, with available labels taken from `question_answers`.
answer_meta = base.ClassMeta(
    type=Answer,
    name='answer',
    available_labels=question_answers,
    title=base.Title(text={'EN': 'Answer', 'RU': 'Ответ'}, format=base.TextFormat.MARKDOWN),
)

function = base.ClassificationFunction(
    inputs=(question_meta, audio_meta, text_meta),
    cls=answer_meta,
)
audio_url = 'https://storage.yandexcloud.net/crowdom-public/examples/audio_questions/data/2ff85d99-f9fa-4297-8d58-0935de85515d.wav'

# Preview of a NOISE question; the text input is left empty here.
sample_audio = objects.Audio(url=audio_url)
empty_text = objects.Text(text='')
sample_task = (Question.NOISE, sample_audio, empty_text)

noise_preview = client.TaskPreview(sample_task, task_function=function, lang='RU')
noise_preview.display_link()
We need a meaningful `Text` for the `Intonation` question:
# Preview of an INTONATION question, this time with a non-empty text input.
spoken_text = objects.Text(text='Это модерн? Модерн!')
sample_task = (Question.INTONATION, objects.Audio(url=audio_url), spoken_text)

intonation_preview = client.TaskPreview(sample_task, task_function=function, lang='RU')
intonation_preview.display_link()
# Language in which the task is prepared and launched below.
lang = 'RU'

# Worker instructions are stored as one Markdown file per language and are
# rendered with markdown2 before being passed to the task spec.
instruction = {}
for worker_lang in ['EN', 'RU']:
    # Read explicitly as UTF-8: the RU instruction contains Cyrillic text, and the
    # locale-dependent default encoding would mis-decode it (or raise) on some platforms.
    with open(f'instruction_{worker_lang}.md', encoding='utf-8') as f:
        instruction[worker_lang] = markdown2.markdown(f.read())
# Localized task name and description.
task_name = {'EN': 'Audio Questions', 'RU': 'Вопросы про аудио'}
task_description = {
    'EN': 'Listen to a speech on audio and answer the question asked',
    'RU': 'Прослушайте речь на аудио и ответьте на заданный вопрос',
}

task_spec = client.TaskSpec(
    id='qq',
    function=function,
    name=task_name,
    description=task_description,
    instruction=instruction,
)

# Task spec prepared for the language chosen in `lang`.
task_spec_ru = client.PreparedTaskSpec(task_spec, lang)

client.define_task(task_spec_ru, toloka_client)
# Audios are about 1-5 seconds each.
task_duration_hint = timedelta(seconds=10)

# Both task files are read with the mapping of the prepared task spec.
task_mapping = task_spec_ru.task_mapping
input_objects = datasource.read_tasks('tasks.json', task_mapping)
# Control tasks are read together with their solutions.
control_objects = datasource.read_tasks('control_tasks.json', task_mapping, has_solutions=True)
As the task is non-standard, and the total number of possible answers across all of the questions can be too high, it is better to specify the parameters in code:
# Pricing of one assignment and the number of real and control tasks configured for it.
pricing_config = pricing.PoolPricingConfig(
    assignment_price=0.02,
    real_tasks_count=20,
    control_tasks_count=4,
)

# Threshold passed to the static reward rule.
correct_control_task_ratio_for_acceptance = 0.7

# Assemble the control rules step by step: static reward first, then speed control.
rule_builder = control.RuleBuilder()
rule_builder = rule_builder.add_static_reward(threshold=correct_control_task_ratio_for_acceptance)
rule_builder = rule_builder.add_speed_control(ratio_rand=0.1, ratio_poor=0.3)

control_params = control.Control(rules=rule_builder.build())
# Worker requirements: the task language, the default regions registered for that
# language (an empty dict if none are registered) and a lower age bound of 18.
lang_worker_params = worker.WorkerFilter.Params(
    langs={worker.LanguageRequirement(lang=lang)},
    regions=worker.lang_to_default_regions.get(lang, {}),
    age_range=(18, None),
)
worker_filter = worker.WorkerFilter(filters=[lang_worker_params], training_score=80)

# Every task is given a static overlap of 5; answers are aggregated by majority vote.
params = client.Params(
    pricing_config=pricing_config,
    task_duration_hint=task_duration_hint,
    aggregation_algorithm=classification.AggregationAlgorithm.MAJORITY_VOTE,
    overlap=classification_loop.StaticOverlap(overlap=5),
    control=control_params,
    worker_filter=worker_filter,
)
# Launch the labeling with the prepared spec, parameters and task sets.
artifacts = client.launch(task_spec_ru, params, input_objects, control_objects, toloka_client)

results = artifacts.results

# Left as a bare expression so that the notebook displays the returned value.
results.predict()
| question | audio | text | result | confidence | overlap |
---|---|---|---|---|---|---|
0 | noise | https://storage.yandexcloud.net/crowdom-publ... | *Вам доступны и рассрочка, и кредит, и наличны... | noise__none | 1.0 | 1 |
1 | noise | https://storage.yandexcloud.net/crowdom-publ... | *И светло и зелено было вокруг.* | noise__none | 1.0 | 1 |
2 | noise | https://storage.yandexcloud.net/crowdom-publ... | *Накормленные досыта котики играли с трудом.* | noise__none | 1.0 | 1 |
3 | noise | https://storage.yandexcloud.net/crowdom-publ... | *Накормленные досыта, котики играли с трудом.* | noise__none | 1.0 | 1 |
4 | noise | https://storage.yandexcloud.net/crowdom-publ... | *В доме - мыши.* | noise__none | 1.0 | 1 |