copenlu's
repositories
|
data
|
llmsearch
|
prompts
|
results
|
samples
|
scripts
|
traces
|
vizualizations
|
generate_figs.ipynb
|
path_analysis.ipynb
|
.gitignore
|
README.md
|
code_exec_accuracy_by_dataset.pdf
|
code_exec_by_dataset-cropped.pdf
|
code_exec_by_dataset.pdf
|
code_exec_percentage_by_dataset.pdf
|
effect_of_scale.pdf
|
eval_code.sh
|
icl_accuracy_vs_rouge_AQUA.pdf
|
icl_accuracy_vs_rouge_ASDiv.pdf
|
icl_accuracy_vs_rouge_All Datasets.pdf
|
icl_accuracy_vs_rouge_CLUTRR.pdf
|
icl_accuracy_vs_rouge_Date.pdf
|
icl_accuracy_vs_rouge_GSM8k.pdf
|
icl_accuracy_vs_rouge_MultiArith.pdf
|
icl_accuracy_vs_rouge_SQA.pdf
|
icl_accuracy_vs_rouge_SVAMP.pdf
|
icl_accuracy_vs_rouge_Sports.pdf
|
model_accuracy_by_dataset.pdf
|
model_accuracy_vs_rouge_AQUA.pdf
|
model_accuracy_vs_rouge_ASDiv.pdf
|
model_accuracy_vs_rouge_All Datasets.pdf
|
model_accuracy_vs_rouge_CLUTRR.pdf
|
model_accuracy_vs_rouge_Date.pdf
|
model_accuracy_vs_rouge_GSM8k.pdf
|
model_accuracy_vs_rouge_MultiArith.pdf
|
model_accuracy_vs_rouge_SQA.pdf
|
model_accuracy_vs_rouge_SVAMP.pdf
|
model_accuracy_vs_rouge_Sports.pdf
|
model_faithfulness_by_dataset.pdf
|
results_plot_AQUA_code_exact.pdf
|
results_plot_GSM8k_code_exact.pdf
|
results_plot_None.pdf
|
results_plot_SQA_code_exact.pdf
|
results_plot_faith_vs_accuracy.pdf
|
results_plot_faithulness_vs_accuracy.pdf
|
run_ASDiv.sh
|
run_MultiArith.sh
|
run_aqua.sh
|
run_aqua_plan_only.sh
|
run_cluttr.sh
|
run_date.sh
|
run_gsm.sh
|
run_gsm_plan_only.sh
|
run_llama3.1.sh
|
run_sports.sh
|
run_sqa_plan_only.sh
|
run_svamp.sh
|
setup.py
|