Name
copenlu's repositories
data
llmsearch
prompts
results
samples
scripts
traces
vizualizations
generate_figs.ipynb
path_analysis.ipynb
.gitignore
README.md
code_exec_accuracy_by_dataset.pdf
code_exec_by_dataset-cropped.pdf
code_exec_by_dataset.pdf
code_exec_percentage_by_dataset.pdf
effect_of_scale.pdf
eval_code.sh
icl_accuracy_vs_rouge_AQUA.pdf
icl_accuracy_vs_rouge_ASDiv.pdf
icl_accuracy_vs_rouge_All Datasets.pdf
icl_accuracy_vs_rouge_CLUTRR.pdf
icl_accuracy_vs_rouge_Date.pdf
icl_accuracy_vs_rouge_GSM8k.pdf
icl_accuracy_vs_rouge_MultiArith.pdf
icl_accuracy_vs_rouge_SQA.pdf
icl_accuracy_vs_rouge_SVAMP.pdf
icl_accuracy_vs_rouge_Sports.pdf
model_accuracy_by_dataset.pdf
model_accuracy_vs_rouge_AQUA.pdf
model_accuracy_vs_rouge_ASDiv.pdf
model_accuracy_vs_rouge_All Datasets.pdf
model_accuracy_vs_rouge_CLUTRR.pdf
model_accuracy_vs_rouge_Date.pdf
model_accuracy_vs_rouge_GSM8k.pdf
model_accuracy_vs_rouge_MultiArith.pdf
model_accuracy_vs_rouge_SQA.pdf
model_accuracy_vs_rouge_SVAMP.pdf
model_accuracy_vs_rouge_Sports.pdf
model_faithfulness_by_dataset.pdf
results_plot_AQUA_code_exact.pdf
results_plot_GSM8k_code_exact.pdf
results_plot_None.pdf
results_plot_SQA_code_exact.pdf
results_plot_faith_vs_accuracy.pdf
results_plot_faithulness_vs_accuracy.pdf
run_ASDiv.sh
run_MultiArith.sh
run_aqua.sh
run_aqua_plan_only.sh
run_cluttr.sh
run_date.sh
run_gsm.sh
run_gsm_plan_only.sh
run_llama3.1.sh
run_sports.sh
run_sqa_plan_only.sh
run_svamp.sh
setup.py