Skip to content

Commit

Permalink
arxiv for separability
Browse files Browse the repository at this point in the history
  • Loading branch information
swabhs committed Jul 3, 2024
1 parent 1f7c013 commit 0546b61
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 5 deletions.
2 changes: 1 addition & 1 deletion _bibliography/papers.bib
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ @misc{ghosh2024compare
author={Ghosh, Sayan and Srinivasan, Tejas and Swayamdipta, Swabha},
year={2024},
abbr={Preprint},
url={https://sghosh73.github.io/assets/pdf/sep_paper.pdf},
url={https://arxiv.org/abs/2407.01878},
selected=true,
preview={separability.jpeg},
abstract={Human evaluation of generated language through pairwise preference judgments is pervasive. However, under common scenarios, such as when generations from a model pair are very similar, or when stochastic decoding results in large variations in generations, it results in inconsistent preference ratings. We address these challenges by introducing a meta-evaluation measure, separability, which estimates how suitable a test instance is for pairwise preference evaluation. For a candidate test instance, separability samples multiple generations from a pair of models, and measures how distinguishable the two sets of generations are. Our experiments show that instances with high separability values yield more consistent preference ratings from both human- and auto-raters. Further, the distribution of separability allows insights into which test benchmarks are more valuable for comparing models. Finally, we incorporate separability into ELO ratings, accounting for how suitable each test instance might be for reliably ranking LLMs. Overall, separability has implications for consistent, efficient and robust preference evaluation of LLMs with both human- and auto-raters.},
Expand Down
51 changes: 47 additions & 4 deletions _bibliography/papers_orcid.bib
Original file line number Diff line number Diff line change
@@ -1,36 +1,79 @@
---
---
% Preprint entry; url is the arXiv abstract page for identifier 2407.01878.
@misc{ghosh2024compare,
  title  = {Compare without Despair: Reliable Preference Evaluation with Generation Separability},
  author = {Ghosh, Sayan and Srinivasan, Tejas and Swayamdipta, Swabha},
  year   = {2024},
  url    = {https://arxiv.org/abs/2407.01878},
}

% url is the paper's page hosted on dill-lab.github.io.
% Braced words in the title keep their capitalisation under sentence-casing styles.
@misc{ranjit2024oath,
  title  = {{OATH-Frames}: Characterizing Online Attitudes Towards Homelessness via {LLM} Assistants},
  author = {Ranjit, Jaspreet and Joshi, Brihi and Dorn, Rebecca and Petry, Laura and Koumoundouros, Olga and Bottarini, Jayne and Liu, Peichen and Rice, Eric and Swayamdipta, Swabha},
  year   = {2024},
  url    = {https://dill-lab.github.io/oath-frames/},
}

% No URL is recorded for this entry yet; add a url field once a public link exists.
@misc{gulati2024out,
  title  = {Out-of-Distribution Detection through Soft Clustering with Non-Negative Kernel Regression},
  author = {Gulati, Aryan and Dong, Xingjian and Hurtado, Carlos and Shekkizhar, Sarath and Swayamdipta, Swabha and Ortega, Antonio},
  year   = {2024},
}

% No URL is recorded for this entry yet; add a url field once a public link exists.
% The braced system name keeps its capitalisation under sentence-casing styles.
@misc{khurana2024crowd,
  title  = {{Crowd-Calibrator}: Can Annotator Disagreement Inform Calibration in Subjective Tasks?},
  author = {Khurana, Urja and Nalisnick, Eric and Fokkens, Antske and Swayamdipta, Swabha},
  year   = {2024},
}

% url is the arXiv abstract page for identifier 2403.09539.
% Braced acronyms in the title keep their capitalisation under sentence-casing styles.
@misc{finlayson2024logits,
  title  = {Logits of {API}-Protected {LLMs} Leak Proprietary Information},
  author = {Finlayson, Matthew and Ren, Xiang and Swayamdipta, Swabha},
  year   = {2024},
  url    = {https://arxiv.org/abs/2403.09539},
}

% url is the arXiv abstract page for identifier 2406.04834.
% Publication status lives in note so that booktitle holds only the venue name.
@inproceedings{cui2024annotating,
  title     = {Annotating {FrameNet} via Structure-Conditioned Language Generation},
  author    = {Cui, Xinyue and Swayamdipta, Swabha},
  booktitle = {Proceedings of ACL},
  year      = {2024},
  url       = {https://arxiv.org/abs/2406.04834},
  note      = {To appear},
}


% url is the arXiv abstract page for identifier 2403.03429.
% Publication status lives in note so that booktitle holds only the venue name.
@inproceedings{nazari2024generative,
  title     = {Generative Explanations for Program Synthesizers},
  author    = {Nazari, Amirmohammad and Chattopadhyay, Souti and Swayamdipta, Swabha and Raghothaman, Mukund},
  booktitle = {Proceedings of VL/HCC},
  year      = {2024},
  url       = {https://arxiv.org/abs/2403.03429},
  note      = {To appear},
}

% Entry for the workshop proceedings volume itself; url is its ACL Anthology page.
% NOTE(review): the listed people may be volume editors; if so, a proceedings entry
% with an editor field would be the more specific type. Confirm before changing.
@inproceedings{vazquez2024proceedings,
  title     = {Proceedings of the 1st Workshop on Uncertainty-Aware {NLP} ({UncertaiNLP} 2024)},
  author    = {V{\'a}zquez, Ra{\'u}l and Celikkanat, Hande and Ulmer, Dennis and Tiedemann, J{\"o}rg and Swayamdipta, Swabha and Aziz, Wilker and Plank, Barbara and Baan, Joris and de Marneffe, Marie-Catherine},
  booktitle = {Proceedings of the 1st Workshop on Uncertainty-Aware {NLP} ({UncertaiNLP} 2024)},
  year      = {2024},
  url       = {https://aclanthology.org/2024.uncertainlp-1.0/},
}

% url is the arXiv abstract page for identifier 2310.01693.
@inproceedings{finlayson2023closing,
  title     = {Closing the Curious Case of Neural Text Degeneration},
  author    = {Finlayson, Matthew and Hewitt, John and Koller, Alexander and Swayamdipta, Swabha and Sabharwal, Ashish},
  booktitle = {Proc. of ICLR},
  year      = {2024},
  url       = {https://arxiv.org/abs/2310.01693},
}

% url is the arXiv abstract page for identifier 2309.09405.
@inproceedings{nam2023does,
  title     = {Does Video Summarization Require Videos? Quantifying the Effectiveness of Language in Video Summarization},
  author    = {Nam, Yoonsoo and Lehavi, Adam and Yang, Daniel and Bose, Digbalay and Swayamdipta, Swabha and Narayanan, Shrikanth},
  booktitle = {Proc. of ICASSP},
  year      = {2024},
  url       = {https://arxiv.org/abs/2309.09405},
}

@inproceedings{howard2023neurocomparatives,
Expand Down

0 comments on commit 0546b61

Please sign in to comment.