% Workshop paper presented at the "LLM Reasoning" workshop of ICLR 2026
% (venue and dates as given in booktitle). The citation key is kept exactly
% as exported so existing citations continue to resolve.
% NOTE(review): the export listed EURECOM in the editor field; it looks like
%   the hosting repository / copyright holder rather than the proceedings
%   editor, so it is kept in the unrendered institution field -- confirm.
% NOTE(review): the exported note ended with "is available at :" and no link;
%   add a url or doi field once the definitive location is known.
@inproceedings{EURECOM+8667,
  author      = {Benechehab, Abdelhakim and El Hili, Youssef Attia and Thomas, Albert and Paolo, Giuseppe and Filippone, Maurizio},
  title       = {Embedding distance as a reward signal can replace verifiers for {LLM} reasoning},
  booktitle   = {{ICLR} 2026, 14th International Conference on Learning Representations, Workshop {LLM} Reasoning, 23--27 April 2026, Rio de Janeiro, Brazil},
  year        = {2026},
  venue       = {Rio de Janeiro},
  institution = {{EURECOM}},
  note        = {{\copyright} EURECOM. Personal use of this material is permitted. The definitive version of this paper was published in ICLR 2026, 14th International Conference on Learning Representations, Workshop LLM Reasoning, 23--27 April 2026, Rio de Janeiro, Brazil.},
}