6:["$","$L15",null,{"children":[["$","div",null,{"className":"flex w-full flex-col","children":[["$","div",null,{"className":"flex items-center justify-between border-b border-neutral-400/30 px-5 py-4","children":[["$","div",null,{"className":"flex items-center gap-x-5","children":[["$","h1",null,{"className":"text-xl font-bold text-neutral-800","children":["$","a",null,{"href":"https://notesum.ai/","target":"_blank","rel":"noopener noreferrer","children":"notesum.ai"}]}],["$","span",null,{"className":"h-5 w-[1px] bg-neutral-400/30"}],["$","$L16",null,{"paper":{"id":"2411.05141v1","title":"Audiobox TTA-RAG: Improving Zero-Shot and Few-Shot Text-To-Audio with\n Retrieval-Augmented Generation","category":"eess.AS","link":"http://arxiv.org/abs/2411.05141v1","categories":["eess.AS","cs.SD"],"creation_date":"2024-11-07 19:50:28+00:00","author":["Mu Yang","Bowen Shi","Matthew Le","Wei-Ning Hsu","Andros Tjandra"],"summary":"$17","figure":"","table":"\n\n\n\n\n\n\n

Evaluation

Dataset

","short_name":"Audiobox TTA-RAG","author_affiliation":{"Mu Yang":"Center for Robust Speech Systems (CRSS), University of Texas at Dallas, USA","Bowen Shi":"Meta AI, USA","Matthew Le":"Meta AI, USA","Wei-Ning Hsu":"Meta AI, USA","Andros Tjandra":"Meta AI, USA"},"journal_ref":""}}],["$","span",null,{"className":"h-5 w-[1px] bg-neutral-400/30"}],["$","span",null,{"className":"text-sm text-neutral-500","children":["Published at"," ","November 7"]}]]}],["$","div",null,{"children":["$","$L18",null,{}]}]]}],["$","div",null,{"className":"mt-2 flex-1 overflow-y-auto px-2 pb-6 scrollbar-thin sm:px-4 md:px-[10%]","children":[[["$","div",null,{"ref":"$undefined","className":"rounded-lg bg-card text-card-foreground border-0 shadow-none","children":[["$","div",null,{"ref":"$undefined","className":"flex flex-col space-y-1.5 p-6 pl-4","children":["$","h3",null,{"ref":"$undefined","className":"text-2xl font-semibold leading-none tracking-tight","children":"Audiobox TTA-RAG: Improving Zero-Shot and Few-Shot Text-To-Audio with\n Retrieval-Augmented Generation"}]}],["$","div",null,{"ref":"$undefined","className":"p-6 pt-0 pl-4","children":[["$","div",null,{"className":"m-0 flex flex-row items-center gap-2 pl-1","children":[["$","span",null,{"children":"$undefined"}],["$","span",null,{"children":""}]]}],["$","div",null,{"className":"flex flex-col gap-2 md:flex-row md:items-center","children":[["$","div",null,{"className":"flex flex-wrap gap-2","children":[["$","div","0",{"ref":"$undefined","className":"grid place-items-center border px-2.5 text-xs transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2 shadow w-fit rounded-md border-[#1abc5b1a] bg-[#1ABC5B05] font-normal text-[#0A883E] hover:bg-[#1ABC5B05]","children":"eess.AS"}],["$","div","1",{"ref":"$undefined","className":"grid place-items-center border px-2.5 text-xs transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2 shadow w-fit rounded-md border-[#1abc5b1a] bg-[#1ABC5B05] font-normal text-[#0A883E] hover:bg-[#1ABC5B05]","children":"cs.SD"}]]}],["$","p",null,{"className":"pt-2 md:pl-2 md:pt-0","children":["Released Date:"," ","November 7, 2024"]}]]}]]}],["$","div",null,{"ref":"$undefined","className":"flex items-center p-6 pt-0 pl-4","children":["$","div",null,{"className":"w-full","children":[["$","p",null,{"className":"mb-4 flex","children":[["$","span",null,{"className":"min-w-[70px]","children":"Authors: "}],["$","span",null,{"children":[["$","span","0",{"children":[["$","span",null,{"className":"mr-[2px]","children":["Mu Yang",["$","sup",null,{"children":"1"}]]}],", "]}],["$","span","1",{"children":[["$","span",null,{"className":"mr-[2px]","children":["Bowen Shi",["$","sup",null,{"children":"2"}]]}],", "]}],["$","span","2",{"children":[["$","span",null,{"className":"mr-[2px]","children":["Matthew Le",["$","sup",null,{"children":"2"}]]}],", "]}],["$","span","3",{"children":[["$","span",null,{"className":"mr-[2px]","children":["Wei-Ning Hsu",["$","sup",null,{"children":"2"}]]}],", "]}],["$","span","4",{"children":[["$","span",null,{"className":"mr-[2px]","children":["Andros Tjandra",["$","sup",null,{"children":"2"}]]}],false]}]]}]]}],["$","p",null,{"className":"mb-4 flex","children":[["$","span",null,{"className":"min-w-[70px]","children":"Aff.: "}],["$","span",null,{"children":[["$","span","0",{"children":[["$","sup",null,{"children":1}],"Center for Robust Speech Systems (CRSS), University of Texas at Dallas, USA","; "]}],["$","span","1",{"children":[["$","sup",null,{"children":2}],"Meta AI, USA",false]}]]}]]}],["$","p",null,{"className":"flex","children":[["$","span",null,{"className":"min-w-[70px]","children":"Arxiv: "}],["$","a",null,{"className":"border-b border-dashed text-[#0083FA]","href":"http://arxiv.org/abs/2411.05141v1","target":"_blank","rel":"noopener noreferrer","children":"http://arxiv.org/abs/2411.05141v1"}]]}]]}]}]]}],["$","$L19",null,{"className":"border-gray mb-4 w-[100%] border-t border-solid text-black"}],""],["$","$L1a",null,{"contentEditableClassName":"prose max-w-none prose-table:w-max","markdown":"$1b"}],["$","$L1c",null,{"children":[["$","$L1d",null,{"slotName":"trigger","children":["$","div",null,{"className":"max-w-full overflow-x-auto font-[system-ui] font-light scrollbar-thin","children":["$","div",null,{"className":"h-fit cursor-pointer rounded-[5px] p-2 text-center","dangerouslySetInnerHTML":{"__html":"\n\n\n\n\n\n\n

Evaluation

Dataset

"}}]}]}],["$","$L1d",null,{"slotName":"content","className":"flex h-[90vh] w-[90vw] items-center justify-center","children":["$","div",null,{"className":"p-4 font-[system-ui] font-light","children":["$","div",null,{"className":"min-w-fit rounded-[5px] border border-solid p-2","dangerouslySetInnerHTML":{"__html":"\n\n\n\n\n\n\n

Evaluation

Dataset

"}}]}]}]]}]]}]]}],["$","$L1e",null,{"pageName":"share"}]]}]

notesum.ai

Audiobox TTA-RAG: Improving Zero-Shot and Few-Shot Text-To-Audio with Retrieval-Augmented Generation