@article{dolatabadi2023using, title = {Using Social Media to Help Understand Long COVID Patient-Reported Health Outcomes: A Natural Language Processing Approach}, author = {Dolatabadi, Elham and Moyano, Diana and Bales, Michael and Spasojevic, Sofija and Bhambhoria, Rohan and Bhatti, Junaid and Debnath, Shyamolima and Hoell, Nicholas and Li, Xin and Leng, Celine and others}, journal = {Journal of Medical Internet Research}, file = {dolatabadi2023using.pdf}, doi = {10.2196/55010}, year = {2023} }
Background: While scientific knowledge of post–COVID-19 condition (PCC) is growing, there remains significant uncertainty in the definition of the disease, its expected clinical course, and its impact on daily functioning. Social media platforms can generate valuable insights into patient-reported health outcomes, as the content is produced at high resolution by patients and caregivers, representing experiences that may be unavailable to most clinicians.
Objective: In this study, we aimed to determine the validity and effectiveness of advanced natural language processing approaches built to derive insight into PCC-related patient-reported health outcomes from the social media platforms Twitter and Reddit. We extracted PCC-related terms, including symptoms and conditions, and measured their occurrence frequency. We compared the outputs with human annotations and clinical outcomes and tracked symptom and condition term occurrences over time and locations to explore the pipeline's potential as a surveillance tool.
Methods: We used bidirectional encoder representations from transformers (BERT) models to extract and normalize PCC symptom and condition terms from English posts on Twitter and Reddit. We compared 2 named entity recognition models and implemented a 2-step normalization task to map extracted terms to unique concepts in standardized terminology. The normalization steps were done using a semantic search approach with BERT biencoders. We evaluated the effectiveness of BERT models in extracting the terms using a human-annotated corpus and a proximity-based score. We also compared the validity and reliability of the extracted and normalized terms to a web-based survey with more than 3000 participants from several countries.
Results: UmlsBERT-Clinical had the highest accuracy in predicting entities closest to those extracted by human annotators. Based on our findings, the top 3 most commonly occurring groups of PCC symptom and condition terms were systemic (such as fatigue), neuropsychiatric (such as anxiety and brain fog), and respiratory (such as shortness of breath). In addition, we found novel symptom and condition terms that had not been categorized in previous studies, such as infection and pain. Regarding co-occurring symptoms, the pair of fatigue and headaches was among the most frequently co-occurring term pairs across both platforms. Based on the temporal analysis, the neuropsychiatric terms were the most prevalent, followed by the systemic category, on both social media platforms. Our spatial analysis concluded that 42% (10,938/26,247) of the analyzed terms included location information, with the majority coming from the United States, United Kingdom, and Canada.
Conclusions: The outcome of our social media–derived pipeline is comparable with the results of peer-reviewed articles relevant to PCC symptoms. Overall, this study provides unique insights into the patient-reported health outcomes of PCC and valuable information about the patient journey that can help health care providers anticipate future needs.
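The 2-step normalization described in the Methods maps free-text symptom mentions onto standardized concepts via semantic search with a BERT biencoder. The following is a minimal sketch of that idea under assumptions: the checkpoint name and the toy concept list are illustrative placeholders, not the paper's actual terminology or model.

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")   # stand-in biencoder

concepts = ["Fatigue", "Anxiety", "Dyspnea", "Headache"]    # toy terminology
terms = ["feeling wiped out all day", "can't catch my breath"]

concept_emb = model.encode(concepts, convert_to_tensor=True)
term_emb = model.encode(terms, convert_to_tensor=True)

# nearest standardized concept for each extracted term
hits = util.semantic_search(term_emb, concept_emb, top_k=1)
for term, hit in zip(terms, hits):
    print(term, "->", concepts[hit[0]["corpus_id"]], round(hit[0]["score"], 3))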
@incollection{dahan2022analytics, title = {Analytics and EU Courts: The Case of Trademark Disputes}, author = {Dahan, Samuel and Bhambhoria, Rohan and Townsend, Simon and Zhu, Xiaodan}, booktitle = {The Changing European Union: A Critical View on the Role of Law and the Courts}, file = {dahan2022analytics.pdf}, doi = {10.2139/ssrn.3786069}, year = {2022} }
@incollection{bhambhoria2022towards, title = {Towards Providing Clinical Insights on Long COVID from Twitter Data}, author = {Bhambhoria, Rohan and Saab, Jad and Uppal, Sara and Li, Xin and Yakimovich, Artur and Bhatti, Junaid and Valdamudi, Nirma Khatri and Moyano, Diana and Bales, Michael and Dolatabadi, Elham and others}, booktitle = {Multimodal AI in Healthcare: A Paradigm Shift in Health Intelligence}, pages = {267--278}, year = {2022}, doi = {10.1007/978-3-031-14771-5_19}, file = {bhambhoria2022towards.pdf}, publisher = {Springer International Publishing}, address = {Cham} }
From the outset of the COVID-19 pandemic, social media has provided a platform for sharing and discussing experiences in real time. This rich source of information may also prove useful to researchers for uncovering evolving insights into post-acute sequelae of SARS-CoV-2 (PASC), commonly referred to as Long COVID. In order to leverage social media data, we propose using entity-extraction methods for providing clinical insights prior to defining subsequent downstream tasks. In this work, we address the gap between state-of-the-art entity recognition models and the extraction of clinically relevant entities, which may be useful for providing explanations and gaining relevant insights from Twitter data. We then propose an approach to bridge this gap by utilizing existing configurable tools and datasets to enhance the capabilities of these models. Code for this work is available at: https://github.com/VectorInstitute/ProjectLongCovid-NER.
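As a hedged illustration of the entity-extraction step (not the project's exact models, which live in the repository linked above), a generic token-classification pipeline can surface candidate clinical entities from a tweet-like post; the biomedical checkpoint named below is a public stand-in chosen for illustration.

from transformers import pipeline

# public biomedical NER checkpoint used purely as a stand-in
ner = pipeline("token-classification",
               model="d4data/biomedical-ner-all",
               aggregation_strategy="simple")

post = "3 months after covid and still dealing with brain fog and fatigue"
for entity in ner(post):
    print(entity["entity_group"], entity["word"], round(entity["score"], 2))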
@unpublished{bhambhoria2024evaluating, title = {Evaluating AI for Law: Bridging the Gap with Open-Source Solutions}, author = {Bhambhoria, Rohan and Dahan, Samuel and Li, Jonathan and Zhu, Xiaodan}, doi = {10.48550/arXiv.2404.12349}, file = {bhambhoria2024evaluating.pdf}, year = {2024} }
This study evaluates the performance of general-purpose AI, like ChatGPT, in legal question-answering tasks, highlighting significant risks to legal professionals and clients. It suggests leveraging foundational models enhanced by domain-specific knowledge to overcome these issues. The paper advocates for creating open-source legal AI systems to improve accuracy, transparency, and narrative diversity, addressing general AI’s shortcomings in legal contexts.
@inproceedings{luo2023prototype, title = {Prototype-Based Interpretability for Legal Citation Prediction}, author = {Luo, Chu Fei and Bhambhoria, Rohan and Dahan, Samuel and Zhu, Xiaodan}, booktitle = {Findings of ACL 2023}, file = {luo2023prototype.pdf}, doi = {10.18653/v1/2023.findings-acl.301}, year = {2023} }
Deep learning has made significant progress in the past decade, and demonstrates potential to solve problems with extensive social impact. In high-stakes decision making areas such as law, experts often require interpretability for automatic systems to be utilized in practical settings. In this work, we attempt to address these requirements applied to the important problem of legal citation prediction (LCP). We design the task with parallels to the thought-process of lawyers, i.e., with reference to both precedents and legislative provisions. After initial experimental results, we refine the target citation predictions with the feedback of legal experts. Additionally, we introduce a prototype architecture to add interpretability, achieving strong performance while adhering to decision parameters used by lawyers. Our study builds on and leverages the state-of-the-art language processing models for law, while addressing vital considerations for high-stakes tasks with practical societal impact.
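A minimal sketch of the prototype idea, under assumptions: classification is routed through similarities to a small set of learned prototype vectors, so each prediction can be traced back to its nearest prototype (for example, a representative precedent). The dimensions, class count, and cosine-similarity choice are illustrative, not the paper's configuration.

import torch
import torch.nn as nn
import torch.nn.functional as F

class PrototypeHead(nn.Module):
    def __init__(self, hidden_dim=768, num_prototypes=8, num_classes=4):
        super().__init__()
        # each prototype is a trainable vector in the encoder's space
        self.prototypes = nn.Parameter(torch.randn(num_prototypes, hidden_dim))
        self.classifier = nn.Linear(num_prototypes, num_classes)

    def forward(self, encoding):                     # (batch, hidden_dim)
        # similarity to every prototype doubles as the explanation
        sims = F.cosine_similarity(encoding.unsqueeze(1),
                                   self.prototypes.unsqueeze(0), dim=-1)
        return self.classifier(sims), sims           # logits + evidence

logits, sims = PrototypeHead()(torch.randn(2, 768))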
@inproceedings{li2023prefix, title = {Prefix Propagation: Parameter-Efficient Tuning for Long Sequences}, author = {Li, Jonathan and Aitken, Will and Bhambhoria, Rohan and Zhu, Xiaodan}, booktitle = {ACL 2023}, file = {li2023prefix.pdf}, doi = {10.18653/v1/2023.acl-short.120}, year = {2023} }
Parameter-efficient tuning aims to mitigate the large memory requirements of adapting pretrained language models for downstream tasks. For example, one popular method, prefix-tuning, prepends trainable tokens to sequences while freezing the rest of the model’s parameters. Although such models attain comparable performance with fine-tuning when applied to sequences with short to moderate lengths, we show their inferior performance when modelling long sequences. To bridge this gap, we propose prefix-propagation, a simple but effective approach that conditions prefixes on previous hidden states. We empirically demonstrate that prefix-propagation outperforms prefix-tuning across long-document tasks, while using 50% fewer parameters. To further investigate the proposed architecture, we also show its advantage in calibration, and perform additional study on its relationship with kernel attention. To the best of our knowledge, this work is the first to focus on parameter-efficient learning for long-sequence language tasks.
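The sketch below contrasts the two schemes on a toy two-layer encoder. It is a simplification for illustration only: real prefix-tuning injects prefixes into each layer's keys and values rather than the input sequence, but the propagation idea is the same, namely that prefix positions carry forward the previous layer's hidden states instead of a fixed learned prefix.

import torch
import torch.nn as nn

d, n_prefix = 64, 5
layer1 = nn.TransformerEncoderLayer(d_model=d, nhead=4, batch_first=True)
layer2 = nn.TransformerEncoderLayer(d_model=d, nhead=4, batch_first=True)
prefix = nn.Parameter(torch.randn(1, n_prefix, d))   # trainable prefix tokens
x = torch.randn(2, 20, d)                            # a batch of token states

h1 = layer1(torch.cat([prefix.expand(2, -1, -1), x], dim=1))

# prefix-tuning: every layer re-attaches the same fixed learned prefix
h2_tuning = layer2(torch.cat([prefix.expand(2, -1, -1), h1[:, n_prefix:]], dim=1))

# prefix-propagation: layer 2 instead consumes layer 1's outputs at the
# prefix positions, conditioning the prefix on previous hidden states
h2_prop = layer2(h1)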
@inproceedings{bhambhoria2023simple, title = {A Simple and Effective Framework for Strict Zero-Shot Hierarchical Classification}, author = {Bhambhoria, Rohan and Chen, Lei and Zhu, Xiaodan}, booktitle = {ACL 2023}, doi = {10.18653/v1/2023.acl-short.152}, file = {bhambhoria2023simple.pdf}, year = {2023} }
In recent years, large language models (LLMs) have achieved strong performance on benchmark tasks, especially in zero- or few-shot settings. However, these benchmarks often do not adequately address the challenges posed in the real world, such as hierarchical classification. In order to address this challenge, we propose refactoring conventional tasks on hierarchical datasets into a more indicative long-tail prediction task. We observe that LLMs are more prone to failure in these cases. To address these limitations, we propose the use of entailment-contradiction prediction in conjunction with LLMs, which allows for strong performance in a strict zero-shot setting. Importantly, our method achieves strong performance across multiple datasets without requiring any parameter updates, a resource-intensive process.
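A minimal sketch of the entailment-based approach under assumptions: an off-the-shelf NLI model acts as a zero-shot classifier and descends the label hierarchy level by level, so long-tail leaf labels are only scored under their predicted parent and no parameters are ever updated. The toy hierarchy and the checkpoint are illustrative choices, not the paper's datasets or models.

from transformers import pipeline

nli = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# toy two-level hierarchy; leaves are only scored under the chosen parent
hierarchy = {"science": ["physics", "biology"],
             "sports": ["tennis", "soccer"]}

text = "The team scored twice in the second half."
parent = nli(text, list(hierarchy))["labels"][0]   # entailment over parents
leaf = nli(text, hierarchy[parent])["labels"][0]   # then over its children
print(parent, "->", leaf)                          # no parameter updates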
@inproceedings{luo2023legally, title = {Legally Enforceable Hate Speech Detection for Public Forums}, author = {Luo, Chu Fei and Bhambhoria, Rohan and Zhu, Xiaodan and Dahan, Samuel}, booktitle = {Findings of EMNLP 2023}, doi = {10.18653/v1/2023.findings-emnlp.730}, file = {luo2023legally.pdf}, year = {2023} }
Hate speech causes widespread and deep-seated societal issues. Proper enforcement of hate speech laws is key for protecting groups of people against harmful and discriminatory language. However, determining what constitutes hate speech is a complex task that is highly open to subjective interpretations. Existing works do not align their systems with enforceable definitions of hate speech, which can make their outputs inconsistent with the goals of regulators. This research introduces a new perspective and task for enforceable hate speech detection centred around legal definitions, and a dataset annotated on violations of eleven possible definitions by legal experts. Given the challenge of identifying clear, legally enforceable instances of hate speech, we augment the dataset with expert-generated samples and an automatically mined challenge set. We experiment with grounding the model decision in these definitions using zero-shot and few-shot prompting. We then report results on several large language models (LLMs). With this task definition, automatic hate speech detection can be more closely aligned to enforceable laws, and hence assist in more rigorous enforcement of legal protections against harmful speech in public forums.
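As an illustration of grounding the decision in an enforceable definition, a zero-shot prompt can pair one legal definition with the post and ask for a verdict plus the clause relied upon. The definition text and template below are hypothetical placeholders, not the paper's eleven expert-annotated definitions or its exact prompts.

definition = ("Definition D3 (hypothetical): speech that advocates violence "
              "against a group on the basis of a protected attribute.")
post = "<a public forum post>"

prompt = (f"{definition}\n\n"
          f"Post: {post}\n\n"
          "Does this post violate the definition above? Answer 'violation' "
          "or 'no violation', then cite the clause of the definition your "
          "answer relies on.")
# send `prompt` to any instruction-tuned LLM; a few-shot variant prepends
# expert-annotated examples before the post
print(prompt)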
@inproceedings{li2022parameter, title = {Parameter-Efficient Legal Domain Adaptation}, author = {Li, Jonathan and Bhambhoria, Rohan and Zhu, Xiaodan}, booktitle = {Proceedings of the Natural Legal Language Processing Workshop 2022}, year = {2022}, doi = {10.18653/v1/2022.nllp-1.10}, file = {li2022parameter.pdf} }
Seeking legal advice is often expensive. Recent advancements in machine learning for solving complex problems can be leveraged to help make legal services more accessible to the public. However, real-life applications encounter significant challenges. State-of-the-art language models are growing increasingly large, making parameter-efficient learning increasingly important. Unfortunately, parameter-efficient methods perform poorly with small amounts of data, which are common in the legal domain (where data labelling costs are high). To address these challenges, we propose parameter-efficient legal domain adaptation, which uses vast unsupervised legal data from public legal forums to perform legal pre-training. This method exceeds or matches the few-shot performance of existing models such as LEGAL-BERT on various legal tasks while tuning only approximately 0.1% of model parameters. Additionally, we show that our method can achieve calibration comparable to existing methods across several tasks. To the best of our knowledge, this work is among the first to explore parameter-efficient methods of tuning language models in the legal domain.
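A hedged sketch of the recipe: continue masked-language-model pre-training on unlabeled legal-forum text while updating only a small fraction of parameters. LoRA via the peft library is used here as a stand-in for the paper's specific parameter-efficient method; the base checkpoint and hyperparameters are illustrative assumptions.

from transformers import AutoModelForMaskedLM, AutoTokenizer
from peft import LoraConfig, get_peft_model

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")

# wrap attention projections with low-rank adapters; everything else is frozen
peft_model = get_peft_model(model, LoraConfig(r=8, target_modules=["query", "value"]))
peft_model.print_trainable_parameters()   # a fraction of a percent of all weights
# then: run a standard MLM Trainer over unlabeled legal-forum posts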
@inproceedings{bhambhoria2022interpretable, title = {Interpretable Low-Resource Legal Decision Making}, author = {Bhambhoria, Rohan and Liu, Hui and Dahan, Samuel and Zhu, Xiaodan}, booktitle = {AAAI Conference on Artificial Intelligence (AAAI)}, file = {bhambhoria2022interpretable.pdf}, doi = {10.1609/aaai.v36i11.21438}, year = {2022} }
Over the past several years, legal applications of deep learning have been on the rise. However, as with other high-stakes decision making areas, the requirement for interpretability is of crucial importance. Current models utilized by legal practitioners are mostly of the conventional machine learning type; they are inherently interpretable, yet unable to harness the performance capabilities of data-driven deep learning models. In this work, we utilize deep learning models in the area of trademark law to shed light on the issue of likelihood of confusion between trademarks. Specifically, we introduce a model-agnostic interpretable intermediate layer, a technique which proves to be effective for legal documents. Furthermore, we utilize weakly supervised learning by means of a curriculum learning strategy, effectively demonstrating the improved performance of a deep learning model. This is in contrast to the conventional models, which are only able to utilize the limited number of expensive manually annotated samples provided by legal experts. Although the methods presented in this work tackle the task of risk of confusion for trademarks, it is straightforward to extend them to other fields of law or, more generally, to other similar high-stakes application scenarios.
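One way to picture a model-agnostic interpretable intermediate layer is as a small bottleneck whose units are tied to human-meaningful factors, so every prediction flows through activations a lawyer can read as evidence. The sketch below is that general idea only; the factor names, dimensions, and binary decision head are assumptions for illustration, not the paper's architecture.

import torch
import torch.nn as nn

# hypothetical human-meaningful factors for trademark confusion
FACTORS = ["visual_similarity", "phonetic_similarity", "goods_overlap"]

class InterpretableHead(nn.Module):
    def __init__(self, hidden_dim=768):
        super().__init__()
        self.to_factors = nn.Linear(hidden_dim, len(FACTORS))
        self.decision = nn.Linear(len(FACTORS), 2)   # confusion: yes / no

    def forward(self, encoding):                     # (batch, hidden_dim)
        # the bottleneck: the prediction must pass through named factors
        factors = torch.sigmoid(self.to_factors(encoding))
        return self.decision(factors), dict(zip(FACTORS, factors.T))

logits, evidence = InterpretableHead()(torch.randn(4, 768))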
@inproceedings{luo2022evaluating, title = {Evaluating Explanation Correctness in Legal Decision Making}, author = {Luo, Chu Fei and Bhambhoria, Rohan and Dahan, Samuel and Zhu, Xiaodan}, booktitle = {Canadian Conference on Artificial Intelligence}, file = {luo2022evaluating.pdf}, doi = {10.21428/594757db.8718dc8b}, year = {2022} }
As machine learning models are being extensively deployed across many applications, concerns are rising with regard to their trustworthiness. Explainable models have become an important topic of interest for high-stakes decision making, but their evaluation in the legal domain remains seriously understudied; existing work does not gather thorough feedback from subject matter experts to inform its evaluation. Our work aims to quantify the faithfulness and plausibility of explainable AI methods over several legal tasks, using computational evaluation and user studies directly involving lawyers. The computational evaluation measures faithfulness (how close the explanation is to the model's true reasoning), while the user studies measure plausibility (how reasonable the explanation is to a subject matter expert). The general goal of this evaluation is to find a more accurate indication of whether or not machine learning methods are able to adequately satisfy legal requirements.
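For the computational side, one widely used faithfulness check is comprehensiveness: delete the tokens an explanation marks as important and measure how much the model's confidence drops. The function below is a minimal sketch of that family of measures, not the paper's exact protocol; predict_proba is a hypothetical model interface assumed for illustration.

def comprehensiveness(predict_proba, tokens, important_idx, label):
    # confidence on the full input
    full = predict_proba(tokens)[label]
    # confidence once the "important" tokens are removed
    kept = [t for i, t in enumerate(tokens) if i not in set(important_idx)]
    # a faithful explanation causes a large drop (larger score = more faithful)
    return full - predict_proba(kept)[label]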
@inproceedings{bhambhoria2021investigating, title = {Investigating the State-of-the-Art Performance and Explainability of Legal Judgment Prediction}, author = {Bhambhoria, Rohan and Dahan, Samuel and Zhu, Xiaodan}, booktitle = {Canadian Conference on Artificial Intelligence}, doi = {10.21428/594757db.a66d81b6}, file = {bhambhoria2021investigating.pdf}, year = {2021} }
In the past decade, deep learning models have achieved impressive performance on a wide range of tasks. However, they still face challenges in many high-stakes problems. In this paper we study legal judgment prediction (LJP), an important high-stakes task that utilizes fact descriptions obtained from court cases to make final judgments. We investigate the state of the art of the LJP task by leveraging one of the most recent deep learning models, Longformer, and demonstrate that we obtain state-of-the-art performance, even with a limited amount of training data, benefiting from the advantage of pretraining and Longformer's long-sequence modeling capability. However, our analyses suggest that the improvement is due to the model's fitting to spurious correlations, in which the model makes correct decisions based on information irrelevant to the task itself. We advocate that caution should be seriously exercised when explaining the obtained results. The second challenge in many high-stakes problems is the interpretability required of models. The final predictions made by deep learning models are useful only if the evidence that supports the models' decisions is consistent with that used by subject-matter experts. We demonstrate that, by using post-hoc interpretation, the conventional method XGBoost is actually capable of providing explainable results with performance comparable to the Longformer model, while not being subject to the spurious correlation issue. We hope our work contributes to the line of research on understanding the advantages and limitations of deep learning for high-stakes problems.
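A hedged sketch of the conventional baseline discussed above: TF-IDF features with XGBoost, whose learned feature importances give a direct post-hoc view of which terms drive the judgment. The two-document corpus and labels are placeholders; the paper works with court-case fact descriptions.

from sklearn.feature_extraction.text import TfidfVectorizer
from xgboost import XGBClassifier

texts = ["fact description of case one ...",  # placeholder fact descriptions
         "fact description of case two ..."]
labels = [0, 1]                                # placeholder judgment labels

vec = TfidfVectorizer(max_features=5000)
X = vec.fit_transform(texts)

clf = XGBClassifier(n_estimators=100).fit(X, labels)

# post-hoc interpretation: terms ranked by learned importance
top_terms = sorted(zip(vec.get_feature_names_out(), clf.feature_importances_),
                   key=lambda pair: -pair[1])[:10]
print(top_terms)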
@inproceedings{bhambhoria2020smart, title = {A smart system to generate and validate question answer pairs for COVID-19 literature}, author = {Bhambhoria, Rohan and Feng, Luna and Sepehr, Dawn and Chen, John and Cowling, Conner and Kocak, Sedef and Dolatabadi, Elham}, doi = {10.18653/v1/2020.sdp-1.4}, file = {bhambhoria2020smart.pdf}, booktitle = {Proceedings of the First Workshop on Scholarly Document Processing}, pages = {20--30}, year = {2020} }
Automatically generating question answer (QA) pairs from the rapidly growing coronavirus-related literature is of great value to the medical community. Creating high-quality QA pairs would allow researchers to build models to address scientific queries for answers which are not readily available in support of the ongoing fight against the pandemic. QA pair generation is, however, a very tedious and time-consuming task requiring domain expertise for annotation and evaluation. In this paper we present our contribution in addressing some of the challenges of building a QA system without gold data. We first present a method to create QA pairs from a large semi-structured dataset through the use of transformer and rule-based models. Next, we propose a means of engaging subject matter experts (SMEs) for annotating the QA pairs through the usage of a web application. Finally, we demonstrate some experiments showcasing the effectiveness of leveraging active learning in designing a high-performing model with a substantially lower annotation effort from the domain experts.
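The active-learning component can be illustrated by its selection step: rank unlabeled QA pairs by model uncertainty and route the most uncertain ones to the subject matter experts first. This entropy-based sketch assumes a hypothetical model.predict_proba interface and is not the paper's exact acquisition function.

import numpy as np

def most_uncertain(model, pool, k=10):
    # score each unlabeled QA pair by predictive entropy
    probs = model.predict_proba(pool)              # shape: (n_items, n_classes)
    entropy = -(probs * np.log(probs + 1e-12)).sum(axis=1)
    # highest-entropy items go to the experts for annotation first
    return np.argsort(-entropy)[:k]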
@inproceedings{lam2020gap, title = {The Gap between Deep Learning and Law: Predicting Employment Notice}, author = {Lam, Jason and Bhambhoria, Rohan and Liang, David and Zhu, Xiaodan and Dahan, Samuel}, booktitle = {ICML Workshop on Law \& Machine Learning (LML)}, file = {lam2020gap.pdf}, year = {2020} }
This study aims to determine whether natural language processing with deep learning models can shed new light on the Canadian calculation system for employment notice. In particular, we investigate whether deep learning can enhance the predictability of the notice period, that is, whether it is possible to predict notice periods with high accuracy. A major challenge with the classification of reasonable notice is the inconsistency of the case law. As argued by the Ontario Court of Appeal, the process of determining reasonable notice is "more art than science". In a previous study, we assessed the predictability of reasonable notice periods by applying statistical machine learning to a hand-annotated dataset of 850 cases. Building on that study, this paper utilizes state-of-the-art deep learning models on free-text summaries of cases. We further experiment with a variety of domain adaptations of state-of-the-art pretrained BERT-esque models. Our results appear to show that the domain adaptations of BERT-esque models negatively affected performance. Our best-performing model was an out-of-the-box RoBERTa-base model, which achieved 69% accuracy using a +/-2 prediction window.
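The reported metric can be made concrete with a small sketch: a prediction counts as correct when it lands within +/-2 units of the true notice period (the unit and bucketing are assumptions here, not stated above).

def window_accuracy(y_true, y_pred, window=2):
    # fraction of predictions within +/-`window` of the true notice period
    hits = sum(abs(t - p) <= window for t, p in zip(y_true, y_pred))
    return hits / len(y_true)

print(window_accuracy([12, 6, 8], [10, 6, 11]))  # 2 of 3 within +/-2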
@phdthesis{bhambhoria2024natural, title = {Natural Language Processing for Justifiable Legal Practitioner Assistance}, author = {Bhambhoria, Rohan}, year = {2024}, file = {bhambhoria2024natural.pdf}, url = {https://hdl.handle.net/1974/32803}, school = {Queen's University} }
The significant advancement of artificial intelligence (AI) has had a profound impact on many areas of application. With natural language processing (NLP) being a key player in this technological progress, exploring NLP within vital areas represents a pathway for immediate practical benefits while also challenging existing AI solutions and helping to develop better models. This dissertation is centered around a comprehensive examination of NLP in the legal domain, a realm where the interpretation of language is of critical importance. In this dissertation, we investigate three main challenges: (i) The interpretability of NLP models, which is essential for legal practitioners to derive meaningful insights. To address this challenge, we utilize post-hoc and ante-hoc methods of interpretability and show that the latter can be used to develop versatile interpretable solutions for several tasks in the legal domain. Specifically, we introduce a system that contains model-agnostic interpretable intermediate layers, a technique that proves to be effective for legal documents. (ii) The evaluation of existing datasets, identifying the disparity between the controlled experimental settings of academic research and real-world legal problems. For this challenge, we propose a systematic annotation protocol that involves the expertise of legal professionals in dataset curation. We utilize weakly supervised learning by means of a curriculum learning strategy, effectively demonstrating the improved performance of a deep learning model. Then, we design a task with parallels to the thought process of lawyers, i.e., with reference to both precedents and legislative provisions. Additionally, we introduce a prototype architecture to add interpretability, achieving strong performance while adhering to decision parameters used by lawyers. (iii) Lastly, the adaptation of general-purpose NLP models to specific legal tasks. We refactor conventional tasks on hierarchical datasets into a more indicative long-tail prediction task and observe that LLMs are prone to failure in these cases. To address these limitations, we propose the use of entailment-contradiction prediction in conjunction with LLMs, which allows for strong performance in a strict zero-shot setting. Our method achieves strong performance across multiple datasets without requiring any parameter updates, a resource-intensive process.
@misc{dahan2020ai, title = {AI-Powered Trademark Dispute Resolution: Expert Opinion Commissioned by the European Union Intellectual Property Office (EUIPO)}, author = {Dahan, Samuel and Bhambhoria, Rohan and Zhu, Xiaodan and Townsend, Simon}, file = {dahan2020ai.pdf}, year = {2020} }