<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Hum Factors</journal-id><journal-id journal-id-type="publisher-id">humanfactors</journal-id><journal-id journal-id-type="index">6</journal-id><journal-title>JMIR Human Factors</journal-title><abbrev-journal-title>JMIR Hum Factors</abbrev-journal-title><issn pub-type="epub">2292-9495</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e84971</article-id><article-id pub-id-type="doi">10.2196/84971</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Chatbot Usability Scale in Chinese Users: Cross-Cultural Adaptation and Validation Study</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Ma</surname><given-names>Haoming</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Pei</surname><given-names>Runyuan</given-names></name><degrees>BSN</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Li</surname><given-names>Sijia</given-names></name><degrees>MSN</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wang</surname><given-names>Aoqi</given-names></name><degrees>MSN</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Tang</surname><given-names>Xingyi</given-names></name><degrees>MSN</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Piao</surname><given-names>Meihua</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff id="aff1"><institution>School of Nursing, Peking Union Medical College, Chinese Academy of Medical Sciences</institution><addr-line>33 Badachu Road</addr-line><addr-line>Beijing</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Kushniruk</surname><given-names>Andre</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Gronier</surname><given-names>Guillaume</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Brooke</surname><given-names>John</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Meihua Piao, PhD, School of Nursing, Peking Union Medical College, Chinese Academy of Medical Sciences, 33 Badachu Road, Beijing, 100144, China, 86 13522112889; <email>parkmihua@snu.ac.kr</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>6</day><month>4</month><year>2026</year></pub-date><volume>13</volume><elocation-id>e84971</elocation-id><history><date date-type="received"><day>28</day><month>09</month><year>2025</year></date><date date-type="rev-recd"><day>10</day><month>01</month><year>2026</year></date><date date-type="accepted"><day>19</day><month>02</month><year>2026</year></date></history><copyright-statement>&#x00A9; Haoming Ma, Runyuan Pei, Sijia Li, Aoqi Wang, Xingyi Tang, Meihua Piao. 
Originally published in JMIR Human Factors (<ext-link ext-link-type="uri" xlink:href="https://humanfactors.jmir.org">https://humanfactors.jmir.org</ext-link>), 6.4.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Human Factors, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://humanfactors.jmir.org">https://humanfactors.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://humanfactors.jmir.org/2026/1/e84971"/><abstract><sec><title>Background</title><p>Chatbots are increasingly deployed across various domains; however, systematic evaluation of their usability remains limited, particularly in non-Western contexts. The 11-item Chatbot Usability Scale (BUS-11), a multidimensional instrument grounded in human-computer interaction theory, has demonstrated strong psychometric properties in prior studies; however, a validated Chinese version does not exist, despite China being one of the largest chatbot markets.</p></sec><sec><title>Objective</title><p>This study aimed to translate, culturally adapt, and validate the BUS-11 for Chinese users.</p></sec><sec sec-type="methods"><title>Methods</title><p>Following established cross-cultural adaptation procedures, the scale was forward- and back-translated, reviewed by an expert committee, and pilot-tested for clarity and feasibility. 
A main validation study was then conducted with 214 participants, who completed 438 evaluations of the chatbots across 10 widely used systems.</p></sec><sec sec-type="results"><title>Results</title><p>Psychometric analyses demonstrated excellent scale-level content validity index (ie, 0.92), strong internal consistency (Cronbach &#x03B1;=0.92), and a clear 3-factor structure (Accessibility, Interaction Process Quality, and Information Quality), explaining 56.1% of the variance. Meanwhile, privacy or security and response time were retained as single-item indicators. The Chinese BUS-11 was concise, user-friendly, and psychometrically robust.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This work fills a critical gap by providing the first validated instrument for assessing chatbot usability in Chinese contexts, enabling reliable cross-cultural comparisons and supporting both research and practical design evaluation in human-computer interaction.</p></sec></abstract><kwd-group><kwd>chatbot</kwd><kwd>usability</kwd><kwd>satisfaction</kwd><kwd>user experience</kwd><kwd>cross-cultural adaptation</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Chatbots (conversational agents) are artificial intelligence systems designed to interact with users through natural language dialog [<xref ref-type="bibr" rid="ref1">1</xref>]. They have become increasingly ubiquitous across the globe in domains ranging from online retail and customer service to public administration and health care [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>]. China represents a particularly active context for chatbot adoption, with large-scale use in commercial platforms, government service portals, and hospital-based patient guidance systems [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. 
As chatbots become embedded in everyday services, their usability becomes increasingly consequential because it shapes user satisfaction, acceptance, and sustained engagement [<xref ref-type="bibr" rid="ref8">8</xref>].</p><p>In human-computer interaction (HCI), usability refers to how easy, efficient, and satisfying a system is for users to achieve their goals [<xref ref-type="bibr" rid="ref9">9</xref>]. For chatbots, usability extends beyond general ease of use to include interaction qualities that are specific to conversation&#x2014;such as turn-taking, dialog coherence, response appropriateness, and perceived trustworthiness [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. However, commonly used usability instruments (eg, the System Usability Scale) were developed for graphical user interfaces and provide limited coverage of conversational properties central to chatbot interaction [<xref ref-type="bibr" rid="ref12">12</xref>]. To better reflect the chatbot context, the Chatbot Usability Questionnaire (CUQ) adapts traditional usability constructs to conversational agents, emphasizing ease of use, navigability, clarity of onboarding, and basic error handling [<xref ref-type="bibr" rid="ref13">13</xref>]. Yet, the CUQ remains anchored in general usability notions and may insufficiently capture AI-specific determinants of sustained chatbot use, including information adequacy, contextual understanding, responsiveness, and privacy- and trust-related concerns.</p><p>To address these gaps, Borsci et al [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref14">14</xref>] proposed the 11-item Chatbot Usability Scale (BUS-11), a theory-driven, chatbot-native instrument developed from HCI principles and validated through multistage psychometric testing across different chatbot systems and contexts. 
The BUS-11 operationalizes chatbot usability as a multidimensional construct encompassing Accessibility, Interaction Process Quality, Information Quality, Privacy and Security, and Response Time, thereby facilitating comprehensive evaluation and cross-system comparability. Despite its growing international adoption, a validated Chinese version of the BUS-11 is currently lacking, which limits the rigorous measurement of usability in one of the world&#x2019;s largest chatbot markets.</p><p>Cross-cultural adaptation of usability instruments requires ensuring semantic, conceptual, and cultural equivalence to preserve measurement validity across contexts [<xref ref-type="bibr" rid="ref15">15</xref>]. Building on established adaptation guidelines and prior localization efforts in Chinese HCI research [<xref ref-type="bibr" rid="ref16">16</xref>], this study aims to translate and culturally adapt the BUS-11 into Chinese and to evaluate its psychometric properties. Specifically, we (1) conducted a rigorous translation and cultural adaptation of the BUS-11, (2) examined reliability, validity, and factor structure using data from 214 participants and 438 chatbot evaluations, and (3) provided initial evidence on Chinese users&#x2019; perceptions of chatbot usability to facilitate future cross-cultural comparisons in chatbot user experience (UX) research [<xref ref-type="bibr" rid="ref17">17</xref>].</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Scale Translation and Cultural Adaptation</title><sec id="s2-1-1"><title>Overview</title><p>Following authorization from the original scale&#x2019;s corresponding author, the translation of the BUS-11 into Chinese was conducted using the Brislin translation model as improved by Jones et al [<xref ref-type="bibr" rid="ref18">18</xref>]. 
Consistent with established guidance for cross-cultural instrument adaptation (eg, forward-back translation with expert reconciliation), we documented all translation decisions to ensure semantic and conceptual equivalence. The specific steps are as follows: (1) forward translation: the researcher and a bilingual professional proficient in both Chinese and English independently translated the original BUS-11 into Chinese. Discrepancies and alternative phrasings were recorded in a reconciliation log and resolved through consensus, resulting in an initial Chinese draft; (2) back translation: another bilingual translator, whose native language was Chinese, translated the initial Chinese draft back into English, producing a back-translated English version; (3) consistency check: a monolingual English-speaking expert familiar with the BUS-11 reviewed and compared the original scale with the back-translated version. Any residual inconsistencies (eg, colloquialisms or culture-specific expressions) were discussed in joint meetings with all translators until consensus was achieved, ensuring conceptual and semantic consistency; (4) final translation: a final Chinese translation was prepared collaboratively by all translators, incorporating adjustments from the consistency review, thus establishing semantic equivalence with the original BUS-11. Minor wording refinements were made for clarity and idiomatic naturalness (no item content was substantively altered). 
At the end of the cultural adaptation process, the final Chinese version of the BUS-11 and the original English items are presented side by side in <xref ref-type="table" rid="table1">Table 1</xref> to facilitate direct comparison of item wording and to enhance the transparency of the adaptation procedure.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Original English and Chinese versions of the 11-item Chatbot Usability Scale (BUS-11).</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Dimension in English (Chinese)</td><td align="left" valign="bottom">Item in English (Chinese)</td></tr></thead><tbody><tr><td align="left" valign="top">Access the chatbot&#x2019;s functions (&#x4E00;, &#x8BBF;&#x95EE;&#x804A;&#x5929;&#x673A;&#x5668;&#x4EBA;&#x7684;&#x529F;&#x80FD;)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>The chatbot function was easily detectable (&#x5BB9;&#x6613;&#x627E;&#x5230;&#x6B64;&#x804A;&#x5929;&#x673A;&#x5668;&#x4EBA;&#x7684;&#x5404;&#x79CD;&#x529F;&#x80FD; [&#x4F8B;&#xFF1A;&#x4FEE;&#x6539;&#x804A;&#x5929;&#x673A;&#x5668;&#x4EBA;&#x4F7F;&#x7528;&#x7684;&#x8BED;&#x8A00;&#x6216;&#x4FEE;&#x6539;&#x5934;&#x50CF;])</p></list-item><list-item><p>It was easy to find the chatbot (&#x627E;&#x5230;&#x6B64;&#x804A;&#x5929;&#x673A;&#x5668;&#x4EBA;&#x7684;&#x5165;&#x53E3;&#x662F;&#x4E00;&#x4EF6;&#x7B80;&#x5355;&#x7684;&#x4E8B;)</p></list-item></list></td></tr><tr><td align="left" valign="top">Quality of chatbot functions (&#x4E8C;, &#x804A;&#x5929;&#x673A;&#x5668;&#x4EBA;&#x529F;&#x80FD;&#x7684;&#x8D28;&#x91CF;)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Communicating with the chatbot was clear (&#x4E0E;&#x6B64;&#x804A;&#x5929;&#x673A;&#x5668;&#x4EBA;&#x7684;&#x6C9F;&#x901A;&#x8FC7;&#x7A0B;&#x662F;&#x987A;&#x7545;&#x7684;)</p></list-item><list-item><p>The chatbot was able to keep track of context 
(&#x6B64;&#x804A;&#x5929;&#x673A;&#x5668;&#x4EBA;&#x80FD;&#x591F;&#x4FDD;&#x6301;&#x4E0A;&#x4E0B;&#x6587;&#x7684;&#x8FDE;&#x8D2F;&#x6027;)</p></list-item><list-item><p>The chatbot&#x2019;s responses were easy to understand (&#x6B64;&#x804A;&#x5929;&#x673A;&#x5668;&#x4EBA;&#x7684;&#x56DE;&#x590D;&#x6613;&#x4E8E;&#x7406;&#x89E3;)</p></list-item></list></td></tr><tr><td align="left" valign="top">Provide quality of conversation and information (&#x4E09;, &#x63D0;&#x4F9B;&#x5BF9;&#x8BDD;&#x548C;&#x4FE1;&#x606F;&#x7684;&#x8D28;&#x91CF;)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>I find that the chatbot understands what I want and helps me achieve my goal (&#x6211;&#x611F;&#x89C9;&#x6B64;&#x804A;&#x5929;&#x673A;&#x5668;&#x4EBA;&#x7406;&#x89E3;&#x6211;&#x7684;&#x76EE;&#x7684;&#xFF0C;&#x5E76;&#x534F;&#x52A9;&#x6211;&#x8FBE;&#x6210;&#x76EE;&#x7684;)</p></list-item><list-item><p>The chatbot gives me the appropriate amount of information (&#x6B64;&#x804A;&#x5929;&#x673A;&#x5668;&#x4EBA;&#x63D0;&#x4F9B;&#x7684;&#x4FE1;&#x606F;&#x91CF;&#x662F;&#x5408;&#x9002;&#x7684;&#xFF0C;&#x4E0D;&#x591A;&#x4E0D;&#x5C11;)</p></list-item><list-item><p>The chatbot only gives me the information I need (&#x6B64;&#x804A;&#x5929;&#x673A;&#x5668;&#x4EBA;&#x53EA;&#x63D0;&#x4F9B;&#x4E86;&#x6211;&#x9700;&#x8981;&#x7684;&#x4FE1;&#x606F;)</p></list-item><list-item><p>I feel like the chatbot&#x2019;s responses were accurate (&#x6211;&#x89C9;&#x5F97;&#x6B64;&#x804A;&#x5929;&#x673A;&#x5668;&#x4EBA;&#x7684;&#x56DE;&#x590D;&#x5185;&#x5BB9;&#x6B63;&#x786E;&#x65E0;&#x8BEF;)</p></list-item></list></td></tr><tr><td align="left" valign="top">Privacy and security (&#x56DB;, &#x9690;&#x79C1;&#x548C;&#x5B89;&#x5168;)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>I believe the chatbot informs me of any possible privacy issues 
(&#x6211;&#x76F8;&#x4FE1;&#x6B64;&#x804A;&#x5929;&#x673A;&#x5668;&#x4EBA;&#x4F1A;&#x544A;&#x77E5;&#x6211;&#x4EFB;&#x4F55;&#x6F5C;&#x5728;&#x7684;&#x9690;&#x79C1;&#x95EE;&#x9898;)</p></list-item></list></td></tr><tr><td align="left" valign="top">Response time (&#x4E94;, &#x56DE;&#x590D;&#x65F6;&#x95F4;)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>My waiting time for a response from the chatbot was short (&#x6211;&#x7B49;&#x5F85;&#x804A;&#x5929;&#x673A;&#x5668;&#x4EBA;&#x56DE;&#x590D;&#x7684;&#x65F6;&#x95F4;&#x662F;&#x77ED;&#x7684;)</p></list-item></list></td></tr></tbody></table></table-wrap></sec><sec id="s2-1-2"><title>Cultural Adaptation</title><p>In June 2024, we convened an expert committee (n=6) to evaluate the cultural appropriateness of the Chinese translation. The expert committee comprised 6 specialists drawn from the broader panel reported in <xref ref-type="table" rid="table2">Table 2</xref>, all of whom had doctoral-level training or senior professional experience in HCI, usability or UX evaluation, health informatics, or scale development. Eligibility criteria included: (1) formal training in a relevant discipline, (2) demonstrated experience in chatbot-related research or usability evaluation, and (3) prior involvement in scale development, instrument adaptation, or related methodological work. Each expert independently compared the Chinese items with the original BUS-11 and assessed semantic and conceptual equivalence, idiomatic appropriateness, and readability. Disagreements were resolved through iterative discussion until consensus was reached. 
Revisions focused on wording-level refinements to improve clarity and align the tone with standard Chinese conversational norms, without altering the intended coverage of the construct.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Background information of the experts.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Expert</td><td align="left" valign="bottom">Sex</td><td align="left" valign="bottom">Age (years)</td><td align="left" valign="bottom">Education</td><td align="left" valign="bottom">Experience (years)</td><td align="left" valign="bottom">Domains</td><td align="left" valign="bottom">Title</td></tr></thead><tbody><tr><td align="left" valign="top">1</td><td align="left" valign="top">Male</td><td align="left" valign="top">37</td><td align="left" valign="top">Doctorate</td><td align="left" valign="top">5</td><td align="left" valign="top">Human-Computer Interaction</td><td align="left" valign="top">Researcher</td></tr><tr><td align="left" valign="top">2</td><td align="left" valign="top">Male</td><td align="left" valign="top">38</td><td align="left" valign="top">Doctorate</td><td align="left" valign="top">3</td><td align="left" valign="top">Human-Computer Interaction</td><td align="left" valign="top">Associate Researcher</td></tr><tr><td align="left" valign="top">3</td><td align="left" valign="top">Male</td><td align="left" valign="top">31</td><td align="left" valign="top">Doctorate</td><td align="left" valign="top">1</td><td align="left" valign="top">Health Informatics</td><td align="left" valign="top">Associate Researcher</td></tr><tr><td align="left" valign="top">4</td><td align="left" valign="top">Male</td><td align="left" valign="top">33</td><td align="left" valign="top">Doctorate</td><td align="left" valign="top">6</td><td align="left" valign="top">Health Informatics</td><td align="left" valign="top">Associate Researcher</td></tr><tr><td align="left" 
valign="top">5</td><td align="left" valign="top">Male</td><td align="left" valign="top">50</td><td align="left" valign="top">Doctorate</td><td align="left" valign="top">27</td><td align="left" valign="top">Computer Science</td><td align="left" valign="top">Professor</td></tr><tr><td align="left" valign="top">6</td><td align="left" valign="top">Male</td><td align="left" valign="top">36</td><td align="left" valign="top">Doctorate</td><td align="left" valign="top">12</td><td align="left" valign="top">Usability/UX<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> Evaluation</td><td align="left" valign="top">Associate Researcher</td></tr><tr><td align="left" valign="top">7</td><td align="left" valign="top">Female</td><td align="left" valign="top">42</td><td align="left" valign="top">Doctorate</td><td align="left" valign="top">15</td><td align="left" valign="top">Health<break/>Informatics</td><td align="left" valign="top">Associate Researcher</td></tr><tr><td align="left" valign="top">8</td><td align="left" valign="top">Male</td><td align="left" valign="top">34</td><td align="left" valign="top">Doctorate</td><td align="left" valign="top">8</td><td align="left" valign="top">Chatbot Systems Engineering</td><td align="left" valign="top">Senior Engineer</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>UX: user experience.</p></fn></table-wrap-foot></table-wrap></sec></sec><sec id="s2-2"><title>Pilot Study</title><sec id="s2-2-1"><title>Overview</title><p>To ensure participants evaluated chatbots after a realistic, standardized interaction, we designed representative use-case tasks for 10 commonly used chatbot systems in China (refer to <xref ref-type="table" rid="table3">Table 3</xref> for details). For example, participants were instructed to interact with Taobao&#x2019;s intelligent customer service, simulating a typical query related to membership benefits: &#x201C;Your friend recommends you purchase Taobao&#x2019;s 88vip. 
Please use Taobao&#x2019;s chatbot &#x2018;Smart Customer Service Xiao Mi&#x2019; to help you understand the specific benefits of 88vip. Log in as instructed and converse with this chatbot.&#x201D; Refer to <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for details. Immediately after completing the assigned task, participants filled out the Chinese BUS-11 about that chatbot. The same task-rating flow was later applied in the main study.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Overview of the chatbots evaluated in the study. General-purpose large language model (LLM)-based chatbots (eg, Doubao, Wenxin Yiyan, and Kimi) were evaluated using a single domain-specific task to ensure consistency with the task-based evaluation applied to all systems.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Chatbot (developers)</td><td align="left" valign="bottom">Domain</td><td align="left" valign="bottom">Platform</td><td align="left" valign="bottom">Primary function</td><td align="left" valign="bottom">Example task</td></tr></thead><tbody><tr><td align="left" valign="top">AliMe (Taobao; Alibaba)</td><td align="left" valign="top">E-commerce</td><td align="left" valign="top">App or web</td><td align="left" valign="top">Customer service</td><td align="left" valign="top">Understanding Taobao 88VIP benefits</td></tr><tr><td align="left" valign="top">Zhihu Zhida (Zhihu Inc)</td><td align="left" valign="top">Knowledge Q and A<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top">App or web</td><td align="left" valign="top">Knowledge explanation</td><td align="left" valign="top">Pomodoro study method</td></tr><tr><td align="left" valign="top">Tencent Cloud Presales Bot (Tencent Holdings Ltd)</td><td align="left" valign="top">Enterprise services</td><td align="left" valign="top">Web</td><td align="left" valign="top">Product 
consultation</td><td align="left" valign="top">AI<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> functions in Tencent Meeting</td></tr><tr><td align="left" valign="top">Gaokao Xiaozhi (Zhangshang Gaokao)</td><td align="left" valign="top">Education</td><td align="left" valign="top">App or web</td><td align="left" valign="top">Academic counseling</td><td align="left" valign="top">University and major recommendation</td></tr><tr><td align="left" valign="top">Bank of China Online Bot (Bank of China)</td><td align="left" valign="top">Finance</td><td align="left" valign="top">Web</td><td align="left" valign="top">Banking consultation</td><td align="left" valign="top">Credit card selection</td></tr><tr><td align="left" valign="top">Ctrip Xiaoyou (Ctrip)</td><td align="left" valign="top">Travel</td><td align="left" valign="top">App or web</td><td align="left" valign="top">Booking assistance</td><td align="left" valign="top">Hotel room information</td></tr><tr><td align="left" valign="top">Huawei Xiaoyi (Huawei)</td><td align="left" valign="top">Consumer electronics</td><td align="left" valign="top">App</td><td align="left" valign="top">Technical support</td><td align="left" valign="top">Battery performance troubleshooting</td></tr><tr><td align="left" valign="top">Doubao (ByteDance)</td><td align="left" valign="top">Interview preparation</td><td align="left" valign="top">Web</td><td align="left" valign="top">Conversational assistance</td><td align="left" valign="top">Interview question coaching</td></tr><tr><td align="left" valign="top">Wenxin Yiyan (Baidu)</td><td align="left" valign="top">Education or assessment</td><td align="left" valign="top">Web</td><td align="left" valign="top">Question analysis</td><td align="left" valign="top">Exam question analysis</td></tr><tr><td align="left" valign="top">Kimi (Moonshot AI)</td><td align="left" valign="top">Health information</td><td align="left" valign="top">Web</td><td align="left" valign="top">Medical Q and 
A<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top">Hepatitis B test interpretation</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Q and A: question and answer.</p></fn><fn id="table3fn2"><p><sup>b</sup>AI: artificial intelligence.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-2-2"><title>Sample and Data Collection</title><p>In July 2024, a pilot study involving 15 participants was conducted at a Beijing-based company, using convenience sampling. Participants completed the Chinese BUS-11 after performing designated tasks with selected chatbots. Completion times were recorded, and qualitative feedback regarding item clarity, ambiguity, and comprehension difficulties was collected through structured interviews. The pilot&#x2019;s primary aims were to assess task feasibility, confirm average completion time, and identify any items requiring wording refinements. Pilot data were used only for refinement and were not included in the main validation dataset.</p></sec><sec id="s2-2-3"><title>Outcomes Informing Quality Control</title><p>Based on the observed average completion time (~5 minutes 20 seconds), we set a priori a data-quality rule for the main study to exclude online questionnaires completed in less than 5 minutes, in addition to removing any incomplete submissions.</p></sec></sec><sec id="s2-3"><title>Content Validity Validation</title><p>Between June and July 2024, 8 domain experts (HCI, informatics, chatbot engineering, and scale development; refer to <xref ref-type="table" rid="table2">Table 2</xref>) independently rated the relevance of each Chinese BUS-11 item to its intended dimension using a 4-point scale. 
Content validity was quantified to explicitly confirm item relevance and construct representativeness in the Chinese linguistic and cultural context following translation and cultural adaptation, even though no substantive changes were made to the original item content [<xref ref-type="bibr" rid="ref19">19</xref>]. The item-level content validity index (I-CVI) was computed as the proportion of experts assigning ratings of 3 or 4, and the average scale-level content validity index (S-CVI) was calculated as the average of the I-CVIs. Criteria of I-CVI&#x2265;0.78 and S-CVI&#x2265;0.90 were used to indicate acceptable-to-excellent content validity. I-CVIs ranged from 0.875 to 1, with 4 items (items 3, 4, 6, and 11) achieving unanimous relevance (I-CVI=1). The average S-CVI was 0.920, indicating excellent content validity of the Chinese BUS-11. All content validity analyses were completed before the main psychometric validation study.</p></sec><sec id="s2-4"><title>Main Validation Study: Participants, Measures, Procedure, and Analysis</title><sec id="s2-4-1"><title>Participant Recruitment</title><p>Participants were recruited via social media platforms (eg, Red Note [Xiaohongshu Information Technology] and WeChat [Tencent Holdings Ltd]) according to predefined inclusion criteria: native Chinese speakers or individuals proficient in Chinese and aged 18 years or older. Recruitment followed an online convenience sampling approach; participation was voluntary. Each participant was randomly assigned to interact with 3 distinct chatbots (selected from the 10 systems), performing 1 predefined task per chatbot and completing the BUS-11 immediately after each interaction. Thus, each participant contributed 3 separate chatbot evaluations, yielding a repeated-measures data structure (evaluations nested within participants). 
For psychometric analyses (eg, reliability and exploratory factor analysis [EFA]), evaluations were analyzed at the evaluation level (N=438); design implications of repeated measures are discussed in the &#x201C;Limitations&#x201D; section.</p></sec><sec id="s2-4-2"><title>Sample Size Estimation</title><p>Based on factor analysis requirements (5&#x2010;10 times the number of items, accounting for a 20% invalid rate), a minimum sample size of 69 participants was established. We targeted &#x2265;70 participants and exceeded this target, ultimately recruiting 214 participants who provided 438 evaluations, thereby improving the robustness of reliability and factor-analytic estimates.</p></sec><sec id="s2-4-3"><title>Participant Characteristics</title><p>We analyzed 438 valid chatbot evaluations submitted by 214 participants (69 males and 145 females), with each participant randomly assigned to evaluate 3 of 10 popular Chinese chatbot systems using the Chinese BUS-11. The sample skewed young, with 67.3% (144/214) aged 18&#x2010;24 years and 22.4% (48/214) aged 25&#x2010;30 years, and was highly educated, with 93% (199/214) holding a bachelor&#x2019;s degree or above, and represented multiple industries, most prominently health care (52.8%, 113/214) and education or training (11.7%, 25/214). Most respondents had prior exposure to chatbots (79%, 169/214) and at least moderate familiarity with them: 43% (92/214) reported being familiar or very familiar, while 27.5% (59/214) reported being unfamiliar or very unfamiliar. These characteristics suggest a relatively tech-savvy cohort, which should be considered when interpreting generalizability. 
Detailed distributions are provided in <xref ref-type="table" rid="table4">Table 4</xref>.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Characteristics of participants (N=214).</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Characteristic and item</td><td align="left" valign="bottom">Participant, n (%)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Sex</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Male</td><td align="left" valign="top">69 (32.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Female</td><td align="left" valign="top">145 (67.8)</td></tr><tr><td align="left" valign="top" colspan="2">Age (years)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>18-24</td><td align="left" valign="top">144 (67.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>25-30</td><td align="left" valign="top">48 (22.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>31-40</td><td align="left" valign="top">14 (6.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>41-50</td><td align="left" valign="top">5 (2.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>51-60</td><td align="left" valign="top">3 (1.4)</td></tr><tr><td align="left" valign="top" colspan="2">Educational level</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Junior high school 
and below</td><td align="left" valign="top">2 (0.9)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>High school</td><td align="left" valign="top">13 (6)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bachelor&#x2019;s degree</td><td align="left" valign="top">121 (56.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Master&#x2019;s degree and above</td><td align="left" valign="top">78 (36.4)</td></tr><tr><td align="left" valign="top" colspan="2">Industry</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Manufacturing</td><td align="left" valign="top">9 (4.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Construction</td><td align="left" valign="top">4 (1.9)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Education or training</td><td align="left" valign="top">25 (11.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Internet</td><td align="left" valign="top">5 (2.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Health care</td><td align="left" valign="top">113 (52.8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Computer</td><td align="left" valign="top">11 (5.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Finance</td><td align="left" valign="top">6 (2.8)</td></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Scientific research</td><td align="left" valign="top">9 (4.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Government institutions</td><td align="left" valign="top">10 (4.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Professional consulting services</td><td align="left" valign="top">6 (2.8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Culture and entertainment</td><td align="left" valign="top">8 (3.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Others</td><td align="left" valign="top">8 (3.7)</td></tr><tr><td align="left" valign="top" colspan="2">Familiarity with chatbots</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Very unfamiliar</td><td align="left" valign="top">26 (12.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Unfamiliar</td><td align="left" valign="top">33 (15.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Neutral</td><td align="left" valign="top">63 (29.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Familiar</td><td align="left" valign="top">61 (28.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Very familiar</td><td align="left" valign="top">31 (14.5)</td></tr><tr><td align="left" valign="top" 
colspan="2">Have you used a chatbot before?</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>No</td><td align="left" valign="top">45 (21)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yes</td><td align="left" valign="top">169 (79)</td></tr><tr><td align="left" valign="top" colspan="2">Frequency of using chatbots weekly</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Never (0)</td><td align="left" valign="top">53 (24.8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Rarely (1-2)</td><td align="left" valign="top">69 (32.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Occasionally (3-4)</td><td align="left" valign="top">46 (21.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Often (5-6)</td><td align="left" valign="top">25 (11.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Every day (7)</td><td align="left" valign="top">21 (9.8)</td></tr></tbody></table></table-wrap></sec><sec id="s2-4-4"><title>Measures</title><p>Two primary instruments were used:</p><list list-type="order"><list-item><p>General demographic questionnaire: collected participant demographics and chatbot usage patterns.</p></list-item><list-item><p>Chinese BUS-11: a culturally adapted 11-item scale assessing chatbot usability across 5 dimensions. 
Because 2 BUS-11 dimensions (&#x201C;Privacy and Security&#x201D; and &#x201C;Response Time&#x201D;) are single-item constructs in the original instrument, they were retained in the survey but treated as single-item indicators in factor-analytic planning (refer to &#x201C;Data Analysis&#x201D; section).</p></list-item></list></sec><sec id="s2-4-5"><title>Data Collection</title><p>Data were collected electronically via Tencent Questionnaire. Participants completed tasks on their own devices and then accessed the BUS-11 through a survey link. Strict criteria ensured data quality, and questionnaires that were not fully completed were directly discarded. Based on pilot timing, online questionnaires completed in less than 5 minutes were excluded to reduce insufficient-effort responses. In the pilot experiment, the research team measured the time taken to complete the questionnaire, with an average duration of 5 minutes and 20 seconds. The finalized main study dataset comprised 438 valid chatbot evaluations.</p></sec><sec id="s2-4-6"><title>Data Analysis</title><p>Data were exported to Microsoft Excel, verified, and analyzed using IBM SPSS Statistics (version 26.0). Reliability was evaluated using Cronbach &#x03B1; coefficients for the overall scale and for each of its multi-item dimensions. Structural validity was assessed via EFA after sampling-adequacy checks (Kaiser-Meyer-Olkin [KMO] index and Bartlett test of sphericity). We used a principal components extraction with the Kaiser criterion (eigenvalues &#x003E;1.0) and scree plot inspection to determine the number of factors and applied varimax rotation to aid interpretability. Standard criteria guided item retention: primary factor loadings of &#x2265;0.40, absence of substantial cross-loadings (&#x2265;0.30), and conceptual interpretability of the resulting factor solution. Salient loadings were defined as those with values of &#x2265;0.40. 
In line with the instrument&#x2019;s structure, the EFA focused on the 9 items related to multi-item dimensions; the 2 single-item dimensions (&#x201C;Privacy and Security&#x201D; and &#x201C;Response Time&#x201D;) were not included in the EFA but were retained as single-item indicators in descriptive analyses. <italic>P</italic>&#x003C;.05 was considered statistically significant for inferential tests (eg, Bartlett test of sphericity). We note that principal component analysis (PCA) is commonly used in early-stage exploratory work for data reduction and structure discovery, and future work may complement this with common-factor methods (eg, principal axis factoring) and confirmatory factor analysis (CFA) once larger samples are available.</p></sec></sec><sec id="s2-5"><title>Ethical Considerations</title><p>This study was approved by the Ethics Committee of the School of Nursing, Peking Union Medical College (approval no PUMCSON-2023-10). All participants provided electronic informed consent before participation. Participation was voluntary, and participants could withdraw at any time without penalty. No personally identifying information was collected in the analytic dataset. Data were stored securely and analyzed in deidentified form.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Scale Reliability and Validity</title><sec id="s3-1-1"><title>Structural Validity</title><p>Before factor extraction, data suitability was confirmed (KMO=0.743; Bartlett <italic>&#x03C7;</italic>&#x00B2;<sub>36</sub>=628.185; <italic>P</italic>&#x003C;.001). Because 2 BUS-11 dimensions (&#x201C;Privacy and Security&#x201D; and &#x201C;Response Time&#x201D;) are single-item constructs, they were not included in the EFA. 
The analysis, therefore, used the remaining 9 items (refer to <xref ref-type="fig" rid="figure1">Figure 1</xref> for the scree plot).</p><p>In this study, PCA and varimax orthogonal rotation were used to extract common factors. Nine items were analyzed (items 10 and 11 were excluded from the EFA because each constitutes a single-item dimension). Based on inspection of the scree plot (<xref ref-type="fig" rid="figure1">Figure 1</xref>), 3 common factors with eigenvalues greater than 1.0 were ultimately extracted, cumulatively accounting for 56.107% of the total variance. The results of the EFA for this experiment are shown in <xref ref-type="table" rid="table5">Table 5</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Scree plot of the factors.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="humanfactors_v13i1e84971_fig01.png"/></fig><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Exploratory factor analysis (EFA) loadings (3-factor solution) for the Chinese 11-item Chatbot Usability Scale (BUS-11).</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Item</td><td align="left" valign="bottom" colspan="3">Factor</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">1</td><td align="left" valign="bottom">2</td><td align="left" valign="bottom">3</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4">Access the chatbot&#x2019;s functions</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Finding the entry to this chatbot is an easy task</td><td align="left" valign="top">0.840</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup></td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>It is easy to find the various functions of this chatbot (eg, uploading a file, analyzing a file, changing chat avatar, and so on).</td><td align="left" valign="top">0.760</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="4">Quality of chatbot functions</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>The communication process with this chatbot is smooth</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.757</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>This chatbot is able to maintain context coherence</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.759</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>The responses from this chatbot are easy to understand</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.716</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="4">Provide quality conversation and information</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>I feel this chatbot understands my intentions and assists me in achieving the purpose of this chat</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.683</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>The information provided by this chatbot is appropriate, neither too much nor too 
little</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.627</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>This chatbot does not provide information relevant to my needs</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.629</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>I find the responses of this chatbot to be accurate and reliable</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.739</td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>Not available.</p></fn></table-wrap-foot></table-wrap><p>EFA with varimax rotation revealed a clear 3-factor structure for the Chinese version of BUS-11 (<xref ref-type="table" rid="table5">Table 5</xref>). The 3 factors explained 15.2%, 19.6%, and 21.3% of the total variance, respectively, accounting for a cumulative 56.1% of the variance. Factor 1 (Accessibility) consisted of 2 items reflecting the ease of locating and using chatbot functions, with strong loadings (0.840 and 0.760). Factor 2 (Interaction Process Quality) consisted of 3 items that described conversational smoothness, coherence, and comprehensibility, with loadings ranging from 0.716 to 0.759. Factor 3 (Information Quality) consisted of 4 items related to understanding user intent, the appropriateness of information, the absence of irrelevant content, and the reliability of responses, with loadings ranging from 0.627 to 0.739.</p><p>Only factors extracted by the eigenvalue &#x003E;1 criterion are shown. 
Item domain labels reflect the original BUS-11 conceptual domains and are provided for reference.</p></sec><sec id="s3-1-2"><title>Internal Consistency (Reliability)</title><p>The overall internal consistency of the Chinese BUS-11 was excellent (Cronbach &#x03B1;=0.92). By factor, reliability was acceptable to excellent, as indicated by Accessibility (Cronbach &#x03B1;=0.79), Interaction Process Quality (Cronbach &#x03B1;=0.91), and Information Quality (Cronbach &#x03B1;=0.93). Reliability was not computed for the single-item dimensions (&#x201C;Privacy and Security&#x201D; and &#x201C;Response Time&#x201D;). Item-level diagnostics (<xref ref-type="table" rid="table6">Table 6</xref>) showed corrected item-total correlations ranging from 0.453 to 0.819, indicating that all items were positively associated with the overall scale score. Cronbach &#x03B1; (if an item was deleted) ranged from 0.91 to 0.93, suggesting that removing any single item would not meaningfully improve internal consistency.</p><table-wrap id="t6" position="float"><label>Table 6.</label><caption><p>Mean, SD, corrected item-total correlation, and Cronbach &#x03B1; (if an item is deleted) for the 11-item Chatbot Usability Scale (BUS-11).</p></caption><table id="table6" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Item</td><td align="left" valign="bottom">Values, mean (SD)</td><td align="left" valign="bottom">Corrected item-total correlation</td><td align="left" valign="bottom">Cronbach &#x03B1; (if an item is deleted)</td></tr></thead><tbody><tr><td align="left" valign="top">1</td><td align="left" valign="top">3.79 (1.187)</td><td align="left" valign="top">0.539</td><td align="left" valign="top">0.93</td></tr><tr><td align="left" valign="top">2</td><td align="left" valign="top">3.57 (1.059)</td><td align="left" valign="top">0.616</td><td align="left" valign="top">0.92</td></tr><tr><td align="left" valign="top">3</td><td align="left" valign="top">3.77 
(0.952)</td><td align="left" valign="top">0.819</td><td align="left" valign="top">0.91</td></tr><tr><td align="left" valign="top">4</td><td align="left" valign="top">3.77 (0.923)</td><td align="left" valign="top">0.785</td><td align="left" valign="top">0.91</td></tr><tr><td align="left" valign="top">5</td><td align="left" valign="top">3.79 (0.946)</td><td align="left" valign="top">0.816</td><td align="left" valign="top">0.91</td></tr><tr><td align="left" valign="top">6</td><td align="left" valign="top">3.66 (0.982)</td><td align="left" valign="top">0.817</td><td align="left" valign="top">0.91</td></tr><tr><td align="left" valign="top">7</td><td align="left" valign="top">3.57 (0.979)</td><td align="left" valign="top">0.785</td><td align="left" valign="top">0.91</td></tr><tr><td align="left" valign="top">8</td><td align="left" valign="top">3.53 (0.96)</td><td align="left" valign="top">0.771</td><td align="left" valign="top">0.91</td></tr><tr><td align="left" valign="top">9</td><td align="left" valign="top">3.55 (0.992)</td><td align="left" valign="top">0.798</td><td align="left" valign="top">0.91</td></tr><tr><td align="left" valign="top">10</td><td align="left" valign="top">2.80 (0.998)</td><td align="left" valign="top">0.453</td><td align="left" valign="top">0.93</td></tr><tr><td align="left" valign="top">11</td><td align="left" valign="top">4.08 (0.866)</td><td align="left" valign="top">0.469</td><td align="left" valign="top">0.93</td></tr></tbody></table></table-wrap></sec></sec><sec id="s3-2"><title>User Feedback on Item Clarity and Acceptability</title><p>Overall, participants reported that the Chinese BUS-11 was easy to understand and straightforward to complete. Most respondents indicated that the items were clear and adequately reflected their experiences with chatbot interactions. 
The average completion time was approximately 5 minutes, suggesting that the scale imposed a minimal response burden and was suitable for use in both research and applied settings without causing respondent fatigue.</p><p>While feedback was generally positive, several recurring themes were identified. Participants provided optional open-ended comments on item clarity and completion experience. Comments were reviewed and summarized into recurring issues. Three themes emerged: (1) ambiguity in interpreting the privacy/security item, with some participants indicating uncertainty about what constitutes &#x201C;risk disclosure&#x201D; in chatbot interactions; (2) context-dependence of judgments, where respondents noted that perceived usability varied by task type and chatbot domain; and (3) overall clarity and low burden, with many respondents reporting that the items were easy to understand and could be completed quickly. These feedback themes informed our interpretation of the psychometric findings and priorities for future refinement.</p></sec><sec id="s3-3"><title>Sensitivity Analysis Across Sociodemographic Variables</title><p>To examine the robustness of the Chinese BUS-11 across user subgroups, we conducted sensitivity analyses by gender, age, and prior experience with chatbots (refer to <xref ref-type="table" rid="table7">Table 7</xref>). No significant differences in overall BUS-11 scores were observed between male and female participants (<italic>P</italic>=.12), nor across major age groups (<italic>P</italic>=.84), suggesting stable performance across these demographic characteristics. In contrast, participants with prior chatbot experience reported slightly higher BUS-11 scores than those without such experience (<italic>P</italic>=.03). 
This pattern indicates that the scale is sensitive to meaningful differences in user familiarity with chatbot interaction, while remaining robust across core sociodemographic variables.</p><table-wrap id="t7" position="float"><label>Table 7.</label><caption><p>Sensitivity analysis of the 11-item Chatbot Usability Scale (BUS-11) scores across sociodemographic variables.</p></caption><table id="table7" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variable and group</td><td align="left" valign="bottom">Value, mean (SD)</td><td align="left" valign="bottom">Statistical test</td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Sex</td><td align="left" valign="top"><italic>t</italic> test</td><td align="left" valign="top">.12</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Male</td><td align="left" valign="top">3.52 (0.80)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Female</td><td align="left" valign="top">3.63 (0.69)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="2">Age (years)</td><td align="left" valign="top">One-way ANOVA</td><td align="left" valign="top">.84</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>18&#x2010;24</td><td align="left" valign="top">3.55 (0.87)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>25&#x2010;30</td><td align="left" valign="top">3.49 (0.95)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>31&#x2010;40</td><td align="left" valign="top">3.30 (0.67)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>41&#x2010;50</td><td align="left" valign="top">3.93 (0.67)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="2">Prior chatbot experience</td><td align="left" valign="top"><italic>t</italic> test</td><td align="left" valign="top">.03</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yes</td><td align="left" valign="top">3.64 (0.71)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>No</td><td align="left" valign="top">3.46 (0.78)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr></tbody></table></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Rigorous Localization and Cultural Adaptation</title><p>This study localized the BUS-11 for the Chinese context, providing a standardized instrument for evaluating chatbot usability in China. Given BUS-11&#x2019;s theory-driven and multidimensional design, its adaptation is particularly relevant for contemporary conversational systems. Following established cross-cultural adaptation practices, we applied a forward-backward translation workflow (inspired by the Brislin model) combined with iterative expert committee review to ensure semantic and conceptual equivalence [<xref ref-type="bibr" rid="ref20">20</xref>]. 
The expert panel (HCI/human factors&#x2013;oriented) focused on pragmatic clarity, idiomatic appropriateness, and cultural relevance, resulting in a Chinese BUS-11 that remained closely aligned with the original without substantive item revisions.</p><p>Notably, the absence of major item-level changes should not be interpreted as evidence that culture is irrelevant to chatbot usability. Instead, cultural effects may operate more subtly, shaping how users interpret and prioritize usability facets (eg, privacy expectations, responsiveness norms, and conversational politeness) rather than necessitating overt rewording. Such differences may be reflected in response patterns and the observed factorial organization, underscoring the importance of future work using multigroup approaches (eg, measurement invariance testing) and direct cross-cultural comparisons to examine whether BUS-11 functions equivalently across cultural groups and whether certain dimensions show differential salience in Chinese users. In addition, consistent documentation of translation decisions improves transparency and supports reproducibility for subsequent adaptation work and comparative HCI research [<xref ref-type="bibr" rid="ref20">20</xref>]. Given the rapid expansion of chatbot deployment across service domains [<xref ref-type="bibr" rid="ref21">21</xref>], a psychometrically grounded Chinese BUS-11 can facilitate rigorous UX benchmarking in China and enable more meaningful integration of Chinese user evidence into the broader HCI literature [<xref ref-type="bibr" rid="ref22">22</xref>].</p></sec><sec id="s4-2"><title>Participant Sample and Representativeness</title><p>Our sample selection aimed for diversity in gender, age, education, and industry background in order to capture a broad cross-section of Chinese chatbot users. In practice, however, the achieved sample was skewed toward younger, highly educated adults. 
Most participants were aged 18&#x2010;30 years, and more than 90% held at least a bachelor&#x2019;s degree. While this profile likely reflects the early adopters of new technologies [<xref ref-type="bibr" rid="ref23">23</xref>] and those most engaged with contemporary chatbot applications, it also introduces some limitations on generalizability. Younger and tech-savvy users may have different usability perceptions than older or less technologically experienced individuals. Prior research in human factors has noted that older adults can face unique usability challenges and may interact with technology in distinct ways [<xref ref-type="bibr" rid="ref24">24</xref>]. Our current sample underrepresents such groups, as well as users from rural areas or lower educational backgrounds, which means the usability impressions captured by the scale might not fully reflect the broader population of potential chatbot users. On the positive side, the concentration of experienced users in our sample could heighten sensitivity to subtle differences in chatbot interaction quality&#x2014;these users are likely familiar with a variety of interfaces and may provide discerning feedback. The inclusion of participants from a range of industries (eg, education, health care, internet or information technology, and so on) is another strength, as it suggests the scale items were interpreted meaningfully across different usage contexts. Indeed, the BUS-11 was designed to be domain-agnostic, and our results tentatively support its applicability in multiple sectors. However, some fields (eg, finance and construction) were represented by relatively few respondents, so conclusions about those domains should be drawn cautiously. Likewise, because recruitment relied on convenience sampling (primarily in a few urban regions), there may be regional or cultural subgroup differences within China that our study did not capture. 
In summary, our sample provides a solid initial test of the scale among active chatbot users, but it is not fully representative of all user segments. Future research should broaden the sampling frame to include older adults, less frequent technology users, and a wider geographic distribution to ensure the Chinese BUS-11 is robustly validated for the general population. Broadening the participant base in this way would enhance the scale&#x2019;s universality and address the western, educated, industrialized, rich, and democratic bias analog in the Chinese context, thus strengthening confidence that the tool works well for all target users, not just the young and educated subset.</p></sec><sec id="s4-3"><title>Reliability, Validity, and Key Psychometric Findings</title><sec id="s4-3-1"><title>Overview</title><p>We conducted a comprehensive psychometric evaluation of the Chinese BUS-11, including assessments of content validity, construct (factorial) validity, internal consistency reliability, and user feedback on item clarity and practical usability. Overall, the findings provide strong evidence that the localized scale retains the scientific integrity of the original instrument. Here, we discuss each set of findings in turn, relating them to established benchmarks and prior research, and interpret their implications for the scale&#x2019;s use in HCI and human factors studies.</p></sec><sec id="s4-3-2"><title>Content Validity</title><p>By consulting a panel of 8 experts to review the relevance of each item, we established that the content validity of the Chinese BUS-11 is high. The I-CVI for every item was very strong, with 4 items (Items 3, 4, 6, and 11) achieving an I-CVI of 1 (ie, unanimously deemed highly relevant by all experts). The remaining items also scored well above conventional acceptability thresholds [<xref ref-type="bibr" rid="ref25">25</xref>]. 
Given that an I-CVI&#x2265;0.78 is often considered the minimum when 6 or more experts are involved, our results indicate that each item is considered clearly representative of the intended construct by domain specialists. The S-CVI was 0.920 (both by the averaging method and the universal agreement method), exceeding the common cutoff of 0.90 for excellent content coverage of a construct. These indices confirm that the BUS-11 (Chinese version) has comprehensive content coverage and the items collectively cover the important facets of chatbot usability without obvious omissions. This outcome was facilitated by the diverse expertise of our review panel, which included professionals in HCI, usability engineering, and systems engineering. Their varied perspectives helped ensure that the items were not only translated correctly but also conceptually appropriate for Chinese users. In practical terms, the high content validity suggests that Chinese BUS-11 can confidently be used to gauge chatbot usability aspects as intended&#x2014;such as efficiency, clarity of answers, and user comfort&#x2014;without missing key elements. The rigorous content validation process, in line with recommended scale development procedures, provides a solid foundation for the subsequent construct validation. It also offers reassurance to practitioners that the instrument has face validity in the local context (the items make sense to experts and presumably to end-users as well). We believe this strong content validity will help drive adoption of the scale in both research and industry evaluations, as stakeholders can trust that the instrument measures what it is supposed to measure (ie, salient user experience factors for chatbots).</p></sec><sec id="s4-3-3"><title>Structural Validity (EFA)</title><p>To examine the underlying factor structure of the Chinese BUS-11, we carried out an EFA on the survey responses. 
Preliminary checks confirmed that our data were suitable for factor analysis: the KMO measure of sampling adequacy was 0.743, which is above the usual threshold of 0.60 and can be considered &#x201C;middling&#x201D; to &#x201C;good&#x201D; [<xref ref-type="bibr" rid="ref26">26</xref>], and the Bartlett test of sphericity was highly significant, indicating sufficient interitem correlations for extracting latent factors. Using principal component extraction with varimax rotation, we identified a 3-factor solution that best fit the data. These 3 extracted common factors had eigenvalues greater than 1 and together accounted for about 56.1% of the total variance in users&#x2019; responses. A cumulative variance above approximately 50% is acceptable in behavioral research, given the complexity of HCI measures, so 56% indicates that the scale captures a substantial portion of the usability perception variance in our sample. Each factor showed a clear thematic grouping of items, reflecting distinct dimensions of the chatbot user experience. Notably, 2 specific usability aspects (ie, &#x201C;Privacy and Security&#x201D; and &#x201C;Response Time&#x201D;) were each originally measured by a single item in the BUS-11. As single-item facets, they could not load onto multi-item factors in the EFA and were thus analyzed separately (we examined their scores independently rather than including them in the factor structure). The remaining 9 items clustered into 3 coherent factors: (1) Accessibility (eg, ease of accessing and using the chatbot, such as a simple interface and low effort to start the interaction), (2) Interaction Quality (covering the smoothness, coherence, and understandability of the conversational exchange), and (3) Information Quality (assessing the relevance, clarity, and correctness of the chatbot&#x2019;s responses, without unnecessary redundancy). 
This structure is broadly consistent with the original conceptual framework of the BUS scale, which encompassed multiple dimensions of chatbot usability [<xref ref-type="bibr" rid="ref10">10</xref>], but our findings suggest a somewhat more consolidated factor model in the Chinese context.</p><p>In the original development of the BUS, a 5-factor model (BUS-15) was proposed, which included separate dimensions for privacy and response speed alongside factors analogous to those we found [<xref ref-type="bibr" rid="ref10">10</xref>]. Our EFA results imply that Chinese users may not distinguish as many separate categories, instead perceiving a more integrated set of usability factors. Attributes related to the conversational process and functional outcome appear to intertwine; users who find the interaction process smooth also tend to perceive the information provided as high quality, suggesting an underlying general perception of &#x201C;interaction effectiveness.&#x201D; Interestingly, a recent re-examination of BUS-11 in a large multichatbot dataset also identified a simpler factor structure than initially theorized [<xref ref-type="bibr" rid="ref27">27</xref>]. Our findings align with that perspective, reinforcing the idea that a parsimonious model (fewer and broader factors) can sufficiently capture chatbot usability evaluations. It is worth noting that we chose to retain the &#x201C;Privacy and Security&#x201D; and &#x201C;Response Time&#x201D; items as standalone indicators in the instrument, due to their importance in usability (users do care about privacy and speed) and their presence in the original scale. However, from a psychometric standpoint, single-item factors are suboptimal because they do not allow for internal consistency reliability estimation and cannot capture the breadth of a construct. 
Best practices in scale development typically recommend having at least 2&#x2010;3 items per factor to achieve adequate reliability and construct representation [<xref ref-type="bibr" rid="ref28">28</xref>]. Therefore, one implication is that future refinement of the Chinese BUS might involve expanding these facets&#x2014;adding a couple of items to better gauge the privacy/security aspect of chatbot use (eg, covering data protection, user consent, and so on) and the responsiveness aspect (eg, not just speed but also perceived responsiveness or promptness). For now, including those single items ensures that our localized scale does not lose any content relative to BUS-11, and they provide useful standalone measures (eg, designers might specifically want to see ratings of privacy transparency). In summary, the EFA supports the structural validity of the Chinese BUS-11 by confirming that its items coherently measure multiple distinct dimensions of usability. The identified 3-factor structure captures the major themes of chatbot usability experience in our sample. This structure provides an empirical basis for scoring or interpreting the scale (eg, computing subscale scores for Accessibility, Interaction Quality, and Information Quality) and sets the stage for future confirmatory testing to verify whether this structure generalizes to other samples or holds under more stringent statistical criteria.</p></sec><sec id="s4-3-4"><title>User Feedback on Item Clarity</title><p>The observed variability in interpreting the privacy/security item suggests that privacy-related usability judgments may be more sensitive to users&#x2019; privacy literacy and to how risk information is surfaced in routine interactions, consistent with prior privacy research indicating limited user engagement with privacy policies and disclosures [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. 
From a measurement perspective, this supports retaining privacy/security as a relevant usability facet while motivating future refinement (eg, adding additional items or more concrete wording) and the use of cognitive interviewing to improve interpretability across user subgroups [<xref ref-type="bibr" rid="ref31">31</xref>]. In addition, the brief completion time (&#x2248;3&#x2010;4 minutes) suggests that the Chinese BUS-11 imposes low respondent burden, comparable to widely used usability instruments and suitable for rapid assessment in applied settings [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>].</p></sec></sec><sec id="s4-4"><title>Limitations</title><p>While this study yielded encouraging results for the Chinese adaptation of BUS-11, several limitations must be acknowledged to contextualize the findings and guide future work. First, the sample, as discussed, was not fully representative of all user demographics. The geographic distribution of participants was relatively narrow (with many users from a few major cities), and certain age groups (especially adults aged older than 50 years) and occupations were underrepresented. Users from rural areas or with lower educational backgrounds were also underrepresented, which means the usability impressions captured by the scale might not fully reflect the broader population of potential chatbot users. With respect to occupational domains, the BUS-11 was designed to be domain-agnostic, and the results tentatively support its applicability in multiple sectors. However, some fields (eg, finance and construction) were represented by relatively few respondents, so conclusions about those domains should be drawn cautiously. Likewise, because recruitment relied on convenience and snowball sampling (primarily in a few urban regions), there may be regional or cultural subgroup differences within China that this study did not capture. 
This means the current validation is most directly applicable to younger, educated Chinese users in urban settings. Caution should be used in generalizing the psychometric results to, say, older adults or rural users, who might interact with chatbots differently or have different usability concerns. Expanding the sample in future studies will be important to verify that the scale maintains its reliability and validity across a broader swath of the Chinese population. Additionally, larger and more heterogeneous samples would allow for analyses of measurement invariance&#x2014;checking whether the scale functions equivalently across subgroups such as older vs younger users, or high-frequency vs low-frequency chatbot users [<xref ref-type="bibr" rid="ref34">34</xref>]. In addition to representativeness concerns, the overall sample size should be considered a methodological limitation. Although the study included 438 chatbot evaluations, these ratings were provided by 214 participants, which may be modest for robust factor-analytic procedures. According to the International Test Commission Guidelines for Translating and Adapting Tests [<xref ref-type="bibr" rid="ref35">35</xref>], stable and generalizable factor solutions are typically supported by larger participant samples. The relatively limited number of participants in this study may therefore constrain the stability and generalizability of the extracted factor structure and other psychometric estimates. Future studies with larger and more diverse samples are needed to further confirm the robustness of the Chinese BUS-11&#x2019;s factor structure and measurement properties.</p><p>Second, certain BUS-11 dimensions in the current form rely on a single item (notably the &#x201C;Privacy and Security&#x201D; and &#x201C;Response Time&#x201D; facets). Single-item measures, while convenient, provide limited psychometric information. 
They can neither capture the full breadth of a construct nor estimate internal consistency for that construct. The strong performance of the overall scale notwithstanding, the precision and reliability of those facets could be improved. Future research should consider enriching these dimensions by developing and testing additional items. For example, the privacy facet might be expanded with items addressing whether the chatbot clearly asks permission for data usage or whether it provides settings for privacy control. The response time facet might include an item on whether the chatbot&#x2019;s speed meets user expectations or whether delays ever cause frustration. Of course, any new items would themselves need to undergo translation and validation. The trade-off is that adding items could slightly increase completion time, but as long as the total remains reasonable (eg, an increase of &#x003C;2 minutes), it would likely be worth it for a more robust scale. In short, the current findings highlight the need to bolster those single-item constructs for a more rigorous measurement model [<xref ref-type="bibr" rid="ref28">28</xref>].</p><p>Third, this study primarily evaluated the scale in general usage scenarios (common chatbot systems for generic tasks) and did not deeply examine how the scale performs in specific industry contexts or use-case scenarios. It remains an open question whether users in specialized domains&#x2014;such as health care, education, finance, or customer service&#x2014;interpret and prioritize the BUS-11 items similarly. Different contexts may put extra emphasis on certain usability aspects; for instance, in health care chatbot applications, issues such as empathy, trust, and accuracy of information might be even more critical [<xref ref-type="bibr" rid="ref36">36</xref>], whereas in e-commerce chatbots, speed and transactional clarity could dominate user satisfaction. The inability to test such variations is a limitation. 
It is possible that the BUS-11 might require minor tweaks or weighting adjustments to be optimally effective in certain domains, or it may prove to be robust as is&#x2014;we simply do not have the data yet to say. Future validations should include a variety of chatbot types and domains to ensure the scale&#x2019;s content validity and structure hold in those settings, or to highlight any domain-specific gaps that could be addressed by additional context-specific items.</p><p>Additionally, this study did not use external, established usability instruments to assess convergent validity. Although the BUS-11 is conceptually grounded in usability theory, the absence of a comparator scale&#x2014;such as the System Usability Scale or other widely used usability measures&#x2014;limits the ability to empirically examine whether the Chinese BUS-11 correlates appropriately with validated measures of related constructs. As a result, the current validity evidence is primarily restricted to internal structure and reliability and does not allow for a direct assessment of alignment with existing usability benchmarks. Future studies should consider incorporating one or more established usability scales to evaluate convergent (and potentially discriminant) validity, thereby strengthening the overall validity framework and situating the Chinese BUS-11 more clearly within the broader landscape of usability measurement tools. Moreover, test-retest reliability was not assessed due to the cross-sectional design. Therefore, the temporal stability of the Chinese BUS-11 could not be directly evaluated. Future studies should administer the scale to the same users across time points to examine score stability when the usability of a chatbot system remains unchanged.</p><p>Finally, the psychometric analysis relied on EFA due to the novelty of applying the BUS-11 in Chinese. 
We have not yet conducted a CFA on a separate sample, which would be the next step to confirm whether the 3-factor model (plus the 2 single-item factors) holds under stricter modeling conditions. A CFA would allow us to test the hypothesized factor structure (possibly including correlated factors or a higher-order factor of overall usability) and to adjust for measurement errors [<xref ref-type="bibr" rid="ref37">37</xref>]. It would also enable a formal comparison of alternative models&#x2014;for example, whether a structure with 5 distinct factors (if privacy and speed were expanded) fits significantly better than a 3-factor structure, or whether a single-factor model (treating usability as unidimensional) is decisively inferior to a multifactor model. Additionally, with a large enough sample, multigroup CFA could be used to test measurement invariance across key groups (eg, male vs female, younger vs older, and different regions), ensuring the scale operates equivalently [<xref ref-type="bibr" rid="ref34">34</xref>]. Because the current sample size was modest for CFA and was collected as a single group, we reserve these confirmatory analyses for future work. In summary, the use of EFA was appropriate for initial validation; however, further confirmation via CFA is needed to solidify the factor structure and verify that the Chinese BUS-11 meets the standards of psychometric validity expected for a widely applicable research instrument.</p><p>The exploratory factor analytic results should be interpreted considering the factor extraction approach used in this study. We note that PCA is commonly applied in early-stage exploratory research for data reduction and initial structure discovery; however, PCA does not explicitly model latent constructs. As such, while appropriate for an initial examination of the scale structure, this approach provides only preliminary evidence regarding the underlying factor model. 
Future research with larger samples may complement these findings by applying common-factor methods (eg, principal axis factoring) and conducting CFA to more rigorously test the latent structure of the Chinese BUS-11.</p></sec><sec id="s4-5"><title>Conclusion</title><p>This study delivers a validated Chinese adaptation of BUS-11 with excellent internal consistency, strong expert-rated content validity, and a clear 3-factor structure for multi-item facets (Accessibility, Interaction Quality, and Information Quality), while preserving the privacy and speed facets conceptually. The instrument fills a measurement gap for chatbot UX evaluation in China and provides researchers and practitioners with a concise, psychometrically sound tool suitable for laboratory and field contexts alike. Future work should establish temporal stability, convergent validity, and confirmatory structure and expand coverage to underrepresented user groups and domain-specific scenarios to further strengthen its utility and generalizability.</p></sec></sec></body><back><notes><sec><title>Funding</title><p>This study was funded by the Non-Profit Central Research Institute Fund of the Chinese Academy of Medical Sciences (grant no 2023-RC320-01).</p></sec><sec><title>Data Availability</title><p>The datasets generated or analyzed during this study are not publicly available due to privacy and confidentiality considerations but are available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">BUS-11</term><def><p>11-item Chatbot Usability Scale</p></def></def-item><def-item><term id="abb2">CFA</term><def><p>confirmatory factor analysis</p></def></def-item><def-item><term id="abb3">CUQ</term><def><p>Chatbot Usability Questionnaire</p></def></def-item><def-item><term id="abb4">EFA</term><def><p>exploratory factor 
analysis</p></def></def-item><def-item><term id="abb5">HCI</term><def><p>human-computer interaction</p></def></def-item><def-item><term id="abb6">I-CVI</term><def><p>item-level content validity index</p></def></def-item><def-item><term id="abb7">KMO</term><def><p>Kaiser-Meyer-Olkin</p></def></def-item><def-item><term id="abb8">PCA</term><def><p>principal component analysis</p></def></def-item><def-item><term id="abb9">S-CVI</term><def><p>scale-level content validity index</p></def></def-item><def-item><term id="abb10">UX</term><def><p>user experience</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Nagarhalli</surname><given-names>TP</given-names> </name><name name-style="western"><surname>Vaze</surname><given-names>V</given-names> </name><name name-style="western"><surname>Rana</surname><given-names>NK</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Rana</surname><given-names>N</given-names> </name></person-group><article-title>A review of current trends in the development of chatbot systems</article-title><conf-name>2020 6th International Conference on Advanced Computing and Communication Systems (ICACCS)</conf-name><conf-date>Mar 6-7, 2020</conf-date><pub-id pub-id-type="doi">10.1109/ICACCS48705.2020.9074420</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nadarzynski</surname><given-names>T</given-names> </name><name name-style="western"><surname>Miles</surname><given-names>O</given-names> </name><name name-style="western"><surname>Cowie</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ridge</surname><given-names>D</given-names> 
</name></person-group><article-title>Acceptability of artificial intelligence (AI)-led chatbot services in healthcare: a mixed-methods study</article-title><source>Digit Health</source><year>2019</year><volume>5</volume><fpage>2055207619871808</fpage><pub-id pub-id-type="doi">10.1177/2055207619871808</pub-id><pub-id pub-id-type="medline">31467682</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Androutsopoulou</surname><given-names>A</given-names> </name><name name-style="western"><surname>Karacapilidis</surname><given-names>N</given-names> </name><name name-style="western"><surname>Loukis</surname><given-names>E</given-names> </name><name name-style="western"><surname>Charalabidis</surname><given-names>Y</given-names> </name></person-group><article-title>Transforming the communication between citizens and government through AI-guided chatbots</article-title><source>Gov Inf Q</source><year>2019</year><month>04</month><volume>36</volume><issue>2</issue><fpage>358</fpage><lpage>367</lpage><pub-id pub-id-type="doi">10.1016/j.giq.2018.10.001</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Landim</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pereira</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Vieira</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Chatbot design approaches for fashion e-commerce: an interdisciplinary review</article-title><source>Int J Fash Des Technol Educ</source><year>2022</year><month>05</month><day>4</day><volume>15</volume><issue>2</issue><fpage>200</fpage><lpage>210</lpage><pub-id pub-id-type="doi">10.1080/17543266.2021.1990417</pub-id></nlm-citation></ref><ref 
id="ref5"><label>5</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>FL</given-names> </name><name name-style="western"><surname>Qiu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>H</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>X</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Huang</surname><given-names>J</given-names> </name></person-group><article-title>AliMe Assist: an intelligent assistant for creating an innovative e-commerce experience</article-title><year>2017</year><access-date>2026-03-27</access-date><conf-name>Proceedings of the 2017 ACM Conference on Information and Knowledge Management</conf-name><conf-date>Nov 6-10, 2017</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.researchgate.net/publication/320885511_AliMe_Assist_An_Intelligent_Assistant_for_Creating_an_Innovative_E-commerce_Experience">https://www.researchgate.net/publication/320885511_AliMe_Assist_An_Intelligent_Assistant_for_Creating_an_Innovative_E-commerce_Experience</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>N</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>X</given-names> </name></person-group><article-title>Understanding the determinants in the different government AI adoption stages: evidence of local government chatbots in China</article-title><source>Soc Sci Comput 
Rev</source><year>2022</year><month>04</month><volume>40</volume><issue>2</issue><fpage>534</fpage><lpage>554</lpage><pub-id pub-id-type="doi">10.1177/0894439320980132</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shan</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ji</surname><given-names>M</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>W</given-names> </name><etal/></person-group><article-title>Use of health care chatbots among young people in China during the omicron wave of COVID-19: evaluation of the user experience of and satisfaction with the technology</article-title><source>JMIR Hum Factors</source><year>2022</year><month>06</month><day>9</day><volume>9</volume><issue>2</issue><fpage>e36831</fpage><pub-id pub-id-type="doi">10.2196/36831</pub-id><pub-id pub-id-type="medline">35576058</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Silva</surname><given-names>GRS</given-names> </name><name name-style="western"><surname>Canedo</surname><given-names>ED</given-names> </name></person-group><article-title>Towards user-centric guidelines for chatbot conversational design</article-title><source>Int J Hum Comput Interact</source><year>2024</year><month>01</month><day>17</day><volume>40</volume><issue>2</issue><fpage>98</fpage><lpage>120</lpage><pub-id pub-id-type="doi">10.1080/10447318.2022.2118244</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hornb&#x00E6;k</surname><given-names>K</given-names> </name></person-group><article-title>Current practice in measuring usability: challenges to usability studies and 
research</article-title><source>Int J Hum Comput Stud</source><year>2006</year><month>02</month><volume>64</volume><issue>2</issue><fpage>79</fpage><lpage>102</lpage><pub-id pub-id-type="doi">10.1016/j.ijhcs.2005.06.002</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Borsci</surname><given-names>S</given-names> </name><name name-style="western"><surname>Malizia</surname><given-names>A</given-names> </name><name name-style="western"><surname>Schmettow</surname><given-names>M</given-names> </name><etal/></person-group><article-title>The Chatbot Usability Scale: the design and pilot of a usability scale for interaction with AI-based conversational agents</article-title><source>Pers Ubiquit Comput</source><year>2022</year><month>02</month><volume>26</volume><issue>1</issue><fpage>95</fpage><lpage>119</lpage><pub-id pub-id-type="doi">10.1007/s00779-021-01582-9</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ren</surname><given-names>R</given-names> </name><name name-style="western"><surname>Zapata</surname><given-names>M</given-names> </name><name name-style="western"><surname>Castro</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Dieste</surname><given-names>O</given-names> </name><name name-style="western"><surname>Acuna</surname><given-names>ST</given-names> </name></person-group><article-title>Experimentation for chatbot usability evaluation: a secondary study</article-title><source>IEEE Access</source><year>2022</year><volume>10</volume><fpage>12430</fpage><lpage>12464</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2022.3145323</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Ren</surname><given-names>R</given-names> </name><name name-style="western"><surname>Castro</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Acu&#x00F1;a</surname><given-names>ST</given-names> </name><name name-style="western"><surname>de Lara</surname><given-names>J</given-names> </name></person-group><article-title>Evaluation techniques for chatbot usability: a systematic mapping study</article-title><source>Int J Softw Eng Knowl Eng</source><year>2019</year><month>11</month><volume>29</volume><issue>11n12</issue><fpage>1673</fpage><lpage>1702</lpage><pub-id pub-id-type="doi">10.1142/S0218194019400163</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Holmes</surname><given-names>S</given-names> </name></person-group><article-title>Towards validating a chatbot usability scale</article-title><source>International Conference on Human-Computer Interaction</source><publisher-name>Springer</publisher-name><pub-id pub-id-type="doi">10.1007/978-3-031-35708-4_24</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Borsci</surname><given-names>S</given-names> </name><name name-style="western"><surname>Prati</surname><given-names>E</given-names> </name><name name-style="western"><surname>Malizia</surname><given-names>A</given-names> </name><name name-style="western"><surname>Schmettow</surname><given-names>M</given-names> </name><name name-style="western"><surname>Chamberlain</surname><given-names>A</given-names> </name><name name-style="western"><surname>Federici</surname><given-names>S</given-names> </name></person-group><article-title>Ciao AI: the Italian adaptation and validation of the Chatbot Usability Scale</article-title><source>Pers Ubiquit 
Comput</source><year>2023</year><month>12</month><volume>27</volume><issue>6</issue><fpage>2161</fpage><lpage>2170</lpage><pub-id pub-id-type="doi">10.1007/s00779-023-01731-2</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Beaton</surname><given-names>DE</given-names> </name><name name-style="western"><surname>Bombardier</surname><given-names>C</given-names> </name><name name-style="western"><surname>Guillemin</surname><given-names>F</given-names> </name><name name-style="western"><surname>Ferraz</surname><given-names>MB</given-names> </name></person-group><article-title>Guidelines for the process of cross-cultural adaptation of self-report measures</article-title><source>Spine (Phila Pa 1976)</source><year>2000</year><month>12</month><day>15</day><volume>25</volume><issue>24</issue><fpage>3186</fpage><lpage>3191</lpage><pub-id pub-id-type="doi">10.1097/00007632-200012150-00014</pub-id><pub-id pub-id-type="medline">11124735</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Lei</surname><given-names>T</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>X</given-names> </name></person-group><article-title>Chinese System Usability Scale: translation, revision, psychological measurement</article-title><source>Int J Hum Comput Interact</source><year>2020</year><month>06</month><day>14</day><volume>36</volume><issue>10</issue><fpage>953</fpage><lpage>963</lpage><pub-id pub-id-type="doi">10.1080/10447318.2019.1700644</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name 
name-style="western"><surname>Bolpagni</surname><given-names>M</given-names> </name><name name-style="western"><surname>Gabrielli</surname><given-names>S</given-names> </name></person-group><article-title>Development of a comprehensive evaluation scale for LLM-powered counseling chatbots (CES-LCC) using the eDelphi method</article-title><source>Public Health and Healthcare</source><comment>Preprint posted online on 2025</comment><pub-id pub-id-type="doi">10.20944/preprints202501.1621.v1</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jones</surname><given-names>PS</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Phillips</surname><given-names>LR</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>XE</given-names> </name><name name-style="western"><surname>Jaceldo</surname><given-names>KB</given-names> </name></person-group><article-title>An adaptation of Brislin&#x2019;s translation model for cross-cultural research</article-title><source>Nurs Res</source><year>2001</year><volume>50</volume><issue>5</issue><fpage>300</fpage><lpage>304</lpage><pub-id pub-id-type="doi">10.1097/00006199-200109000-00008</pub-id><pub-id pub-id-type="medline">11570715</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Prinsen</surname><given-names>CAC</given-names> </name><name name-style="western"><surname>Mokkink</surname><given-names>LB</given-names> </name><name name-style="western"><surname>Bouter</surname><given-names>LM</given-names> </name><etal/></person-group><article-title>COSMIN guideline for systematic reviews of patient-reported outcome measures</article-title><source>Qual Life 
Res</source><year>2018</year><month>05</month><volume>27</volume><issue>5</issue><fpage>1147</fpage><lpage>1157</lpage><pub-id pub-id-type="doi">10.1007/s11136-018-1798-3</pub-id><pub-id pub-id-type="medline">29435801</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brislin</surname><given-names>RW</given-names> </name></person-group><article-title>Back-translation for cross-cultural research</article-title><source>J Cross Cult Psychol</source><year>1970</year><month>09</month><volume>1</volume><issue>3</issue><fpage>185</fpage><lpage>216</lpage><pub-id pub-id-type="doi">10.1177/135910457000100301</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brandtzaeg</surname><given-names>PB</given-names> </name><name name-style="western"><surname>F&#x00F8;lstad</surname><given-names>A</given-names> </name></person-group><article-title>Chatbots: changing user needs and motivations</article-title><source>Interactions</source><year>2018</year><access-date>2026-03-27</access-date><volume>25</volume><issue>5</issue><fpage>38</fpage><lpage>43</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.researchgate.net/publication/327191388_Chatbots_changing_user_needs_and_motivations">https://www.researchgate.net/publication/327191388_Chatbots_changing_user_needs_and_motivations</ext-link></comment><pub-id pub-id-type="doi">10.1145/3236669</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Norman</surname><given-names>D</given-names> </name></person-group><source>The Design of Everyday Things</source><year>2013</year><publisher-name>Hachette UK</publisher-name><pub-id 
pub-id-type="other">0465072992</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Rogers</surname><given-names>EM</given-names> </name><name name-style="western"><surname>Singhal</surname><given-names>A</given-names> </name><name name-style="western"><surname>Quinlan</surname><given-names>MM</given-names> </name></person-group><article-title>Diffusion of innovations</article-title><source>An Integrated Approach to Communication Theory and Research</source><year>2014</year><access-date>2026-03-27</access-date><publisher-name>Routledge</publisher-name><fpage>432</fpage><lpage>448</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.taylorfrancis.com/chapters/edit/10.4324/9780203887011-36/diffusion-innovations-everett-rogers-arvind-singhal-margaret-quinlan">https://www.taylorfrancis.com/chapters/edit/10.4324/9780203887011-36/diffusion-innovations-everett-rogers-arvind-singhal-margaret-quinlan</ext-link></comment></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Charness</surname><given-names>N</given-names> </name><name name-style="western"><surname>Boot</surname><given-names>WR</given-names> </name></person-group><article-title>Aging and information technology use: potential and barriers</article-title><source>Curr Dir Psychol Sci</source><year>2009</year><volume>18</volume><issue>5</issue><fpage>253</fpage><lpage>258</lpage><pub-id pub-id-type="doi">10.1111/j.1467-8721.2009.01647.x</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lynn</surname><given-names>MR</given-names> </name></person-group><article-title>Determination and quantification of content 
validity</article-title><source>Nurs Res</source><year>1986</year><volume>35</volume><issue>6</issue><fpage>382</fpage><lpage>385</lpage><pub-id pub-id-type="medline">3640358</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kaiser</surname><given-names>HF</given-names> </name></person-group><article-title>An index of factorial simplicity</article-title><source>Psychometrika</source><year>1974</year><month>03</month><volume>39</volume><issue>1</issue><fpage>31</fpage><lpage>36</lpage><pub-id pub-id-type="doi">10.1007/BF02291575</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Borsci</surname><given-names>S</given-names> </name><name name-style="western"><surname>Schmettow</surname><given-names>M</given-names> </name></person-group><article-title>Re-examining the chatBot Usability Scale (BUS-11) to assess user experience with customer relationship management chatbots</article-title><source>Pers Ubiquit Comput</source><year>2024</year><month>12</month><volume>28</volume><issue>6</issue><fpage>1033</fpage><lpage>1044</lpage><pub-id pub-id-type="doi">10.1007/s00779-024-01834-4</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Worthington</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Whittaker</surname><given-names>TA</given-names> </name></person-group><article-title>Scale development research: a content analysis and recommendations for best practices</article-title><source>Couns Psychol</source><year>2006</year><volume>34</volume><issue>6</issue><fpage>806</fpage><lpage>838</lpage><pub-id 
pub-id-type="doi">10.1177/0011000006288127</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McDonald</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Cranor</surname><given-names>LF</given-names> </name></person-group><article-title>The cost of reading privacy policies</article-title><source>ISJLP</source><year>2008</year><volume>4</volume><fpage>543</fpage></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Baruh</surname><given-names>L</given-names> </name><name name-style="western"><surname>Secinti</surname><given-names>E</given-names> </name><name name-style="western"><surname>Cemalcilar</surname><given-names>Z</given-names> </name></person-group><article-title>Online privacy concerns and privacy management: a meta-analytical review</article-title><source>J Commun</source><year>2017</year><month>02</month><volume>67</volume><issue>1</issue><fpage>26</fpage><lpage>53</lpage><pub-id pub-id-type="doi">10.1111/jcom.12276</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sweller</surname><given-names>J</given-names> </name></person-group><article-title>Cognitive load during problem solving: effects on learning</article-title><source>Cogn Sci</source><year>1988</year><month>04</month><volume>12</volume><issue>2</issue><fpage>257</fpage><lpage>285</lpage><pub-id pub-id-type="doi">10.1207/s15516709cog1202_4</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Brooke</surname><given-names>J</given-names> </name></person-group><article-title>SUS: a 
&#x201C;quick and dirty&#x201D; usability scale</article-title><source>Usability Evaluation in Industry</source><year>1996</year><access-date>2026-03-27</access-date><publisher-name>CRC Press</publisher-name><fpage>4</fpage><lpage>7</lpage><comment><ext-link ext-link-type="uri" xlink:href="http://taylorfrancis.com/chapters/edit/10.1201/9781498710411-35/sus-quick-dirty-usability-scale-john-brooke">http://taylorfrancis.com/chapters/edit/10.1201/9781498710411-35/sus-quick-dirty-usability-scale-john-brooke</ext-link></comment></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lewis</surname><given-names>CC</given-names> </name><name name-style="western"><surname>Fretwell</surname><given-names>CE</given-names> </name><name name-style="western"><surname>Ryan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Parham</surname><given-names>JB</given-names> </name></person-group><article-title>Faculty use of established and emerging technologies in higher education: a unified theory of acceptance and use of technology perspective</article-title><source>Int J High Educ</source><year>2013</year><volume>2</volume><issue>2</issue><fpage>22</fpage><lpage>34</lpage><pub-id pub-id-type="doi">10.5430/ijhe.v2n2p22</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vandenberg</surname><given-names>RJ</given-names> </name><name name-style="western"><surname>Lance</surname><given-names>CE</given-names> </name></person-group><article-title>A review and synthesis of the measurement invariance literature: suggestions, practices, and recommendations for organizational research</article-title><source>Organ Res Methods</source><year>2000</year><month>01</month><volume>3</volume><issue>1</issue><fpage>4</fpage><lpage>70</lpage><pub-id 
pub-id-type="doi">10.1177/109442810031002</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hern&#x00E1;ndez</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hidalgo</surname><given-names>MD</given-names> </name><name name-style="western"><surname>Hambleton</surname><given-names>RK</given-names> </name><name name-style="western"><surname>G&#x00F3;mez-Benito</surname><given-names>J</given-names> </name></person-group><article-title>International Test Commission guidelines for test adaptation: a criterion checklist</article-title><source>Psicothema</source><year>2020</year><month>08</month><volume>32</volume><issue>3</issue><fpage>390</fpage><lpage>398</lpage><pub-id pub-id-type="doi">10.7334/psicothema2019.306</pub-id><pub-id pub-id-type="medline">32711675</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Laranjo</surname><given-names>L</given-names> </name><name name-style="western"><surname>Dunn</surname><given-names>AG</given-names> </name><name name-style="western"><surname>Tong</surname><given-names>HL</given-names> </name><etal/></person-group><article-title>Conversational agents in healthcare: a systematic review</article-title><source>J Am Med Inform Assoc</source><year>2018</year><month>09</month><day>1</day><volume>25</volume><issue>9</issue><fpage>1248</fpage><lpage>1258</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocy072</pub-id><pub-id pub-id-type="medline">30010941</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Brown</surname><given-names>T</given-names> </name></person-group><source>Confirmatory Factor Analysis for Applied 
Research</source><year>2015</year><access-date>2026-03-27</access-date><edition>2</edition><publisher-name>The Guilford Press</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://psycnet.apa.org/record/2015-10560-000">https://psycnet.apa.org/record/2015-10560-000</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Standardized user tasks applied across 10 popular chatbot systems in China to ensure consistency and ecological validity in usability evaluations with the Chinese 11-item Chatbot Usability Scale (BUS-11).</p><media xlink:href="humanfactors_v13i1e84971_app1.docx" xlink:title="DOCX File, 21 KB"/></supplementary-material></app-group></back></article>