<?xml version="1.0" encoding="UTF-8"?>
<!-- DDI Codebook 2.5 metadata record for the dataset identified by hdl:21.15109/ARP/VWQFD2. -->
<codeBook xmlns="ddi:codebook:2_5"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="ddi:codebook:2_5 https://ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd"
          version="2.5"
          xml:lang="hu">

  <!-- Citation of this metadata document. English text carries an explicit
       xml:lang="en" so it does not inherit the root's "hu". -->
  <docDscr>
    <citation>
      <titlStmt>
        <titl xml:lang="en">Subject categorisation experiments with AI in MTMT</titl>
        <IDNo agency="handle">hdl:21.15109/ARP/VWQFD2</IDNo>
      </titlStmt>
      <distStmt>
        <distrbtr source="archive">ARP</distrbtr>
        <distDate>2026-05-08</distDate>
      </distStmt>
      <verStmt source="archive">
        <version date="2026-05-12" type="RELEASED">2</version>
      </verStmt>
      <biblCit>Micsik, András; Tanácsi, Roland, 2026, "Subject categorisation experiments with AI in MTMT", https://hdl.handle.net/21.15109/ARP/VWQFD2, ARP, V2</biblCit>
    </citation>
  </docDscr>

  <!-- Description of the dataset: citation, subject terms, access terms and related publication. -->
  <stdyDscr>
    <citation>
      <titlStmt>
        <titl xml:lang="en">Subject categorisation experiments with AI in MTMT</titl>
        <IDNo agency="handle">hdl:21.15109/ARP/VWQFD2</IDNo>
      </titlStmt>
      <rspStmt>
        <AuthEnty affiliation="HUN-REN SZTAKI">Micsik, András</AuthEnty>
        <AuthEnty affiliation="HUN-REN SZTAKI">Tanácsi, Roland</AuthEnty>
      </rspStmt>
      <prodStmt>
        <prodDate>2025-11-15</prodDate>
        <software version="3.10">Python</software>
        <grantNo agency="NKFIH">RRF-2.3.1-21-2022-00004</grantNo>
      </prodStmt>
      <distStmt>
        <distrbtr source="archive">ARP</distrbtr>
        <contact affiliation="HUN-REN SZTAKI" email="micsik@sztaki.hu">Micsik, András</contact>
        <depositr>Micsik, András</depositr>
        <depDate>2026-02-03</depDate>
      </distStmt>
      <holdings URI="https://hdl.handle.net/21.15109/ARP/VWQFD2"/>
    </citation>
    <stdyInfo>
      <subject>
        <keyword xml:lang="en">Computer and Information Science</keyword>
        <keyword xml:lang="hu">Számítástechnika és informatika</keyword>
        <keyword xml:lang="en">subject classification</keyword>
        <keyword xml:lang="en">scientific categorization</keyword>
        <keyword xml:lang="en">transformer models</keyword>
        <keyword xml:lang="en">Support Vector Classifier</keyword>
        <keyword xml:lang="en">data cleaning</keyword>
        <keyword xml:lang="en">large language models</keyword>
        <topcClas xml:lang="en"
                  vocab="EuroSciVoc"
                  vocabURI="http://data.europa.eu/8mn/euroscivoc/4c8f4b46-6f5c-41d9-9079-7de85c16431d">artificial intelligence</topcClas>
      </subject>
      <abstract xml:lang="en">Code, sample data and results for subject categorisation experiments with AI in MTMT</abstract>
      <sumDscr/>
    </stdyInfo>
    <method>
      <dataColl>
        <sources/>
      </dataColl>
      <anlyInfo/>
    </method>
    <dataAccs>
      <setAvail/>
      <useStmt/>
      <!-- The licence is stored as escaped HTML text; kept byte-for-byte on one line. -->
      <notes type="DVN:TOU" level="dv">&lt;a href="http://creativecommons.org/licenses/by-nc-nd/4.0">CC BY-NC-ND 4.0&lt;/a></notes>
    </dataAccs>
    <othrStdyMat>
      <!-- NOTE(review): relPubl appears to allow mixed content in the DDI 2.5 schema, so its
           children are kept unindented to avoid introducing whitespace text nodes; confirm. -->
      <relPubl><citation><titlStmt><titl>Tanácsi, R., &amp; Micsik, A. (2026). A Comparative Evaluation of AI Approaches to Large-Scale Scientific Subject Classification. Big Data and Cognitive Computing, 10(5), 151.</titl><IDNo agency="doi">10.3390/bdcc10050151</IDNo></titlStmt><biblCit>Tanácsi, R., &amp; Micsik, A. (2026). A Comparative Evaluation of AI Approaches to Large-Scale Scientific Subject Classification. Big Data and Cognitive Computing, 10(5), 151.</biblCit></citation><ExtLink URI="https://doi.org/10.3390/bdcc10050151"/></relPubl>
    </othrStdyMat>
  </stdyDscr>

  <!-- One otherMat entry per deposited file: download URI, file name and MIME type.
       Entry order is unchanged from the original record. -->
  <otherMat ID="f2163827" URI="https://repo.researchdata.hu/api/access/datafile/2163827" level="datafile">
    <labl>README.txt</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/plain</notes>
  </otherMat>
  <otherMat ID="f2163836" URI="https://repo.researchdata.hu/api/access/datafile/2163836" level="datafile">
    <labl>lvl4-mtmt-large-multiclass-svm-rbf.zip</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/octet-stream</notes>
  </otherMat>
  <otherMat ID="f2163829" URI="https://repo.researchdata.hu/api/access/datafile/2163829" level="datafile">
    <labl>svm_rbf_confusion_matrix_percent.csv</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/csv</notes>
  </otherMat>
  <otherMat ID="f2163831" URI="https://repo.researchdata.hu/api/access/datafile/2163831" level="datafile">
    <labl>annif.csv</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/csv</notes>
  </otherMat>
  <otherMat ID="f2163822" URI="https://repo.researchdata.hu/api/access/datafile/2163822" level="datafile">
    <labl>embedding_scikit.csv</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/csv</notes>
  </otherMat>
  <otherMat ID="f2163826" URI="https://repo.researchdata.hu/api/access/datafile/2163826" level="datafile">
    <labl>scibert_lvl3.csv</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/csv</notes>
  </otherMat>
  <otherMat ID="f2163830" URI="https://repo.researchdata.hu/api/access/datafile/2163830" level="datafile">
    <labl>scibert_lvl4.csv</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/csv</notes>
  </otherMat>
  <otherMat ID="f2163824" URI="https://repo.researchdata.hu/api/access/datafile/2163824" level="datafile">
    <labl>scibert_lvl4_subtopics.csv</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/csv</notes>
  </otherMat>
  <otherMat ID="f2163828" URI="https://repo.researchdata.hu/api/access/datafile/2163828" level="datafile">
    <labl>scibert_moe.csv</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/csv</notes>
  </otherMat>
  <otherMat ID="f2163821" URI="https://repo.researchdata.hu/api/access/datafile/2163821" level="datafile">
    <labl>frascati_mapping.json</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/json</notes>
  </otherMat>
  <otherMat ID="f2163835" URI="https://repo.researchdata.hu/api/access/datafile/2163835" level="datafile">
    <labl>sample_evaluation_data.csv</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/csv</notes>
  </otherMat>
  <otherMat ID="f2163834" URI="https://repo.researchdata.hu/api/access/datafile/2163834" level="datafile">
    <labl>sample_evaluation_data.npy</labl>
    <!-- The subject value is kept on one line: a line break inside an attribute value
         is normalized to an extra space by the parser. -->
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/octet-stream</notes>
  </otherMat>
  <otherMat ID="f2163823" URI="https://repo.researchdata.hu/api/access/datafile/2163823" level="datafile">
    <labl>sample_training_data.csv</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/csv</notes>
  </otherMat>
  <otherMat ID="f2163833" URI="https://repo.researchdata.hu/api/access/datafile/2163833" level="datafile">
    <labl>sample_training_data.npy</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/octet-stream</notes>
  </otherMat>
  <otherMat ID="f2163820" URI="https://repo.researchdata.hu/api/access/datafile/2163820" level="datafile">
    <labl>eval_svm_rbf.py</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/x-python</notes>
  </otherMat>
  <otherMat ID="f2163914" URI="https://repo.researchdata.hu/api/access/datafile/2163914" level="datafile">
    <labl>requirements.txt</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/plain</notes>
  </otherMat>
  <otherMat ID="f2163832" URI="https://repo.researchdata.hu/api/access/datafile/2163832" level="datafile">
    <labl>train_svm_rbf.py</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/x-python</notes>
  </otherMat>
</codeBook>