{"dcterms:modified":"2026-05-12","dcterms:creator":"ARP","@type":"ore:ResourceMap","schema:additionalType":"Dataverse OREMap Format v1.0.1","dvcore:generatedBy":{"@type":"schema:SoftwareApplication","schema:name":"Dataverse","schema:version":"6.9 build arp-prod-984b1ef503","schema:url":"https://github.com/iqss/dataverse"},"@id":"https://repo.researchdata.hu/api/datasets/export?exporter=OAI_ORE&persistentId=https://hdl.handle.net/21.15109/ARP/VWQFD2","ore:describes":{"productionDate":"2025-11-15","language":"English","dateOfDeposit":"2026-02-03","title":"Subject categorisation experiments with AI in MTMT","subject":"Computer and Information Science","depositor":"Micsik, András","dsDescription":{"dsDescriptionValue":"Code, sample data and results for subject categorisation experiments with AI in MTMT"},"datasetContact":{"datasetContactName":"Micsik, András","datasetContactAffiliation":"HUN-REN SZTAKI","datasetContactEmail":"micsik@sztaki.hu"},"grantNumber":{"grantNumberAgency":"NKFIH","grantNumberValue":"RRF-2.3.1-21-2022-00004"},"publication":{"publicationCitation":"Tanácsi, R., & Micsik, A. (2026). A Comparative Evaluation of AI Approaches to Large-Scale Scientific Subject Classification. Big Data and Cognitive Computing, 10(5), 151.","publicationIDType":"doi","publicationIDNumber":"10.3390/bdcc10050151","publicationURL":"https://doi.org/10.3390/bdcc10050151","publicationRelationType":"IsSupplementTo"},"software":{"softwareName":"Python","softwareVersion":"3.10"},"topicClassification":{"topicClassValue":"artificial intelligence","topicClassVocab":"EuroSciVoc","topicClassVocabURI":"http://data.europa.eu/8mn/euroscivoc/4c8f4b46-6f5c-41d9-9079-7de85c16431d"},"author":[{"authorName":"Micsik, András","authorAffiliation":"HUN-REN SZTAKI","authorIdentifierScheme":"ORCID","authorIdentifier":"0000-0001-9859-9186"},{"authorName":"Tanácsi, Roland","authorAffiliation":"HUN-REN SZTAKI"}],"keyword":[{"keywordValue":"subject classification"},{"keywordValue":"scientific categorization"},{"keywordValue":"transformer models"},{"keywordValue":"Support Vector Classifier"},{"keywordValue":"data cleaning"},{"keywordValue":"large language models"}],"@id":"https://hdl.handle.net/21.15109/ARP/VWQFD2","@type":["ore:Aggregation","schema:Dataset"],"schema:version":"2.0","schema:name":"Subject categorisation experiments with AI in MTMT","schema:dateModified":"2026-05-12 14:55:37.219","schema:datePublished":"2026-05-08","schema:creativeWorkStatus":"RELEASED","schema:license":"http://creativecommons.org/licenses/by-nc-nd/4.0","dvcore:fileTermsOfAccess":{"dvcore:fileRequestAccess":true},"schema:includedInDataCatalog":"ARP","schema:isPartOf":{"schema:name":"Elosztott rendszerek osztály","@id":"https://repo.researchdata.hu/dataverse/hunren-sztaki-elosztott-rendszerek","schema:description":"Ez a HUN-REN SZTAKI Elosztott rendszerek osztályának hivatalos adattára.","schema:isPartOf":{"schema:name":"SZTAKI","@id":"https://repo.researchdata.hu/dataverse/sztaki","schema:description":"This is the dataverse collection of the Institute for Computer Science and Control of the Hungarian Research Network.","schema:isPartOf":{"schema:name":"Hungarian Research Network","@id":"https://repo.researchdata.hu/dataverse/hun-ren","schema:description":"<h2>A HUN-REN Magyar Kutatási Hálózat intézményi tárolója</h2>\nA HUN-REN Magyar Kutatási Hálózat a magyar tudományos élet alappillérét jelentő kutatási hálózat. A hálózat tizenegy kutatóközpontjának, hét kutatóintézetének, továbbá egyetemeken és más közintézményekben működő 116 támogatott kutatócsoportjának kutatói a matematikai és természettudományok, az élettudományok, illetve a bölcsészet- és társadalomtudományok legváltozatosabb területein végeznek alap- és alkalmazott kutatásokat.","schema:isPartOf":{"schema:name":"ARP","@id":"https://repo.researchdata.hu/dataverse/root","schema:description":"This is the root dataverse collection for HUN-REN ARP."}}}},"schema:inLanguage":"hu","ore:aggregates":[{"schema:name":"README.txt","dvcore:restricted":false,"schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/OLMFQR","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/OLMFQR","@type":"ore:AggregatedResource","schema:fileFormat":"text/plain","dvcore:filesize":317,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def14ecf6-9b96af192434","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"5a9d1ab8e4fa78ff465f69453cf772e0"}},{"schema:name":"annif.csv","dvcore:restricted":false,"dvcore:directoryLabel":"results","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/NF3M7P","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/NF3M7P","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":2267,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def14ed4c-22b64ccf0d86","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"8e3a64ace616bcb168b6e4f7fb2f282e"}},{"schema:name":"embedding_scikit.csv","dvcore:restricted":false,"dvcore:directoryLabel":"results","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/TUU2A3","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/TUU2A3","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":4075,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def14ed76-96578ee77634","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"b4c198539305ae175ad8a4fced2a604a"}},{"schema:name":"eval_svm_rbf.py","dvcore:restricted":false,"dvcore:directoryLabel":"src","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/GQ2QTF","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/GQ2QTF","@type":"ore:AggregatedResource","schema:fileFormat":"text/x-python","dvcore:filesize":6446,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def14ef6e-22e5dcc2e33e","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"6aadfcc714f0bba46bae9b7bb47fcb9c"}},{"schema:name":"frascati_mapping.json","dvcore:restricted":false,"dvcore:directoryLabel":"sample_data","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/UMCK7O","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/UMCK7O","@type":"ore:AggregatedResource","schema:fileFormat":"application/json","dvcore:filesize":2394,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def14ee6a-e94d2c6b0e0e","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"8a2a440fd99bf426350a6ed7a327dbfe"}},{"schema:name":"lvl4-mtmt-large-multiclass-svm-rbf.zip","dvcore:restricted":false,"dvcore:directoryLabel":"model","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/RSDNVR","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/RSDNVR","@type":"ore:AggregatedResource","schema:fileFormat":"application/octet-stream","dvcore:filesize":2015902080,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def6c795d-3d81f8a341ec","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"9e6b343b55a52c2c09c489aeaacc5af4"}},{"schema:name":"requirements.txt","dvcore:restricted":false,"dvcore:directoryLabel":"src","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/YSHVT5","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/YSHVT5","@type":"ore:AggregatedResource","schema:fileFormat":"text/plain","dvcore:filesize":182,"dvcore:storageIdentifier":"s3-sztaki://concorda:19e1c4141ac-65a315e6ccc3","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"ace08c9b245683be998b0fdc19ac8581"}},{"schema:name":"sample_evaluation_data.csv","dvcore:restricted":false,"dvcore:directoryLabel":"sample_data","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/WF4GHD","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/WF4GHD","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":25015,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def14ee85-16d4ebc37513","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"97a5cb0fdf30662e8b286e446fc71590"}},{"schema:name":"sample_evaluation_data.npy","dvcore:restricted":false,"dvcore:directoryLabel":"sample_data","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/2NLZ4Q","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/2NLZ4Q","@type":"ore:AggregatedResource","schema:fileFormat":"application/octet-stream","dvcore:filesize":1818752,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def14ee95-0ec4f09612a5","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"afa079c4bd87d0723929dedb2f148443"}},{"schema:name":"sample_training_data.csv","dvcore:restricted":false,"dvcore:directoryLabel":"sample_data","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/SKUVFP","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/SKUVFP","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":59377,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def14eee4-2f6441cd19fe","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"48cdb73aeb915b85ea0ef38140354417"}},{"schema:name":"sample_training_data.npy","dvcore:restricted":false,"dvcore:directoryLabel":"sample_data","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/2Q93LH","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/2Q93LH","@type":"ore:AggregatedResource","schema:fileFormat":"application/octet-stream","dvcore:filesize":4243584,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def14ef09-d8cba9909b25","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"e01b3016acc80a112d2903ddf41d60d3"}},{"schema:name":"scibert_lvl3.csv","dvcore:restricted":false,"dvcore:directoryLabel":"results","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/W4KTNW","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/W4KTNW","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":3258,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def14ed99-6698ca45fc7a","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"1c9d9644d1de08e7f1fb85f0333f946b"}},{"schema:name":"scibert_lvl4.csv","dvcore:restricted":false,"dvcore:directoryLabel":"results","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/AKX2F6","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/AKX2F6","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":3288,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def14edba-b7fe2b1bb578","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"9aeb860bf739ed09e094dbb9c7dc72e3"}},{"schema:name":"scibert_lvl4_subtopics.csv","dvcore:restricted":false,"dvcore:directoryLabel":"results","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/S6CBTZ","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/S6CBTZ","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":12681,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def14ee23-7a60dc9dc466","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"11cea9e8926e8bfc8be57226786c441a"}},{"schema:name":"scibert_moe.csv","dvcore:restricted":false,"dvcore:directoryLabel":"results","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/WMCKQO","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/WMCKQO","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":15114,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def14ee47-b3b3d74dc65d","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"2d789be516ce3c0cca8c9505a17dcd1c"}},{"schema:name":"svm_rbf_confusion_matrix_percent.csv","dvcore:restricted":false,"dvcore:directoryLabel":"model","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/MV8MX6","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/MV8MX6","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":5867,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def14ed29-125582debc75","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"9836e85f21382168c3fa60d8bb5463eb"}},{"schema:name":"train_svm_rbf.py","dvcore:restricted":false,"dvcore:directoryLabel":"src","schema:version":1,"dvcore:datasetVersionId":81045,"@id":"hdl:21.15109/ARP/VWQFD2/4BGUBA","schema:sameAs":"https://repo.researchdata.hu/api/access/datafile/:persistentId?persistentId=hdl:21.15109/ARP/VWQFD2/4BGUBA","@type":"ore:AggregatedResource","schema:fileFormat":"text/x-python","dvcore:filesize":4807,"dvcore:storageIdentifier":"s3-sztaki://concorda:19def14efb8-a192a84d24f9","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"6cb2735f2e906219ed25a9e14ad21555"}}],"schema:hasPart":["hdl:21.15109/ARP/VWQFD2/OLMFQR","hdl:21.15109/ARP/VWQFD2/NF3M7P","hdl:21.15109/ARP/VWQFD2/TUU2A3","hdl:21.15109/ARP/VWQFD2/GQ2QTF","hdl:21.15109/ARP/VWQFD2/UMCK7O","hdl:21.15109/ARP/VWQFD2/RSDNVR","hdl:21.15109/ARP/VWQFD2/YSHVT5","hdl:21.15109/ARP/VWQFD2/WF4GHD","hdl:21.15109/ARP/VWQFD2/2NLZ4Q","hdl:21.15109/ARP/VWQFD2/SKUVFP","hdl:21.15109/ARP/VWQFD2/2Q93LH","hdl:21.15109/ARP/VWQFD2/W4KTNW","hdl:21.15109/ARP/VWQFD2/AKX2F6","hdl:21.15109/ARP/VWQFD2/S6CBTZ","hdl:21.15109/ARP/VWQFD2/WMCKQO","hdl:21.15109/ARP/VWQFD2/MV8MX6","hdl:21.15109/ARP/VWQFD2/4BGUBA"]},"@context":{"author":"http://purl.org/dc/terms/creator","authorAffiliation":"https://dataverse.org/schema/citation/authorAffiliation","authorIdentifier":"http://purl.org/spar/datacite/AgentIdentifier","authorIdentifierScheme":"http://purl.org/spar/datacite/AgentIdentifierScheme","authorName":"https://dataverse.org/schema/citation/authorName","datasetContact":"https://dataverse.org/schema/citation/datasetContact","datasetContactAffiliation":"https://dataverse.org/schema/citation/datasetContactAffiliation","datasetContactEmail":"https://dataverse.org/schema/citation/datasetContactEmail","datasetContactName":"https://dataverse.org/schema/citation/datasetContactName","dateOfDeposit":"http://purl.org/dc/terms/dateSubmitted","dcterms":"http://purl.org/dc/terms/","depositor":"https://dataverse.org/schema/citation/depositor","dsDescription":"https://dataverse.org/schema/citation/dsDescription","dsDescriptionValue":"https://dataverse.org/schema/citation/dsDescriptionValue","dvcore":"https://dataverse.org/schema/core#","grantNumber":"https://schema.org/sponsor","grantNumberAgency":"https://dataverse.org/schema/citation/grantNumberAgency","grantNumberValue":"https://dataverse.org/schema/citation/grantNumberValue","keyword":"https://dataverse.org/schema/citation/keyword","keywordValue":"https://dataverse.org/schema/citation/keywordValue","language":"http://purl.org/dc/terms/language","ore":"http://www.openarchives.org/ore/terms/","productionDate":"https://dataverse.org/schema/citation/productionDate","publication":"http://purl.org/dc/terms/isReferencedBy","publicationCitation":"http://purl.org/dc/terms/bibliographicCitation","publicationIDNumber":"http://purl.org/spar/datacite/ResourceIdentifier","publicationIDType":"http://purl.org/spar/datacite/ResourceIdentifierScheme","publicationRelationType":"http://datacite.org/schema/kernel-4/simpleTypes#relationType","publicationURL":"https://schema.org/distribution","schema":"http://schema.org/","software":"https://www.w3.org/TR/prov-o/#wasGeneratedBy","softwareName":"https://dataverse.org/schema/citation/softwareName","softwareVersion":"https://dataverse.org/schema/citation/softwareVersion","subject":"http://purl.org/dc/terms/subject","title":"http://purl.org/dc/terms/title","topicClassValue":"https://dataverse.org/schema/citation/topicClassValue","topicClassVocab":"https://dataverse.org/schema/citation/topicClassVocab","topicClassVocabURI":"https://dataverse.org/schema/citation/topicClassVocabURI","topicClassification":"https://dataverse.org/schema/citation/topicClassification"}}