- De
- En
% Repository record 2_116509 (see grolink). No journal or DOI is present in the source data.
@article{2_116509,
  author  = {Doan, Triet Ho Anh and
             Doğan, Zeki Mustafa and
             Panzer, Jörg-Holger and
             Schima-Voigt, Kristine and
             Wieder, Philipp},
  title   = {{OLA-HD} – {Ein} {OCR-D-Langzeitarchiv} für historische {Drucke}},
  year    = {2020},
  month   = jan,
  grolink = {https://resolver.sub.uni-goettingen.de/purl?gro-2/116509},
  type    = {article},
}
% Repository record 2_121682 (see grolink).
% NOTE(review): the url field points at handle 2/116509, which is the key of a
% different entry in this file, not at 2/121682 -- confirm which handle is intended.
@misc{2_121682,
  author   = {Baierer, Konstantin and
              Boenig, Matthias and
              Engl, Elisabeth and
              Neudecker, Clemens and
              Altenhöner, Reinhard and
              Geyken, Alexander and
              Mangei, Johannes and
              Stotzka, Rainer and
              Dengel, Andreas and
              Jenckel, Martin and
              Gehrke, Alexander and
              Puppe, Frank and
              Weil, Stefan and
              Sachunsky, Robert and
              Schiffer, Lena K. and
              Janicki, Maciej and
              Heyer, Gerhard and
              Fink, Florian and
              Schulz, Klaus U. and
              Weichselbaumer, Nikolaus and
              Limbach, Saskia and
              Seuret, Mathias and
              Dong, Rui and
              Burghardt, Manuel and
              Christlein, Vincent and
              Doan, Triet Ho Anh and
              Doğan, Zeki Mustafa and
              Panzer, Jörg-Holger and
              Schima-Voigt, Kristine and
              Wieder, Philipp},
  title    = {{OCR-D} kompakt: {Ergebnisse} und {Stand} der {Forschung} in der {Förderinitiative}},
  year     = {2020},
  month    = jan,
  url      = {https://publications.goettingen-research-online.de/handle/2/116509},
  grolink  = {https://resolver.sub.uni-goettingen.de/purl?gro-2/121682},
  type     = {misc},
  abstract = {Bereits seit einigen Jahren werden große Anstrengungen unternommen, um die im deutschen Sprachraum erschienenen Drucke des 16.-18. Jahrhunderts zu erfassen und zu digitalisieren. Deren Volltexttransformation konzeptionell und technisch vorzubereiten, ist das übergeordnete Ziel des DFG-Projekts OCR-D, das sich mit der Weiterentwicklung von Verfahren der Optical Character Recognition befasst. Der Beitrag beschreibt den aktuellen Entwicklungsstand der OCR-D-Software und analysiert deren erste Teststellung in ausgewählten Bibliotheken.},
}
Thema | Professor*in | Typ |
---|---|---|
Vergleich der Leistung von Remote-Visualisierungstechniken | Prof. Julian Kunkel | BSc, MSc |
Empfehlungssystem für die Leistungsüberwachung und -analyse im HPC | Prof. Julian Kunkel | BSc, MSc |
% Repository record 2_133248 (see grolink).
% NOTE(review): journal was not in the source record; it is inferred from the
% DOI's journal code (j.future) -- verify against the publisher record.
@article{2_133248,
  author  = {Nolte, Hendrik and
             Spicher, Nicolai and
             Russel, Andrew and
             Ehlers, Tim and
             Krey, Sebastian and
             Krefting, Dagmar and
             Kunkel, Julian},
  title   = {Secure {HPC}: A workflow providing a secure partition on an {HPC} system},
  journal = {Future Generation Computer Systems},
  year    = {2023},
  month   = jan,
  doi     = {10.1016/j.future.2022.12.019},
  grolink = {https://resolver.sub.uni-goettingen.de/purl?gro-2/133248},
  type    = {article},
}
% Repository record 2_129372 (see grolink).
% NOTE(review): appears to be a duplicate of entry 2_114449 (same title, abstract
% and year; the url below points at that handle) -- consider citing 2_114449 instead.
@article{2_129372,
  author   = {Nolte, Hendrik and
              Wieder, Philipp},
  title    = {Toward data lakes as central building blocks for data management and analysis},
  year     = {2022},
  month    = jan,
  url      = {https://publications.goettingen-research-online.de/handle/2/114449},
  grolink  = {https://resolver.sub.uni-goettingen.de/purl?gro-2/129372},
  type     = {article},
  abstract = {Data lakes are a fundamental building block for many industrial data analysis solutions and becoming increasingly popular in research. Often associated with big data use cases, data lakes are, for example, used as central data management systems of research institutions or as the core entity of machine learning pipelines. The basic underlying idea of retaining data in its native format within a data lake facilitates a large range of use cases and improves data reusability, especially when compared to the schema-on-write approach applied in data warehouses, where data is transformed prior to the actual storage to fit a predefined schema. Storing such massive amounts of raw data, however, has its very own challenges, spanning from the general data modeling, and indexing for concise querying to the integration of suitable and scalable compute capabilities. In this contribution, influential papers of the last decade have been selected to provide a comprehensive overview of developments and obtained results. The papers are analyzed with regard to the applicability of their input to data lakes that serve as central data management systems of research institutions. To achieve this, contributions to data lake architectures, metadata models, data provenance, workflow support, and FAIR principles are investigated. Last, but not least, these capabilities are mapped onto the requirements of two common research personae to identify open challenges. With that, potential research topics are determined, which have to be tackled toward the applicability of data lakes as central building blocks for research data management.},
}
% Repository record 2_114449 (see grolink).
% NOTE(review): journal was not in the source record; it is inferred from the
% DOI's journal code (fdata) -- verify against the publisher record.
@article{2_114449,
  author   = {Wieder, Philipp and
              Nolte, Hendrik},
  title    = {Toward data lakes as central building blocks for data management and analysis},
  journal  = {Frontiers in Big Data},
  year     = {2022},
  month    = jan,
  doi      = {10.3389/fdata.2022.945720},
  grolink  = {https://resolver.sub.uni-goettingen.de/purl?gro-2/114449},
  type     = {article},
  abstract = {Data lakes are a fundamental building block for many industrial data analysis solutions and becoming increasingly popular in research. Often associated with big data use cases, data lakes are, for example, used as central data management systems of research institutions or as the core entity of machine learning pipelines. The basic underlying idea of retaining data in its native format within a data lake facilitates a large range of use cases and improves data reusability, especially when compared to the schema-on-write approach applied in data warehouses, where data is transformed prior to the actual storage to fit a predefined schema. Storing such massive amounts of raw data, however, has its very own challenges, spanning from the general data modeling, and indexing for concise querying to the integration of suitable and scalable compute capabilities. In this contribution, influential papers of the last decade have been selected to provide a comprehensive overview of developments and obtained results. The papers are analyzed with regard to the applicability of their input to data lakes that serve as central data management systems of research institutions. To achieve this, contributions to data lake architectures, metadata models, data provenance, workflow support, and FAIR principles are investigated. Last, but not least, these capabilities are mapped onto the requirements of two common research personae to identify open challenges. With that, potential research topics are determined, which have to be tackled toward the applicability of data lakes as central building blocks for research data management.},
}
% Repository record 2_129373 (see grolink).
% NOTE(review): appears to be a duplicate of entry 2_121151 (same title, authors
% and year; the url below points at that handle) -- consider citing 2_121151 instead.
@article{2_129373,
  author  = {Nolte, Hendrik and
             Wieder, Philipp},
  title   = {Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes},
  year    = {2022},
  month   = jan,
  url     = {https://publications.goettingen-research-online.de/handle/2/121151},
  grolink = {https://resolver.sub.uni-goettingen.de/purl?gro-2/129373},
  type    = {article},
}
% Repository record 2_121151 (see grolink).
% NOTE(review): journal was not in the source record; it is inferred from the
% DOI's journal code (dint) -- verify against the publisher record.
@article{2_121151,
  author  = {Nolte, Hendrik and
             Wieder, Philipp},
  title   = {Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes},
  journal = {Data Intelligence},
  year    = {2022},
  month   = jan,
  doi     = {10.1162/dint_a_00141},
  grolink = {https://resolver.sub.uni-goettingen.de/purl?gro-2/121151},
  type    = {article},
}
Thema | Professor*in | Typ |
---|---|---|
Entwicklung einer Provenance aware ad-hoc Schnittstelle für einen Data Lake | Prof. Julian Kunkel | BSc, MSc |
Semantische Klassifizierung von Metadatenattributen in einem Data Lake durch maschinelles Lernen | Prof. Julian Kunkel | BSc, MSc |
Governance für einen Data Lake | Prof. Julian Kunkel | BSc, MSc |
Triet Ho Anh Doan
Lukas Friedrich
Azat Khuziyakhmetov
Hendrik Nolte