@comment{Conference paper entry. The publication status is kept in the
note field rather than inside booktitle, so that booktitle holds only the
proceedings title. OPTproject is a non-standard field that bibliography
styles ignore; it records the related project URL.}
@inproceedings{ahonen-hyvonen-historical-texts-2009,
  author     = {Ahonen, Eeva and Hyvönen, Eero},
  title      = {Publishing Historical Texts on the Semantic Web - A Case Study},
  booktitle  = {Proceedings of the Third IEEE International Conference on Semantic Computing (ICSC2009)},
  month      = sep,
  year       = {2009},
  note       = {Forthcoming},
  location   = {Berkeley, CA, USA},
  abstract   = {Historical texts are an important component of cultural heritage, and are being digitized and
published on the web in various portals for the researchers and the public. However, searching and linking them with
related contents is challenging due to the non-structured text form, digitization errors, and the differences and
variations between old and modern language, including historical names (e.g. places), used for querying. This paper
addresses these issues by presenting an approach and a system for publishing old texts on the semantic web. As a case
study, an existing historical newspaper archive on the web is considered.
In our model, semantic metadata is added to the text using automated concept extraction methods. Search is
implemented with semantic techniques, by creating a multi-faceted search interface for the text materials. Problems
due to OCR errors and spelling variants are addressed with a fuzzy string matching algorithm trying to guess
corresponding words in a lexicon, and giving suggestions for corrected word forms. References between texts in the
library as well as links between the library and external knowledge sources are formed by using shared ontologies for
semantic annotations.},
  OPTproject = {http://www.seco.tkk.fi/projects/sw20},
}

