diff --git a/src/main/scala/app/RepositoryViewerController.scala b/src/main/scala/app/RepositoryViewerController.scala index 834e133..df7fd8a 100644 --- a/src/main/scala/app/RepositoryViewerController.scala +++ b/src/main/scala/app/RepositoryViewerController.scala @@ -2,7 +2,7 @@ import util.Directory._ import util.Implicits._ -import _root_.util.{ReferrerAuthenticator, JGitUtil, FileUtil} +import _root_.util.{ReferrerAuthenticator, JGitUtil, FileUtil, StringUtil} import service._ import org.scalatra._ import java.io.File @@ -106,7 +106,7 @@ val content = if(viewer == "other"){ if(bytes.isDefined && FileUtil.isText(bytes.get)){ // text - JGitUtil.ContentInfo("text", bytes.map(new String(_, "UTF-8"))) + JGitUtil.ContentInfo("text", bytes.map(StringUtil.convertFromByteArray)) } else { // binary JGitUtil.ContentInfo("binary", None) @@ -243,7 +243,7 @@ val files = JGitUtil.getFileList(git, revision, path) // process README.md val readme = files.find(_.name == "README.md").map { file => - new String(JGitUtil.getContent(Git.open(getRepositoryDir(repository.owner, repository.name)), file.id, true).get, "UTF-8") + StringUtil.convertFromByteArray(JGitUtil.getContent(Git.open(getRepositoryDir(repository.owner, repository.name)), file.id, true).get) } repo.html.files(revision, repository, diff --git a/src/main/scala/service/RepositorySearchService.scala b/src/main/scala/service/RepositorySearchService.scala index 1cc3333..4c80441 100644 --- a/src/main/scala/service/RepositorySearchService.scala +++ b/src/main/scala/service/RepositorySearchService.scala @@ -64,7 +64,7 @@ if(treeWalk.getFileMode(0) != FileMode.TREE){ JGitUtil.getContent(git, treeWalk.getObjectId(0), false).foreach { bytes => if(FileUtil.isText(bytes)){ - val text = new String(bytes, "UTF-8") + val text = StringUtil.convertFromByteArray(bytes) val lowerText = text.toLowerCase val indices = keywords.map(lowerText.indexOf _) if(!indices.exists(_ < 0)){ diff --git a/src/main/scala/util/JGitUtil.scala b/src/main/scala/util/JGitUtil.scala index 0b042ca..9114364 100644 --- a/src/main/scala/util/JGitUtil.scala +++ b/src/main/scala/util/JGitUtil.scala @@ -2,6 +2,7 @@ import org.eclipse.jgit.api.Git import util.Directory._ +import util.StringUtil._ import scala.collection.JavaConverters._ import org.eclipse.jgit.lib._ import org.eclipse.jgit.revwalk._ @@ -414,7 +415,7 @@ DiffInfo(ChangeType.ADD, null, walk.getPathString, None, None) } else { DiffInfo(ChangeType.ADD, null, walk.getPathString, None, - JGitUtil.getContent(git, walk.getObjectId(0), false).filter(FileUtil.isText).map(new String(_, "UTF-8"))) + JGitUtil.getContent(git, walk.getObjectId(0), false).filter(FileUtil.isText).map(convertFromByteArray)) })) } walk.release @@ -436,8 +437,8 @@ DiffInfo(diff.getChangeType, diff.getOldPath, diff.getNewPath, None, None) } else { DiffInfo(diff.getChangeType, diff.getOldPath, diff.getNewPath, - JGitUtil.getContent(git, diff.getOldId.toObjectId, false).filter(FileUtil.isText).map(new String(_, "UTF-8")), - JGitUtil.getContent(git, diff.getNewId.toObjectId, false).filter(FileUtil.isText).map(new String(_, "UTF-8"))) + JGitUtil.getContent(git, diff.getOldId.toObjectId, false).filter(FileUtil.isText).map(convertFromByteArray), + JGitUtil.getContent(git, diff.getNewId.toObjectId, false).filter(FileUtil.isText).map(convertFromByteArray)) } }.toList } diff --git a/src/main/scala/util/StringUtil.scala b/src/main/scala/util/StringUtil.scala index d8ab997..3484b83 100644 --- a/src/main/scala/util/StringUtil.scala +++ b/src/main/scala/util/StringUtil.scala @@ -1,6 +1,7 @@ package util import java.net.{URLDecoder, URLEncoder} +import org.mozilla.universalchardet.UniversalDetector object StringUtil { @@ -25,4 +26,15 @@ def escapeHtml(value: String): String = value.replace("&", "&").replace("<", "<").replace(">", ">").replace("\"", """) + def convertFromByteArray(content: Array[Byte]): String = new String(content, detectEncoding(content)) + + def detectEncoding(content: Array[Byte]): String = { + val detector = new UniversalDetector(null) + detector.handleData(content, 0, content.length) + detector.dataEnd() + detector.getDetectedCharset match { + case null => "UTF-8" + case e => e + } + } }