我不记得确切的数字了,但在使用 Google 基于 REST 的翻译 URL 时,我每次提交的是一组完整的句子,其总字符数小于或等于 1024 个字符,因此一个大段落会导致多次翻译服务调用。
只是为了证明一点,我把它拼凑了出来 :) 它还比较粗糙,但可以处理大量文本,而且翻译准确性和谷歌一样好,因为它使用的就是谷歌 API。我用这段代码处理了 Apple 的整份 2005 年 SEC 10-K 文件,只需单击一个按钮(大约需要 45 分钟)。
结果与一次将一个句子复制并粘贴到谷歌翻译中得到的结果基本相同。它并不完美(结束标点不准确,我也没有逐行写入文本文件),但它确实完成了概念验证。如果在正则表达式(Regex)上多下些功夫,标点的处理可能会更好。
Imports System.IO
Imports System.Text.RegularExpressions
''' <summary>
''' Form that translates the text file named by <c>file</c> from English to
''' German, one sentence fragment at a time, and writes the result to
''' "Translated Text.txt".
''' </summary>
Public Class Form1
    ' Path of the text file to translate.
    Dim file As String = "Translate Me.txt"

    ' Number of lines in the source file, computed once when the form is created.
    Dim lineCount As Integer = countLines()

    ''' <summary>
    ''' Counts the lines of the source file.
    ''' </summary>
    ''' <returns>
    ''' The number of newline-separated lines in the trimmed file content, or 1
    ''' (after showing a message box) when the file does not exist.
    ''' </returns>
    Private Function countLines() As Integer
        If Not IO.File.Exists(file) Then
            MsgBox(file + " cannot be found anywhere!", 0, "Oops!")
            Return 1
        End If

        ' Using guarantees the reader is closed even if reading fails.
        Using reader As New StreamReader(file)
            Return Split(reader.ReadToEnd.Trim(), Environment.NewLine).Length
        End Using
    End Function

    ''' <summary>
    ''' Splits every line of the source file on '.', '!' and '?', translates each
    ''' non-blank fragment, and writes the translated fragments to
    ''' "Translated Text.txt", each followed by ". ".
    ''' </summary>
    ''' <remarks>
    ''' Translation is best-effort: a fragment whose translation throws is
    ''' skipped (the error is written to the debug output) and processing
    ''' continues with the next fragment.
    ''' </remarks>
    Private Sub translateText()
        If Not IO.File.Exists(file) Then
            MsgBox(file + " cannot be found anywhere!", 0, "Oops!")
            Return
        End If

        Dim delimiters() As Char = {"."c, "!"c, "?"c}
        Dim translated As New System.Collections.Generic.List(Of String)

        ' One client is enough for the whole run; it does not depend on the fragment.
        Dim trans As New Google.API.Translate.TranslateClient("")

        For Each currentLine As String In IO.File.ReadAllLines(file)
            For Each fragment As String In currentLine.Split(delimiters)
                ' Blank fragments (e.g. after the final punctuation mark) carry no text.
                If fragment.Trim().Length = 0 Then Continue For

                Try
                    translated.Add(trans.Translate(fragment, Google.API.Translate.Language.English, Google.API.Translate.Language.German, Google.API.Translate.TranslateFormat.Text))
                Catch ex As Exception
                    ' Keep going: one failed fragment must not abort a long run.
                    System.Diagnostics.Debug.WriteLine("Translation failed for """ + fragment + """: " + ex.Message)
                End Try
            Next
        Next

        Dim newFile As String = "Translated Text.txt"
        Using output As StreamWriter = New StreamWriter(newFile)
            ' Only successfully translated fragments are written, so no stray ". " appears.
            For Each translatedSentence As String In translated
                output.Write(translatedSentence + ". ")
            Next
        End Using
    End Sub

    ''' <summary>
    ''' Starts the translation when the translate button is clicked.
    ''' </summary>
    Private Sub translateButton_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles translateButton.Click
        translateText()
    End Sub
End Class
使用MyGengo。他们有一个免费的机器翻译 API——我不知道质量如何,但你也可以付费插入人工翻译。
免责声明:虽然我确实发现标记化作为一种翻译手段是可疑的,但如后来ubiquibacon 所示,拆分句子可能会产生满足您要求的结果。
我建议可以通过将 30 多行字符串修改减少到他在另一个问题中要求的单行正则表达式来改进他的代码,但这个建议没有得到很好的接受。
这是在 VB.NET 和 C# 中使用Google API for .NET的实现。
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using Google.API.Translate;
namespace TokenizingTranslatorCS
{
    /// <summary>
    /// Console program that reads a text file, splits each line into sentences
    /// with a regular expression, translates every sentence from English to
    /// German through the shared <see cref="TranslateClient"/>, and prints both
    /// the original and the translated text.
    /// </summary>
    internal class Program
    {
        private static readonly TranslateClient Client =
            new TranslateClient("http://code.google.com/p/google-api-for-dotnet/");

        /// <summary>
        /// Entry point.
        /// </summary>
        /// <param name="args">args[0] is the path of the file to translate.</param>
        private static void Main(string[] args)
        {
            if (args == null || args.Length == 0)
            {
                Console.WriteLine("Usage: <program> <file-to-translate>");
                return;
            }

            Language originalLanguage = Language.English;
            Language targetLanguage = Language.German;
            string filename = args[0];
            StringBuilder output = new StringBuilder();
            string[] input = File.ReadAllLines(filename);

            foreach (string line in input)
            {
                List<string> translatedSentences = new List<string>();

                // Regex.Split returns the captured sentences along with the
                // (usually empty) text between matches; empties are filtered below.
                string[] sentences = Regex.Split(line, "\\b(?<sentence>.*?[\\.!?](?:\\s|$))");
                foreach (string sentence in sentences)
                {
                    string sentenceToTranslate = sentence.Trim();
                    if (!string.IsNullOrEmpty(sentenceToTranslate))
                    {
                        translatedSentences.Add(TranslateSentence(sentenceToTranslate, originalLanguage, targetLanguage));
                    }
                }

                // One output paragraph per input line, followed by a blank line.
                output.AppendLine(string.Format("{0}{1}", string.Join(" ", translatedSentences.ToArray()),
                                                Environment.NewLine));
            }

            Console.WriteLine("Translated:{0}{1}{0}", Environment.NewLine, string.Join(Environment.NewLine, input));
            Console.WriteLine("To:{0}{1}{0}", Environment.NewLine, output);
            Console.WriteLine("{0}Press any key{0}", Environment.NewLine);
        }

        /// <summary>
        /// Translates a single sentence with the shared client.
        /// </summary>
        /// <param name="sentence">Text to translate.</param>
        /// <param name="originalLanguage">Language of <paramref name="sentence"/>.</param>
        /// <param name="targetLanguage">Language to translate into.</param>
        /// <returns>The value returned by <c>Client.Translate</c>.</returns>
        private static string TranslateSentence(string sentence, Language originalLanguage, Language targetLanguage)
        {
            string translatedSentence = Client.Translate(sentence, originalLanguage, targetLanguage);
            return translatedSentence;
        }
    }
}
Imports System.Text.RegularExpressions
Imports System.IO
Imports System.Text
Imports Google.API.Translate
''' <summary>
''' Console program that reads a text file, splits each line into sentences
''' with a regular expression, translates every sentence from English to
''' German through the shared client, and prints both the original and the
''' translated text.
''' </summary>
Module Module1
    Private Client As TranslateClient = New TranslateClient("http://code.google.com/p/google-api-for-dotnet/")

    ''' <summary>
    ''' Entry point.
    ''' </summary>
    ''' <param name="args">args(0) is the path of the file to translate.</param>
    Sub Main(ByVal args As String())
        If args Is Nothing OrElse args.Length = 0 Then
            Console.WriteLine("Usage: <program> <file-to-translate>")
            Return
        End If

        Dim originalLanguage As Language = Language.English
        Dim targetLanguage As Language = Language.German
        Dim filename As String = args(0)
        Dim output As New StringBuilder
        Dim input As String() = File.ReadAllLines(filename)

        For Each line As String In input
            Dim translatedSentences As New List(Of String)

            ' Regex.Split returns the captured sentences along with the
            ' (usually empty) text between matches; empties are filtered below.
            Dim sentences As String() = Regex.Split(line, "\b(?<sentence>.*?[\.!?](?:\s|$))")
            For Each sentence As String In sentences
                Dim sentenceToTranslate As String = sentence.Trim
                If Not String.IsNullOrEmpty(sentenceToTranslate) Then
                    translatedSentences.Add(TranslateSentence(sentenceToTranslate, originalLanguage, targetLanguage))
                End If
            Next

            ' One output paragraph per input line, followed by a blank line.
            output.AppendLine(String.Format("{0}{1}", String.Join(" ", translatedSentences.ToArray), Environment.NewLine))
        Next

        Console.WriteLine("Translated:{0}{1}{0}", Environment.NewLine, String.Join(Environment.NewLine, input))
        Console.WriteLine("To:{0}{1}{0}", Environment.NewLine, output)
        Console.WriteLine("{0}Press any key{0}", Environment.NewLine)
    End Sub

    ''' <summary>
    ''' Translates a single sentence with the shared client.
    ''' </summary>
    ''' <param name="sentence">Text to translate.</param>
    ''' <param name="originalLanguage">Language of the sentence.</param>
    ''' <param name="targetLanguage">Language to translate into.</param>
    ''' <returns>The value returned by Client.Translate.</returns>
    Private Function TranslateSentence(ByVal sentence As String, ByVal originalLanguage As Language, ByVal targetLanguage As Language) As String
        Dim translatedSentence As String = Client.Translate(sentence, originalLanguage, targetLanguage)
        Return translatedSentence
    End Function
End Module
只是为了证明一点,我把它放在一起:) 它的边缘很粗糙,但它可以处理大量的文本,而且它在翻译准确性方面与谷歌一样好,因为它使用谷歌 API。我使用此代码处理了 Apple 的整个 2005 年 SEC 10-K 文件并单击一个按钮(大约需要 45 分钟)。结果与一次将一个句子复制并粘贴到谷歌翻译器中得到的结果基本相同。它并不完美(结束标点不准确,我没有逐行写入文本文件),但它确实显示了概念证明。如果您更多地使用 Regex,它可能会有更好的标点符号。
Nur um zu beweisen einen Punkt warf ich dies zusammen Ich verarbeitet Apple's gesamte 2005 SEC 10-K Filing bei diesem Code und dem Klicken einer Taste (dauerte ca. 45 Minuten). Das Ergebnis war im wesentlichen identisch zu dem, was Sie erhalten würden, wenn Sie kopiert und eingefügt einem Satz in einer Zeit, in Google Translator. Es ist nicht perfekt (Endung Interpunktion ist nicht korrekt und ich wollte nicht in die Textdatei Zeile für Zeile) schreiben, aber es zeigt Proof of Concept. Es hätte besser Satzzeichen, wenn Sie mit Regex arbeitete einige mehr.
我们使用了 http://www.berlitz.co.uk/translation/。
/// <summary>
/// Posts <paramref name="textToTranslate"/> to http://translate.google.com/ as a
/// form submission with source language "en" and target language "es", and
/// returns the body of the HTTP response.
/// </summary>
/// <param name="textToTranslate">English text to submit; null is treated as empty.</param>
/// <returns>
/// The full response body as read from the response stream, or an empty string
/// when the response is not an <see cref="HttpWebResponse"/>.
/// NOTE(review): whether the body is the bare translation or a page that still
/// has to be parsed depends on the service - confirm against a live response.
/// </returns>
private String TranslateTextEnglishSpanish(String textToTranslate)
{
    HttpWebRequest http = (HttpWebRequest)WebRequest.Create("http://translate.google.com/");
    http.Method = "POST";
    http.ContentType = "application/x-www-form-urlencoded";
    http.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv: Gecko/20100316 Firefox/3.6.2 (.NET CLR 3.5.30729)";
    http.Referer = "http://translate.google.com/";

    // The body is form-urlencoded, so the text must be escaped; otherwise
    // characters such as '&', '=' or '+' in the input would corrupt the form fields.
    string encodedText = Uri.EscapeDataString(textToTranslate ?? String.Empty);
    byte[] dataBytes = UTF8Encoding.UTF8.GetBytes(String.Format("js=y&prev=_t&hl=en&ie=UTF-8&layout=1&eotf=1&text={0}+&file=&sl=en&tl=es", encodedText));
    http.ContentLength = dataBytes.Length;

    using (Stream postStream = http.GetRequestStream())
    {
        postStream.Write(dataBytes, 0, dataBytes.Length);
    }

    // Dispose the response so the underlying connection is released.
    using (HttpWebResponse httpResponse = http.GetResponse() as HttpWebResponse)
    {
        if (httpResponse != null)
        {
            using (StreamReader reader = new StreamReader(httpResponse.GetResponseStream()))
            {
                // Return the response text
                return reader.ReadToEnd();
            }
        }
    }

    return "";
}
// Example call: submit an English phrase for translation to Spanish.
String translateText = TranslateTextEnglishSpanish("hello world");
// Intended result: translateText == "hola mundo"
您可以使用 Firefox 的 Live HTTP Headers 插件获得这些值。