1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
---|
2 | /* ***** BEGIN LICENSE BLOCK *****
|
---|
3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
---|
4 | *
|
---|
5 | * The contents of this file are subject to the Mozilla Public License Version
|
---|
6 | * 1.1 (the "License"); you may not use this file except in compliance with
|
---|
7 | * the License. You may obtain a copy of the License at
|
---|
8 | * http://www.mozilla.org/MPL/
|
---|
9 | *
|
---|
10 | * Software distributed under the License is distributed on an "AS IS" basis,
|
---|
11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
---|
12 | * for the specific language governing rights and limitations under the
|
---|
13 | * License.
|
---|
14 | *
|
---|
15 | * The Original Code is mozilla.org code.
|
---|
16 | *
|
---|
17 | * The Initial Developer of the Original Code is
|
---|
18 | * Netscape Communications Corporation.
|
---|
19 | * Portions created by the Initial Developer are Copyright (C) 1998
|
---|
20 | * the Initial Developer. All Rights Reserved.
|
---|
21 | *
|
---|
22 | * Contributor(s):
|
---|
23 | *
|
---|
24 | * Alternatively, the contents of this file may be used under the terms of
|
---|
25 | * either of the GNU General Public License Version 2 or later (the "GPL"),
|
---|
26 | * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
---|
27 | * in which case the provisions of the GPL or the LGPL are applicable instead
|
---|
28 | * of those above. If you wish to allow use of your version of this file only
|
---|
29 | * under the terms of either the GPL or the LGPL, and not to allow others to
|
---|
30 | * use your version of this file under the terms of the MPL, indicate your
|
---|
31 | * decision by deleting the provisions above and replace them with the notice
|
---|
32 | * and other provisions required by the GPL or the LGPL. If you do not delete
|
---|
33 | * the provisions above, a recipient may use your version of this file under
|
---|
34 | * the terms of any one of the MPL, the GPL or the LGPL.
|
---|
35 | *
|
---|
36 | * ***** END LICENSE BLOCK ***** */
|
---|
37 |
|
---|
38 |
|
---|
39 | #include "nsIUnicharInputStream.h"
|
---|
40 | #include "nsIByteBuffer.h"
|
---|
41 | #include "nsIUnicharBuffer.h"
|
---|
42 | #include "nsIServiceManager.h"
|
---|
43 | #include "nsString.h"
|
---|
44 | #include "nsCRT.h"
|
---|
45 | #include "nsUTF8Utils.h"
|
---|
46 | #include <fcntl.h>
|
---|
47 | #if defined(NS_WIN32)
|
---|
48 | #include <io.h>
|
---|
49 | #else
|
---|
50 | #include <unistd.h>
|
---|
51 | #endif
|
---|
52 |
|
---|
53 | class StringUnicharInputStream : public nsIUnicharInputStream {
|
---|
54 | public:
|
---|
55 | StringUnicharInputStream(nsString* aString);
|
---|
56 |
|
---|
57 | NS_DECL_ISUPPORTS
|
---|
58 |
|
---|
59 | NS_IMETHOD Read(PRUnichar* aBuf,
|
---|
60 | PRUint32 aCount,
|
---|
61 | PRUint32 *aReadCount);
|
---|
62 | NS_IMETHOD ReadSegments(nsWriteUnicharSegmentFun aWriter,
|
---|
63 | void* aClosure,
|
---|
64 | PRUint32 aCount, PRUint32* aReadCount);
|
---|
65 | NS_IMETHOD Close();
|
---|
66 |
|
---|
67 | nsString* mString;
|
---|
68 | PRUint32 mPos;
|
---|
69 | PRUint32 mLen;
|
---|
70 |
|
---|
71 | private:
|
---|
72 | ~StringUnicharInputStream();
|
---|
73 | };
|
---|
74 |
|
---|
75 | StringUnicharInputStream::StringUnicharInputStream(nsString* aString)
|
---|
76 | {
|
---|
77 | mString = aString;
|
---|
78 | mPos = 0;
|
---|
79 | mLen = aString->Length();
|
---|
80 | }
|
---|
81 |
|
---|
82 | StringUnicharInputStream::~StringUnicharInputStream()
|
---|
83 | {
|
---|
84 | if (nsnull != mString) {
|
---|
85 | delete mString;
|
---|
86 | }
|
---|
87 | }
|
---|
88 |
|
---|
89 | NS_IMETHODIMP
|
---|
90 | StringUnicharInputStream::Read(PRUnichar* aBuf,
|
---|
91 | PRUint32 aCount,
|
---|
92 | PRUint32 *aReadCount)
|
---|
93 | {
|
---|
94 | if (mPos >= mLen) {
|
---|
95 | *aReadCount = 0;
|
---|
96 | return NS_OK;
|
---|
97 | }
|
---|
98 | const PRUnichar* us = mString->get();
|
---|
99 | NS_ASSERTION(mLen >= mPos, "unsigned madness");
|
---|
100 | PRUint32 amount = mLen - mPos;
|
---|
101 | if (amount > aCount) {
|
---|
102 | amount = aCount;
|
---|
103 | }
|
---|
104 | memcpy(aBuf, us + mPos, sizeof(PRUnichar) * amount);
|
---|
105 | mPos += amount;
|
---|
106 | *aReadCount = amount;
|
---|
107 | return NS_OK;
|
---|
108 | }
|
---|
109 |
|
---|
110 | NS_IMETHODIMP
|
---|
111 | StringUnicharInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
|
---|
112 | void* aClosure,
|
---|
113 | PRUint32 aCount, PRUint32 *aReadCount)
|
---|
114 | {
|
---|
115 | PRUint32 bytesWritten;
|
---|
116 | PRUint32 totalBytesWritten = 0;
|
---|
117 |
|
---|
118 | nsresult rv;
|
---|
119 | aCount = PR_MIN(mString->Length() - mPos, aCount);
|
---|
120 |
|
---|
121 | while (aCount) {
|
---|
122 | rv = aWriter(this, aClosure, mString->get() + mPos,
|
---|
123 | totalBytesWritten, aCount, &bytesWritten);
|
---|
124 |
|
---|
125 | if (NS_FAILED(rv)) {
|
---|
126 | // don't propagate errors to the caller
|
---|
127 | break;
|
---|
128 | }
|
---|
129 |
|
---|
130 | aCount -= bytesWritten;
|
---|
131 | totalBytesWritten += bytesWritten;
|
---|
132 | mPos += bytesWritten;
|
---|
133 | }
|
---|
134 |
|
---|
135 | *aReadCount = totalBytesWritten;
|
---|
136 |
|
---|
137 | return NS_OK;
|
---|
138 | }
|
---|
139 |
|
---|
140 | nsresult StringUnicharInputStream::Close()
|
---|
141 | {
|
---|
142 | mPos = mLen;
|
---|
143 | if (nsnull != mString) {
|
---|
144 | delete mString;
|
---|
145 | mString = 0;
|
---|
146 | }
|
---|
147 | return NS_OK;
|
---|
148 | }
|
---|
149 |
|
---|
// XPCOM boilerplate: presumably expands to the nsISupports method definitions
// (AddRef/Release/QueryInterface) declared by NS_DECL_ISUPPORTS in the class,
// with nsIUnicharInputStream as the one supported interface.
NS_IMPL_ISUPPORTS1(StringUnicharInputStream, nsIUnicharInputStream)
|
---|
151 |
|
---|
152 | NS_COM nsresult
|
---|
153 | NS_NewStringUnicharInputStream(nsIUnicharInputStream** aInstancePtrResult,
|
---|
154 | nsString* aString)
|
---|
155 | {
|
---|
156 | NS_PRECONDITION(nsnull != aString, "null ptr");
|
---|
157 | NS_PRECONDITION(nsnull != aInstancePtrResult, "null ptr");
|
---|
158 | if ((nsnull == aString) || (nsnull == aInstancePtrResult)) {
|
---|
159 | return NS_ERROR_NULL_POINTER;
|
---|
160 | }
|
---|
161 |
|
---|
162 | StringUnicharInputStream* it = new StringUnicharInputStream(aString);
|
---|
163 | if (nsnull == it) {
|
---|
164 | return NS_ERROR_OUT_OF_MEMORY;
|
---|
165 | }
|
---|
166 |
|
---|
167 | return it->QueryInterface(NS_GET_IID(nsIUnicharInputStream),
|
---|
168 | (void**) aInstancePtrResult);
|
---|
169 | }
|
---|
170 |
|
---|
171 | //----------------------------------------------------------------------
|
---|
172 |
|
---|
173 | class UTF8InputStream : public nsIUnicharInputStream {
|
---|
174 | public:
|
---|
175 | UTF8InputStream();
|
---|
176 | nsresult Init(nsIInputStream* aStream, PRUint32 aBufSize);
|
---|
177 |
|
---|
178 | NS_DECL_ISUPPORTS
|
---|
179 | NS_IMETHOD Read(PRUnichar* aBuf,
|
---|
180 | PRUint32 aCount,
|
---|
181 | PRUint32 *aReadCount);
|
---|
182 | NS_IMETHOD ReadSegments(nsWriteUnicharSegmentFun aWriter,
|
---|
183 | void* aClosure,
|
---|
184 | PRUint32 aCount,
|
---|
185 | PRUint32 *aReadCount);
|
---|
186 | NS_IMETHOD Close();
|
---|
187 |
|
---|
188 | private:
|
---|
189 | ~UTF8InputStream();
|
---|
190 |
|
---|
191 | protected:
|
---|
192 | PRInt32 Fill(nsresult * aErrorCode);
|
---|
193 |
|
---|
194 | static void CountValidUTF8Bytes(const char *aBuf, PRUint32 aMaxBytes, PRUint32& aValidUTF8bytes, PRUint32& aValidUCS2bytes);
|
---|
195 |
|
---|
196 | nsCOMPtr<nsIInputStream> mInput;
|
---|
197 | nsCOMPtr<nsIByteBuffer> mByteData;
|
---|
198 | nsCOMPtr<nsIUnicharBuffer> mUnicharData;
|
---|
199 |
|
---|
200 | PRUint32 mByteDataOffset;
|
---|
201 | PRUint32 mUnicharDataOffset;
|
---|
202 | PRUint32 mUnicharDataLength;
|
---|
203 | };
|
---|
204 |
|
---|
205 | UTF8InputStream::UTF8InputStream() :
|
---|
206 | mByteDataOffset(0),
|
---|
207 | mUnicharDataOffset(0),
|
---|
208 | mUnicharDataLength(0)
|
---|
209 | {
|
---|
210 | }
|
---|
211 |
|
---|
212 | nsresult
|
---|
213 | UTF8InputStream::Init(nsIInputStream* aStream, PRUint32 aBufferSize)
|
---|
214 | {
|
---|
215 | if (aBufferSize == 0) {
|
---|
216 | aBufferSize = 8192;
|
---|
217 | }
|
---|
218 |
|
---|
219 | nsresult rv = NS_NewByteBuffer(getter_AddRefs(mByteData), nsnull, aBufferSize);
|
---|
220 | if (NS_FAILED(rv)) return rv;
|
---|
221 | rv = NS_NewUnicharBuffer(getter_AddRefs(mUnicharData), nsnull, aBufferSize);
|
---|
222 | if (NS_FAILED(rv)) return rv;
|
---|
223 |
|
---|
224 | mInput = aStream;
|
---|
225 |
|
---|
226 | return NS_OK;
|
---|
227 | }
|
---|
228 |
|
---|
// XPCOM boilerplate: presumably expands to the nsISupports method definitions
// (AddRef/Release/QueryInterface) declared by NS_DECL_ISUPPORTS in the class,
// with nsIUnicharInputStream as the one supported interface.
NS_IMPL_ISUPPORTS1(UTF8InputStream,nsIUnicharInputStream)
|
---|
230 |
|
---|
231 | UTF8InputStream::~UTF8InputStream()
|
---|
232 | {
|
---|
233 | Close();
|
---|
234 | }
|
---|
235 |
|
---|
236 | nsresult UTF8InputStream::Close()
|
---|
237 | {
|
---|
238 | mInput = nsnull;
|
---|
239 | mByteData = nsnull;
|
---|
240 | mUnicharData = nsnull;
|
---|
241 |
|
---|
242 | return NS_OK;
|
---|
243 | }
|
---|
244 |
|
---|
245 | nsresult UTF8InputStream::Read(PRUnichar* aBuf,
|
---|
246 | PRUint32 aCount,
|
---|
247 | PRUint32 *aReadCount)
|
---|
248 | {
|
---|
249 | NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
|
---|
250 | PRUint32 rv = mUnicharDataLength - mUnicharDataOffset;
|
---|
251 | nsresult errorCode;
|
---|
252 | if (0 == rv) {
|
---|
253 | // Fill the unichar buffer
|
---|
254 | rv = Fill(&errorCode);
|
---|
255 | if (rv <= 0) {
|
---|
256 | *aReadCount = 0;
|
---|
257 | return errorCode;
|
---|
258 | }
|
---|
259 | }
|
---|
260 | if (rv > aCount) {
|
---|
261 | rv = aCount;
|
---|
262 | }
|
---|
263 | memcpy(aBuf, mUnicharData->GetBuffer() + mUnicharDataOffset,
|
---|
264 | rv * sizeof(PRUnichar));
|
---|
265 | mUnicharDataOffset += rv;
|
---|
266 | *aReadCount = rv;
|
---|
267 | return NS_OK;
|
---|
268 | }
|
---|
269 |
|
---|
270 | NS_IMETHODIMP
|
---|
271 | UTF8InputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
|
---|
272 | void* aClosure,
|
---|
273 | PRUint32 aCount, PRUint32 *aReadCount)
|
---|
274 | {
|
---|
275 | NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
|
---|
276 | PRUint32 bytesToWrite = mUnicharDataLength - mUnicharDataOffset;
|
---|
277 | nsresult rv = NS_OK;
|
---|
278 | if (0 == bytesToWrite) {
|
---|
279 | // Fill the unichar buffer
|
---|
280 | bytesToWrite = Fill(&rv);
|
---|
281 | if (bytesToWrite <= 0) {
|
---|
282 | *aReadCount = 0;
|
---|
283 | return rv;
|
---|
284 | }
|
---|
285 | }
|
---|
286 |
|
---|
287 | if (bytesToWrite > aCount)
|
---|
288 | bytesToWrite = aCount;
|
---|
289 |
|
---|
290 | PRUint32 bytesWritten;
|
---|
291 | PRUint32 totalBytesWritten = 0;
|
---|
292 |
|
---|
293 | while (bytesToWrite) {
|
---|
294 | rv = aWriter(this, aClosure,
|
---|
295 | mUnicharData->GetBuffer() + mUnicharDataOffset,
|
---|
296 | totalBytesWritten, bytesToWrite, &bytesWritten);
|
---|
297 |
|
---|
298 | if (NS_FAILED(rv)) {
|
---|
299 | // don't propagate errors to the caller
|
---|
300 | break;
|
---|
301 | }
|
---|
302 |
|
---|
303 | bytesToWrite -= bytesWritten;
|
---|
304 | totalBytesWritten += bytesWritten;
|
---|
305 | mUnicharDataOffset += bytesWritten;
|
---|
306 | }
|
---|
307 |
|
---|
308 | *aReadCount = totalBytesWritten;
|
---|
309 |
|
---|
310 | return NS_OK;
|
---|
311 | }
|
---|
312 |
|
---|
313 | PRInt32 UTF8InputStream::Fill(nsresult * aErrorCode)
|
---|
314 | {
|
---|
315 | if (nsnull == mInput) {
|
---|
316 | // We already closed the stream!
|
---|
317 | *aErrorCode = NS_BASE_STREAM_CLOSED;
|
---|
318 | return -1;
|
---|
319 | }
|
---|
320 |
|
---|
321 | NS_ASSERTION(mByteData->GetLength() >= mByteDataOffset, "unsigned madness");
|
---|
322 | PRUint32 remainder = mByteData->GetLength() - mByteDataOffset;
|
---|
323 | mByteDataOffset = remainder;
|
---|
324 | PRInt32 nb = mByteData->Fill(aErrorCode, mInput, remainder);
|
---|
325 | if (nb <= 0) {
|
---|
326 | // Because we assume a many to one conversion, the lingering data
|
---|
327 | // in the byte buffer must be a partial conversion
|
---|
328 | // fragment. Because we know that we have recieved no more new
|
---|
329 | // data to add to it, we can't convert it. Therefore, we discard
|
---|
330 | // it.
|
---|
331 | return nb;
|
---|
332 | }
|
---|
333 | NS_ASSERTION(remainder + nb == mByteData->GetLength(), "bad nb");
|
---|
334 |
|
---|
335 | // Now convert as much of the byte buffer to unicode as possible
|
---|
336 | PRUint32 srcLen, dstLen;
|
---|
337 | CountValidUTF8Bytes(mByteData->GetBuffer(),remainder + nb, srcLen, dstLen);
|
---|
338 |
|
---|
339 | // the number of UCS2 characters should always be <= the number of
|
---|
340 | // UTF8 chars
|
---|
341 | NS_ASSERTION( (remainder+nb >= srcLen), "cannot be longer than out buffer");
|
---|
342 | NS_ASSERTION(PRInt32(dstLen) <= mUnicharData->GetBufferSize(),
|
---|
343 | "Ouch. I would overflow my buffer if I wasn't so careful.");
|
---|
344 | if (PRInt32(dstLen) > mUnicharData->GetBufferSize()) return 0;
|
---|
345 |
|
---|
346 | ConvertUTF8toUTF16 converter(mUnicharData->GetBuffer());
|
---|
347 |
|
---|
348 | nsASingleFragmentCString::const_char_iterator start = mByteData->GetBuffer();
|
---|
349 | nsASingleFragmentCString::const_char_iterator end = mByteData->GetBuffer() + srcLen;
|
---|
350 |
|
---|
351 | copy_string(start, end, converter);
|
---|
352 | NS_ASSERTION(converter.Length() == dstLen, "length mismatch");
|
---|
353 |
|
---|
354 | mUnicharDataOffset = 0;
|
---|
355 | mUnicharDataLength = dstLen;
|
---|
356 | mByteDataOffset = srcLen;
|
---|
357 |
|
---|
358 | return dstLen;
|
---|
359 | }
|
---|
360 |
|
---|
361 | void
|
---|
362 | UTF8InputStream::CountValidUTF8Bytes(const char* aBuffer, PRUint32 aMaxBytes, PRUint32& aValidUTF8bytes, PRUint32& aValidUCS2chars)
|
---|
363 | {
|
---|
364 | const char *c = aBuffer;
|
---|
365 | const char *end = aBuffer + aMaxBytes;
|
---|
366 | const char *lastchar = c; // pre-initialize in case of 0-length buffer
|
---|
367 | PRUint32 ucs2bytes = 0;
|
---|
368 | while (c < end && *c) {
|
---|
369 | lastchar = c;
|
---|
370 | ucs2bytes++;
|
---|
371 |
|
---|
372 | if (UTF8traits::isASCII(*c))
|
---|
373 | c++;
|
---|
374 | else if (UTF8traits::is2byte(*c))
|
---|
375 | c += 2;
|
---|
376 | else if (UTF8traits::is3byte(*c))
|
---|
377 | c += 3;
|
---|
378 | else if (UTF8traits::is4byte(*c))
|
---|
379 | c += 4;
|
---|
380 | else if (UTF8traits::is5byte(*c))
|
---|
381 | c += 5;
|
---|
382 | else if (UTF8traits::is6byte(*c))
|
---|
383 | c += 6;
|
---|
384 | else {
|
---|
385 | NS_WARNING("Unrecognized UTF8 string in UTF8InputStream::CountValidUTF8Bytes()");
|
---|
386 | break; // Otherwise we go into an infinite loop. But what happens now?
|
---|
387 | }
|
---|
388 | }
|
---|
389 | if (c > end) {
|
---|
390 | c = lastchar;
|
---|
391 | ucs2bytes--;
|
---|
392 | }
|
---|
393 |
|
---|
394 | aValidUTF8bytes = c - aBuffer;
|
---|
395 | aValidUCS2chars = ucs2bytes;
|
---|
396 | }
|
---|
397 |
|
---|
398 | NS_COM nsresult
|
---|
399 | NS_NewUTF8ConverterStream(nsIUnicharInputStream** aInstancePtrResult,
|
---|
400 | nsIInputStream* aStreamToWrap,
|
---|
401 | PRInt32 aBufferSize)
|
---|
402 | {
|
---|
403 | // Create converter input stream
|
---|
404 | UTF8InputStream* it = new UTF8InputStream();
|
---|
405 | if (nsnull == it) {
|
---|
406 | return NS_ERROR_OUT_OF_MEMORY;
|
---|
407 | }
|
---|
408 |
|
---|
409 | nsresult rv = it->Init(aStreamToWrap, aBufferSize);
|
---|
410 | if (NS_FAILED(rv))
|
---|
411 | return rv;
|
---|
412 |
|
---|
413 | return it->QueryInterface(NS_GET_IID(nsIUnicharInputStream),
|
---|
414 | (void **) aInstancePtrResult);
|
---|
415 | }
|
---|