1 | {
|
---|
2 | This file is part of the Free Pascal run time library.
|
---|
3 | Copyright (c) 2005 by Florian Klaempfl,
|
---|
4 | member of the Free Pascal development team.
|
---|
5 |
|
---|
6 | libc based wide string support
|
---|
7 |
|
---|
8 | See the file COPYING.FPC, included in this distribution,
|
---|
9 | for details about the copyright.
|
---|
10 |
|
---|
11 | This program is distributed in the hope that it will be useful,
|
---|
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
---|
14 | **********************************************************************}
|
---|
15 |
|
---|
16 | {$mode objfpc}
|
---|
17 | {$inline on}
|
---|
18 |
|
---|
19 | unit cwstring;
|
---|
20 |
|
---|
21 | interface
|
---|
22 |
|
---|
23 | procedure SetCWidestringManager;
|
---|
24 |
|
---|
25 | implementation
|
---|
26 |
|
---|
27 | {$linklib c}
|
---|
28 |
|
---|
29 | {$if not defined(linux) and not defined(solaris)} // Linux (and maybe glibc platforms in general), have iconv in glibc.
|
---|
30 | {$linklib iconv}
|
---|
31 | {$define useiconv}
|
---|
32 | {$endif linux}
|
---|
33 |
|
---|
34 | Uses
|
---|
35 | BaseUnix,
|
---|
36 | ctypes,
|
---|
37 | unix,
|
---|
38 | unixtype,
|
---|
39 | initc;
|
---|
40 |
|
---|
41 | Const
|
---|
42 | {$ifndef useiconv}
|
---|
43 | libiconvname='c'; // is in libc under Linux.
|
---|
44 | {$else}
|
---|
45 | libiconvname='iconv';
|
---|
46 | {$endif}
|
---|
47 |
|
---|
48 | { helper functions from libc }
|
---|
49 | function towlower(__wc:wint_t):wint_t;cdecl;external clib name 'towlower';
|
---|
50 | function towupper(__wc:wint_t):wint_t;cdecl;external clib name 'towupper';
|
---|
51 |
|
---|
52 | function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external clib name 'wcscoll';
|
---|
53 | function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external clib name 'strcoll';
|
---|
54 | function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
|
---|
55 | {$ifndef beos}
|
---|
56 | function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';
|
---|
57 | function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';
|
---|
58 | function mbrlen(const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrlen';
|
---|
59 | {$else beos}
|
---|
60 | function mbtowc(pwc: pwchar_t; const s: pchar; n: size_t): size_t; cdecl; external clib name 'mbtowc';
|
---|
61 | function wctomb(s: pchar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';
|
---|
62 | function mblen(const s: pchar; n: size_t): size_t; cdecl; external clib name 'mblen';
|
---|
63 | {$endif beos}
|
---|
64 |
|
---|
65 |
|
---|
66 | const
|
---|
67 | {$ifdef linux}
|
---|
68 | __LC_CTYPE = 0;
|
---|
69 | LC_ALL = 6;
|
---|
70 | _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
|
---|
71 | _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
|
---|
72 | CODESET = _NL_CTYPE_CODESET_NAME;
|
---|
73 | {$else linux}
|
---|
74 | {$ifdef darwin}
|
---|
75 | CODESET = 0;
|
---|
76 | LC_ALL = 0;
|
---|
77 | {$else darwin}
|
---|
78 | {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
|
---|
79 | __LC_CTYPE = 0;
|
---|
80 | LC_ALL = 0;
|
---|
81 | _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
|
---|
82 | _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
|
---|
83 | CODESET = 0; // _NL_CTYPE_CODESET_NAME;
|
---|
84 | {$else freebsd}
|
---|
85 | {$ifdef solaris}
|
---|
86 | CODESET=49;
|
---|
87 | LC_ALL = 6;
|
---|
88 | {$else solaris}
|
---|
89 | {$ifdef beos}
|
---|
90 | {$warning check correct value for BeOS}
|
---|
91 | CODESET=49;
|
---|
92 | LC_ALL = 6; // Checked for BeOS, but 0 under Haiku...
|
---|
93 | ESysEILSEQ = EILSEQ;
|
---|
94 | {$else}
|
---|
95 | {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
|
---|
96 | // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
|
---|
97 | {$endif beos}
|
---|
98 | {$endif solaris}
|
---|
99 | {$endif FreeBSD}
|
---|
100 | {$endif darwin}
|
---|
101 | {$endif linux}
|
---|
102 |
|
---|
103 | { unicode encoding name }
|
---|
104 | {$ifdef FPC_LITTLE_ENDIAN}
|
---|
105 | unicode_encoding2 = 'UTF-16LE';
|
---|
106 | unicode_encoding4 = 'UCS-4LE';
|
---|
107 | {$else FPC_LITTLE_ENDIAN}
|
---|
108 | unicode_encoding2 = 'UTF-16BE';
|
---|
109 | unicode_encoding4 = 'UCS-4BE';
|
---|
110 | {$endif FPC_LITTLE_ENDIAN}
|
---|
111 |
|
---|
112 | { en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
|
---|
113 | { -> 10 should be enough? Should actually use MB_CUR_MAX, but }
|
---|
114 | { that's a libc macro mapped to internal functions/variables }
|
---|
115 | { and thus not a stable external API on systems where libc }
|
---|
116 | { breaks backwards compatibility every now and then }
|
---|
117 | MB_CUR_MAX = 10;
|
---|
118 |
|
---|
119 | type
|
---|
120 | piconv_t = ^iconv_t;
|
---|
121 | iconv_t = pointer;
|
---|
122 | nl_item = cint;
|
---|
123 | {$ifndef beos}
|
---|
124 | function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
|
---|
125 | {$endif}
|
---|
126 |
|
---|
127 | {$if (not defined(bsd) and not defined(beos)) or defined(darwin)}
|
---|
128 | function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
|
---|
129 | function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
|
---|
130 | function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
|
---|
131 | {$else}
|
---|
132 | function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
|
---|
133 | function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
|
---|
134 | function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
|
---|
135 | {$endif}
|
---|
136 |
|
---|
137 | procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
|
---|
138 |
|
---|
139 |
|
---|
140 | threadvar
|
---|
141 | iconv_ansi2wide,
|
---|
142 | iconv_wide2ansi : iconv_t;
|
---|
143 |
|
---|
144 | {$ifdef beos}
|
---|
145 | function nl_langinfo(__item:nl_item):pchar;
|
---|
146 | begin
|
---|
147 | {$warning TODO BeOS nl_langinfo or more uptodate port of iconv...}
|
---|
148 | // Now implement the minimum required to correctly initialize WideString support
|
---|
149 | case __item of
|
---|
150 | CODESET : Result := 'UTF-8'; // BeOS use UTF-8
|
---|
151 | else
|
---|
152 | begin
|
---|
153 | Assert(False, 'nl_langinfo was called with an unknown nl_item value');
|
---|
154 | Result := '';
|
---|
155 | end;
|
---|
156 | end;
|
---|
157 | end;
|
---|
158 | {$endif}
|
---|
159 |
|
---|
160 | procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
|
---|
161 | var
|
---|
162 | outlength,
|
---|
163 | outoffset,
|
---|
164 | srclen,
|
---|
165 | outleft : size_t;
|
---|
166 | srcpos : pwidechar;
|
---|
167 | destpos: pchar;
|
---|
168 | mynil : pchar;
|
---|
169 | my0 : size_t;
|
---|
170 | err: cint;
|
---|
171 | begin
|
---|
172 | mynil:=nil;
|
---|
173 | my0:=0;
|
---|
174 | { rought estimation }
|
---|
175 | setlength(dest,len*3);
|
---|
176 | outlength:=len*3;
|
---|
177 | srclen:=len*2;
|
---|
178 | srcpos:=source;
|
---|
179 | destpos:=pchar(dest);
|
---|
180 | outleft:=outlength;
|
---|
181 | while iconv(iconv_wide2ansi,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
|
---|
182 | begin
|
---|
183 | err:=fpgetCerrno;
|
---|
184 | case err of
|
---|
185 | { last character is incomplete sequence }
|
---|
186 | ESysEINVAL,
|
---|
187 | { incomplete sequence in the middle }
|
---|
188 | ESysEILSEQ:
|
---|
189 | begin
|
---|
190 | { skip and set to '?' }
|
---|
191 | inc(srcpos);
|
---|
192 | dec(srclen,2);
|
---|
193 | destpos^:='?';
|
---|
194 | inc(destpos);
|
---|
195 | dec(outleft);
|
---|
196 | { reset }
|
---|
197 | iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
|
---|
198 | if err=ESysEINVAL then
|
---|
199 | break;
|
---|
200 | end;
|
---|
201 | ESysE2BIG:
|
---|
202 | begin
|
---|
203 | outoffset:=destpos-pchar(dest);
|
---|
204 | { extend }
|
---|
205 | setlength(dest,outlength+len*3);
|
---|
206 | inc(outleft,len*3);
|
---|
207 | inc(outlength,len*3);
|
---|
208 | { string could have been moved }
|
---|
209 | destpos:=pchar(dest)+outoffset;
|
---|
210 | end;
|
---|
211 | else
|
---|
212 | runerror(231);
|
---|
213 | end;
|
---|
214 | end;
|
---|
215 | // truncate string
|
---|
216 | setlength(dest,length(dest)-outleft);
|
---|
217 | end;
|
---|
218 |
|
---|
219 |
|
---|
220 | procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
|
---|
221 | var
|
---|
222 | outlength,
|
---|
223 | outoffset,
|
---|
224 | outleft : size_t;
|
---|
225 | srcpos,
|
---|
226 | destpos: pchar;
|
---|
227 | mynil : pchar;
|
---|
228 | my0 : size_t;
|
---|
229 | err: cint;
|
---|
230 | begin
|
---|
231 | mynil:=nil;
|
---|
232 | my0:=0;
|
---|
233 | // extra space
|
---|
234 | outlength:=len+1;
|
---|
235 | setlength(dest,outlength);
|
---|
236 | srcpos:=source;
|
---|
237 | destpos:=pchar(dest);
|
---|
238 | outleft:=outlength*2;
|
---|
239 | while iconv(iconv_ansi2wide,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
|
---|
240 | begin
|
---|
241 | err:=fpgetCerrno;
|
---|
242 | case err of
|
---|
243 | ESysEINVAL,
|
---|
244 | ESysEILSEQ:
|
---|
245 | begin
|
---|
246 | { skip and set to '?' }
|
---|
247 | inc(srcpos);
|
---|
248 | dec(len);
|
---|
249 | pwidechar(destpos)^:='?';
|
---|
250 | inc(destpos,2);
|
---|
251 | dec(outleft,2);
|
---|
252 | { reset }
|
---|
253 | iconv(iconv_ansi2wide,@mynil,@my0,@mynil,@my0);
|
---|
254 | if err=ESysEINVAL then
|
---|
255 | break;
|
---|
256 | end;
|
---|
257 | ESysE2BIG:
|
---|
258 | begin
|
---|
259 | outoffset:=destpos-pchar(dest);
|
---|
260 | { extend }
|
---|
261 | setlength(dest,outlength+len);
|
---|
262 | inc(outleft,len*2);
|
---|
263 | inc(outlength,len);
|
---|
264 | { string could have been moved }
|
---|
265 | destpos:=pchar(dest)+outoffset;
|
---|
266 | end;
|
---|
267 | else
|
---|
268 | runerror(231);
|
---|
269 | end;
|
---|
270 | end;
|
---|
271 | // truncate string
|
---|
272 | setlength(dest,length(dest)-outleft div 2);
|
---|
273 | end;
|
---|
274 |
|
---|
275 |
|
---|
276 | function LowerWideString(const s : WideString) : WideString;
|
---|
277 | var
|
---|
278 | i : SizeInt;
|
---|
279 | begin
|
---|
280 | SetLength(result,length(s));
|
---|
281 | for i:=0 to length(s)-1 do
|
---|
282 | pwidechar(result)[i]:=WideChar(towlower(wint_t(s[i+1])));
|
---|
283 | end;
|
---|
284 |
|
---|
285 |
|
---|
286 | function UpperWideString(const s : WideString) : WideString;
|
---|
287 | var
|
---|
288 | i : SizeInt;
|
---|
289 | begin
|
---|
290 | SetLength(result,length(s));
|
---|
291 | for i:=0 to length(s)-1 do
|
---|
292 | pwidechar(result)[i]:=WideChar(towupper(wint_t(s[i+1])));
|
---|
293 | end;
|
---|
294 |
|
---|
295 |
|
---|
296 | procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
|
---|
297 | begin
|
---|
298 | if (len>length(s)) then
|
---|
299 | if (length(s) < 10*256) then
|
---|
300 | setlength(s,length(s)+10)
|
---|
301 | else
|
---|
302 | setlength(s,length(s)+length(s) shr 8);
|
---|
303 | end;
|
---|
304 |
|
---|
305 |
|
---|
306 | procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
|
---|
307 | begin
|
---|
308 | EnsureAnsiLen(s,index);
|
---|
309 | pchar(@s[index])^:=c;
|
---|
310 | inc(index);
|
---|
311 | end;
|
---|
312 |
|
---|
313 |
|
---|
314 | { concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
|
---|
315 | {$ifndef beos}
|
---|
316 | procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);
|
---|
317 | {$else not beos}
|
---|
318 | procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);
|
---|
319 | {$endif beos}
|
---|
320 | var
|
---|
321 | p : pchar;
|
---|
322 | mblen : size_t;
|
---|
323 | begin
|
---|
324 | { we know that s is unique -> avoid uniquestring calls}
|
---|
325 | p:=@s[index];
|
---|
326 | if (nc<=127) then
|
---|
327 | ConcatCharToAnsiStr(char(nc),s,index)
|
---|
328 | else
|
---|
329 | begin
|
---|
330 | EnsureAnsiLen(s,index+MB_CUR_MAX);
|
---|
331 | {$ifndef beos}
|
---|
332 | mblen:=wcrtomb(p,wchar_t(nc),@mbstate);
|
---|
333 | {$else not beos}
|
---|
334 | mblen:=wctomb(p,wchar_t(nc));
|
---|
335 | {$endif not beos}
|
---|
336 | if (mblen<>size_t(-1)) then
|
---|
337 | inc(index,mblen)
|
---|
338 | else
|
---|
339 | begin
|
---|
340 | { invalid wide char }
|
---|
341 | p^:='?';
|
---|
342 | inc(index);
|
---|
343 | end;
|
---|
344 | end;
|
---|
345 | end;
|
---|
346 |
|
---|
347 |
|
---|
348 | function LowerAnsiString(const s : AnsiString) : AnsiString;
|
---|
349 | var
|
---|
350 | i, slen,
|
---|
351 | resindex : SizeInt;
|
---|
352 | mblen : size_t;
|
---|
353 | {$ifndef beos}
|
---|
354 | ombstate,
|
---|
355 | nmbstate : mbstate_t;
|
---|
356 | {$endif beos}
|
---|
357 | wc : wchar_t;
|
---|
358 | begin
|
---|
359 | {$ifndef beos}
|
---|
360 | fillchar(ombstate,sizeof(ombstate),0);
|
---|
361 | fillchar(nmbstate,sizeof(nmbstate),0);
|
---|
362 | {$endif beos}
|
---|
363 | slen:=length(s);
|
---|
364 | SetLength(result,slen+10);
|
---|
365 | i:=1;
|
---|
366 | resindex:=1;
|
---|
367 | while (i<=slen) do
|
---|
368 | begin
|
---|
369 | if (s[i]<=#127) then
|
---|
370 | begin
|
---|
371 | wc:=wchar_t(s[i]);
|
---|
372 | mblen:= 1;
|
---|
373 | end
|
---|
374 | else
|
---|
375 | {$ifndef beos}
|
---|
376 | mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
|
---|
377 | {$else not beos}
|
---|
378 | mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
|
---|
379 | {$endif not beos}
|
---|
380 | case mblen of
|
---|
381 | size_t(-2):
|
---|
382 | begin
|
---|
383 | { partial invalid character, copy literally }
|
---|
384 | while (i<=slen) do
|
---|
385 | begin
|
---|
386 | ConcatCharToAnsiStr(s[i],result,resindex);
|
---|
387 | inc(i);
|
---|
388 | end;
|
---|
389 | end;
|
---|
390 | size_t(-1), 0:
|
---|
391 | begin
|
---|
392 | { invalid or null character }
|
---|
393 | ConcatCharToAnsiStr(s[i],result,resindex);
|
---|
394 | inc(i);
|
---|
395 | end;
|
---|
396 | else
|
---|
397 | begin
|
---|
398 | { a valid sequence }
|
---|
399 | { even if mblen = 1, the lowercase version may have a }
|
---|
400 | { different length }
|
---|
401 | { We can't do anything special if wchar_t is 16 bit... }
|
---|
402 | {$ifndef beos}
|
---|
403 | ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);
|
---|
404 | {$else not beos}
|
---|
405 | ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);
|
---|
406 | {$endif not beos}
|
---|
407 | inc(i,mblen);
|
---|
408 | end;
|
---|
409 | end;
|
---|
410 | end;
|
---|
411 | SetLength(result,resindex-1);
|
---|
412 | end;
|
---|
413 |
|
---|
414 |
|
---|
415 | function UpperAnsiString(const s : AnsiString) : AnsiString;
|
---|
416 | var
|
---|
417 | i, slen,
|
---|
418 | resindex : SizeInt;
|
---|
419 | mblen : size_t;
|
---|
420 | {$ifndef beos}
|
---|
421 | ombstate,
|
---|
422 | nmbstate : mbstate_t;
|
---|
423 | {$endif beos}
|
---|
424 | wc : wchar_t;
|
---|
425 | begin
|
---|
426 | {$ifndef beos}
|
---|
427 | fillchar(ombstate,sizeof(ombstate),0);
|
---|
428 | fillchar(nmbstate,sizeof(nmbstate),0);
|
---|
429 | {$endif beos}
|
---|
430 | slen:=length(s);
|
---|
431 | SetLength(result,slen+10);
|
---|
432 | i:=1;
|
---|
433 | resindex:=1;
|
---|
434 | while (i<=slen) do
|
---|
435 | begin
|
---|
436 | if (s[i]<=#127) then
|
---|
437 | begin
|
---|
438 | wc:=wchar_t(s[i]);
|
---|
439 | mblen:= 1;
|
---|
440 | end
|
---|
441 | else
|
---|
442 | {$ifndef beos}
|
---|
443 | mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
|
---|
444 | {$else not beos}
|
---|
445 | mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
|
---|
446 | {$endif beos}
|
---|
447 | case mblen of
|
---|
448 | size_t(-2):
|
---|
449 | begin
|
---|
450 | { partial invalid character, copy literally }
|
---|
451 | while (i<=slen) do
|
---|
452 | begin
|
---|
453 | ConcatCharToAnsiStr(s[i],result,resindex);
|
---|
454 | inc(i);
|
---|
455 | end;
|
---|
456 | end;
|
---|
457 | size_t(-1), 0:
|
---|
458 | begin
|
---|
459 | { invalid or null character }
|
---|
460 | ConcatCharToAnsiStr(s[i],result,resindex);
|
---|
461 | inc(i);
|
---|
462 | end;
|
---|
463 | else
|
---|
464 | begin
|
---|
465 | { a valid sequence }
|
---|
466 | { even if mblen = 1, the uppercase version may have a }
|
---|
467 | { different length }
|
---|
468 | { We can't do anything special if wchar_t is 16 bit... }
|
---|
469 | {$ifndef beos}
|
---|
470 | ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);
|
---|
471 | {$else not beos}
|
---|
472 | ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex);
|
---|
473 | {$endif not beos}
|
---|
474 | inc(i,mblen);
|
---|
475 | end;
|
---|
476 | end;
|
---|
477 | end;
|
---|
478 | SetLength(result,resindex-1);
|
---|
479 | end;
|
---|
480 |
|
---|
481 |
|
---|
482 | function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; external name 'FPC_UTF16TOUTF32';
|
---|
483 |
|
---|
484 | function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;
|
---|
485 | var
|
---|
486 | i, slen,
|
---|
487 | destindex : SizeInt;
|
---|
488 | len : longint;
|
---|
489 | uch : UCS4Char;
|
---|
490 | begin
|
---|
491 | slen:=length(s);
|
---|
492 | setlength(result,slen+1);
|
---|
493 | i:=1;
|
---|
494 | destindex:=0;
|
---|
495 | while (i<=slen) do
|
---|
496 | begin
|
---|
497 | uch:=utf16toutf32(s,i,len);
|
---|
498 | if (uch=UCS4Char(0)) then
|
---|
499 | uch:=UCS4Char(32);
|
---|
500 | result[destindex]:=uch;
|
---|
501 | inc(destindex);
|
---|
502 | inc(i,len);
|
---|
503 | end;
|
---|
504 | result[destindex]:=UCS4Char(0);
|
---|
505 | { destindex <= slen }
|
---|
506 | setlength(result,destindex+1);
|
---|
507 | end;
|
---|
508 |
|
---|
509 |
|
---|
510 | function CompareWideString(const s1, s2 : WideString) : PtrInt;
|
---|
511 | var
|
---|
512 | hs1,hs2 : UCS4String;
|
---|
513 | begin
|
---|
514 | { wcscoll interprets null chars as end-of-string -> filter out }
|
---|
515 | hs1:=WideStringToUCS4StringNoNulls(s1);
|
---|
516 | hs2:=WideStringToUCS4StringNoNulls(s2);
|
---|
517 | result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
|
---|
518 | end;
|
---|
519 |
|
---|
520 |
|
---|
521 | function CompareTextWideString(const s1, s2 : WideString): PtrInt;
|
---|
522 | begin
|
---|
523 | result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
|
---|
524 | end;
|
---|
525 |
|
---|
526 |
|
---|
527 | function CharLengthPChar(const Str: PChar): PtrInt;
|
---|
528 | var
|
---|
529 | nextlen: ptrint;
|
---|
530 | s: pchar;
|
---|
531 | {$ifndef beos}
|
---|
532 | mbstate: mbstate_t;
|
---|
533 | {$endif not beos}
|
---|
534 | begin
|
---|
535 | result:=0;
|
---|
536 | s:=str;
|
---|
537 | repeat
|
---|
538 | {$ifdef beos}
|
---|
539 | nextlen:=ptrint(mblen(str,MB_CUR_MAX));
|
---|
540 | {$else beos}
|
---|
541 | nextlen:=ptrint(mbrlen(str,MB_CUR_MAX,@mbstate));
|
---|
542 | {$endif beos}
|
---|
543 | { skip invalid/incomplete sequences }
|
---|
544 | if (nextlen<0) then
|
---|
545 | nextlen:=1;
|
---|
546 | inc(result,nextlen);
|
---|
547 | inc(s,nextlen);
|
---|
548 | until (nextlen=0);
|
---|
549 | end;
|
---|
550 |
|
---|
551 |
|
---|
552 | function StrCompAnsiIntern(s1,s2 : PChar; len1, len2: PtrInt; canmodifys1, canmodifys2: boolean): PtrInt;
|
---|
553 | var
|
---|
554 | a,b: pchar;
|
---|
555 | i: PtrInt;
|
---|
556 | begin
|
---|
557 | if not(canmodifys1) then
|
---|
558 | getmem(a,len1+1)
|
---|
559 | else
|
---|
560 | a:=s1;
|
---|
561 | for i:=0 to len1-1 do
|
---|
562 | if s1[i]<>#0 then
|
---|
563 | a[i]:=s1[i]
|
---|
564 | else
|
---|
565 | a[i]:=#32;
|
---|
566 | a[len1]:=#0;
|
---|
567 |
|
---|
568 | if not(canmodifys2) then
|
---|
569 | getmem(b,len2+1)
|
---|
570 | else
|
---|
571 | b:=s2;
|
---|
572 | for i:=0 to len2-1 do
|
---|
573 | if s2[i]<>#0 then
|
---|
574 | b[i]:=s2[i]
|
---|
575 | else
|
---|
576 | b[i]:=#32;
|
---|
577 | b[len2]:=#0;
|
---|
578 | result:=strcoll(a,b);
|
---|
579 | if not(canmodifys1) then
|
---|
580 | freemem(a);
|
---|
581 | if not(canmodifys2) then
|
---|
582 | freemem(b);
|
---|
583 | end;
|
---|
584 |
|
---|
585 |
|
---|
586 | function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
|
---|
587 | begin
|
---|
588 | result:=StrCompAnsiIntern(pchar(s1),pchar(s2),length(s1),length(s2),false,false);
|
---|
589 | end;
|
---|
590 |
|
---|
591 |
|
---|
592 | function StrCompAnsi(s1,s2 : PChar): PtrInt;
|
---|
593 | begin
|
---|
594 | result:=strcoll(s1,s2);
|
---|
595 | end;
|
---|
596 |
|
---|
597 |
|
---|
598 | function AnsiCompareText(const S1, S2: ansistring): PtrInt;
|
---|
599 | var
|
---|
600 | a, b: AnsiString;
|
---|
601 | begin
|
---|
602 | a:=UpperAnsistring(s1);
|
---|
603 | b:=UpperAnsistring(s2);
|
---|
604 | result:=StrCompAnsiIntern(pchar(a),pchar(b),length(a),length(b),true,true);
|
---|
605 | end;
|
---|
606 |
|
---|
607 |
|
---|
608 | function AnsiStrIComp(S1, S2: PChar): PtrInt;
|
---|
609 | begin
|
---|
610 | result:=AnsiCompareText(ansistring(s1),ansistring(s2));
|
---|
611 | end;
|
---|
612 |
|
---|
613 |
|
---|
614 | function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
|
---|
615 | var
|
---|
616 | a, b: pchar;
|
---|
617 | begin
|
---|
618 | if (maxlen=0) then
|
---|
619 | exit(0);
|
---|
620 | if (s1[maxlen]<>#0) then
|
---|
621 | begin
|
---|
622 | getmem(a,maxlen+1);
|
---|
623 | move(s1^,a^,maxlen);
|
---|
624 | a[maxlen]:=#0;
|
---|
625 | end
|
---|
626 | else
|
---|
627 | a:=s1;
|
---|
628 | if (s2[maxlen]<>#0) then
|
---|
629 | begin
|
---|
630 | getmem(b,maxlen+1);
|
---|
631 | move(s2^,b^,maxlen);
|
---|
632 | b[maxlen]:=#0;
|
---|
633 | end
|
---|
634 | else
|
---|
635 | b:=s2;
|
---|
636 | result:=StrCompAnsiIntern(a,b,maxlen,maxlen,a<>s1,b<>s2);
|
---|
637 | if (a<>s1) then
|
---|
638 | freemem(a);
|
---|
639 | if (b<>s2) then
|
---|
640 | freemem(b);
|
---|
641 | end;
|
---|
642 |
|
---|
643 |
|
---|
644 | function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
|
---|
645 | var
|
---|
646 | a, b: ansistring;
|
---|
647 | begin
|
---|
648 | if (maxlen=0) then
|
---|
649 | exit(0);
|
---|
650 | setlength(a,maxlen);
|
---|
651 | move(s1^,a[1],maxlen);
|
---|
652 | setlength(b,maxlen);
|
---|
653 | move(s2^,b[1],maxlen);
|
---|
654 | result:=AnsiCompareText(a,b);
|
---|
655 | end;
|
---|
656 |
|
---|
657 |
|
---|
658 | procedure ansi2pchar(const s: ansistring; const orgp: pchar; out p: pchar);
|
---|
659 | var
|
---|
660 | newlen: sizeint;
|
---|
661 | begin
|
---|
662 | newlen:=length(s);
|
---|
663 | if newlen>strlen(orgp) then
|
---|
664 | fpc_rangeerror;
|
---|
665 | p:=orgp;
|
---|
666 | if (newlen>0) then
|
---|
667 | move(s[1],p[0],newlen);
|
---|
668 | p[newlen]:=#0;
|
---|
669 | end;
|
---|
670 |
|
---|
671 |
|
---|
672 | function AnsiStrLower(Str: PChar): PChar;
|
---|
673 | var
|
---|
674 | temp: ansistring;
|
---|
675 | begin
|
---|
676 | temp:=loweransistring(str);
|
---|
677 | ansi2pchar(temp,str,result);
|
---|
678 | end;
|
---|
679 |
|
---|
680 |
|
---|
681 | function AnsiStrUpper(Str: PChar): PChar;
|
---|
682 | var
|
---|
683 | temp: ansistring;
|
---|
684 | begin
|
---|
685 | temp:=upperansistring(str);
|
---|
686 | ansi2pchar(temp,str,result);
|
---|
687 | end;
|
---|
688 |
|
---|
689 |
|
---|
690 | procedure InitThread;
|
---|
691 | begin
|
---|
692 | iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding2);
|
---|
693 | iconv_ansi2wide:=iconv_open(unicode_encoding2,nl_langinfo(CODESET));
|
---|
694 | end;
|
---|
695 |
|
---|
696 |
|
---|
697 | procedure FiniThread;
|
---|
698 | begin
|
---|
699 | if (iconv_wide2ansi <> iconv_t(-1)) then
|
---|
700 | iconv_close(iconv_wide2ansi);
|
---|
701 | if (iconv_ansi2wide <> iconv_t(-1)) then
|
---|
702 | iconv_close(iconv_ansi2wide);
|
---|
703 | end;
|
---|
704 |
|
---|
705 |
|
---|
706 | Procedure SetCWideStringManager;
|
---|
707 | Var
|
---|
708 | CWideStringManager : TWideStringManager;
|
---|
709 | begin
|
---|
710 | CWideStringManager:=widestringmanager;
|
---|
711 | With CWideStringManager do
|
---|
712 | begin
|
---|
713 | Wide2AnsiMoveProc:=@Wide2AnsiMove;
|
---|
714 | Ansi2WideMoveProc:=@Ansi2WideMove;
|
---|
715 |
|
---|
716 | UpperWideStringProc:=@UpperWideString;
|
---|
717 | LowerWideStringProc:=@LowerWideString;
|
---|
718 |
|
---|
719 | CompareWideStringProc:=@CompareWideString;
|
---|
720 | CompareTextWideStringProc:=@CompareTextWideString;
|
---|
721 |
|
---|
722 | CharLengthPCharProc:=@CharLengthPChar;
|
---|
723 |
|
---|
724 | UpperAnsiStringProc:=@UpperAnsiString;
|
---|
725 | LowerAnsiStringProc:=@LowerAnsiString;
|
---|
726 | CompareStrAnsiStringProc:=@CompareStrAnsiString;
|
---|
727 | CompareTextAnsiStringProc:=@AnsiCompareText;
|
---|
728 | StrCompAnsiStringProc:=@StrCompAnsi;
|
---|
729 | StrICompAnsiStringProc:=@AnsiStrIComp;
|
---|
730 | StrLCompAnsiStringProc:=@AnsiStrLComp;
|
---|
731 | StrLICompAnsiStringProc:=@AnsiStrLIComp;
|
---|
732 | StrLowerAnsiStringProc:=@AnsiStrLower;
|
---|
733 | StrUpperAnsiStringProc:=@AnsiStrUpper;
|
---|
734 | ThreadInitProc:=@InitThread;
|
---|
735 | ThreadFiniProc:=@FiniThread;
|
---|
736 | end;
|
---|
737 | SetWideStringManager(CWideStringManager);
|
---|
738 | end;
|
---|
739 |
|
---|
740 |
|
---|
741 | initialization
|
---|
742 | SetCWideStringManager;
|
---|
743 |
|
---|
744 | { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff }
|
---|
745 | { with the information from the environment variables according to POSIX }
|
---|
746 | { (some OSes do this automatically, but e.g. Darwin and Solaris don't) }
|
---|
747 | setlocale(LC_ALL,'');
|
---|
748 |
|
---|
749 | { init conversion tables for main program }
|
---|
750 | InitThread;
|
---|
751 | finalization
|
---|
752 | { fini conversion tables for main program }
|
---|
753 | FiniThread;
|
---|
754 | end.
|
---|