Таблица подскажет декодировку:
; Convert the UTF-8 text file "1.txt" into the UTF-16LE file "2.txt" (with BOM).
;
; Decoder outline: read one lead byte, derive from its high bits how many
; continuation bytes (10xxxxxx) follow, then fold 6 payload bits per
; continuation byte into the code point c. c = -1 marks a malformed sequence.
If ReadFile(0, "1.txt")
  If CreateFile(1, "2.txt")
    ReadStringFormat(0)     ; skip a UTF-8 BOM in the input, if there is one
    WriteWord(1, $FEFF)     ; UTF-16LE BOM
    While Not Eof(0)
      b.a = ReadAsciiCharacter(0)

      ; Classify the lead byte: payload bits go to c, sequence length to extra.
      If b <= $7F                 ; 0xxxxxxx - plain ASCII
        c.l = b : extra.l = 0
      ElseIf b & $E0 = $C0        ; 110xxxxx - 2-byte sequence
        c = b & $1F : extra = 1
      ElseIf b & $F0 = $E0        ; 1110xxxx - 3-byte sequence
        c = b & $0F : extra = 2
      ElseIf b & $F8 = $F0        ; 11110xxx - 4-byte sequence
        c = b & $07 : extra = 3
      ElseIf b & $FC = $F8        ; 111110xx - legacy 5-byte form
        c = b & $03 : extra = 4
      ElseIf b & $FE = $FC        ; 1111110x - legacy 6-byte form
        c = b & $01 : extra = 5
      Else                        ; stray continuation byte or $FE/$FF
        Debug "err:"+Str(b)
        c = -1 : extra = 0
      EndIf

      ; Fold in the continuation bytes, 6 bits each.
      While extra > 0 And c >= 0
        If Eof(0)
          Debug "err:"+Str(b)     ; input ends in the middle of a sequence
          c = -1
        Else
          b = ReadAsciiCharacter(0)
          If b & $C0 = $80
            c = (c << 6) | (b & $3F)
          Else
            Debug "err:"+Str(b)
            FileSeek(0, Loc(0) - 1) ; not a continuation byte: re-read it as a lead byte
            c = -1
          EndIf
        EndIf
        extra = extra - 1
      Wend

      ; Emit the code point as UTF-16LE.
      If c < 0 Or c > $10FFFF Or (c >= $D800 And c <= $DFFF)
        WriteWord(1, $FFFD)       ; not representable: U+FFFD REPLACEMENT CHARACTER
      ElseIf c < $10000
        WriteWord(1, c)           ; BMP: a single 16-bit unit
      Else
        c = c - $10000            ; supplementary plane: surrogate pair
        WriteWord(1, $D800 | (c >> 10))
        WriteWord(1, $DC00 | (c & $3FF))
      EndIf
    Wend
    CloseFile(1)
  Else
    Debug "err: cannot create 2.txt"
  EndIf
  CloseFile(0)
Else
  Debug "err: cannot open 1.txt"
EndIf
Хотелось бы получить не Unicode, а готовый файл в кодировке Windows-1251. Нашёл пример декодирования, который пока что оказался неполным.
; Convert the UTF-8 text file "1.txt" into the Windows-1251 file "2.txt".
;
; Decoder outline: read one lead byte, derive from its high bits how many
; continuation bytes (10xxxxxx) follow, then fold 6 payload bits per
; continuation byte into the code point c. c = -1 marks a malformed sequence.
; Code points without a mapping below are written as a visible "<code>" marker
; so that the table can be extended later.
If ReadFile(0, "1.txt")
  If CreateFile(1, "2.txt")
    ReadStringFormat(0)     ; skip a UTF-8 BOM in the input; the output has no BOM
    While Not Eof(0)
      b.a = ReadAsciiCharacter(0)

      ; Classify the lead byte: payload bits go to c, sequence length to extra.
      If b <= $7F                 ; 0xxxxxxx - plain ASCII
        c.l = b : extra.l = 0
      ElseIf b & $E0 = $C0        ; 110xxxxx - 2-byte sequence
        c = b & $1F : extra = 1
      ElseIf b & $F0 = $E0        ; 1110xxxx - 3-byte sequence
        c = b & $0F : extra = 2
      ElseIf b & $F8 = $F0        ; 11110xxx - 4-byte sequence
        c = b & $07 : extra = 3
      ElseIf b & $FC = $F8        ; 111110xx - legacy 5-byte form
        c = b & $03 : extra = 4
      ElseIf b & $FE = $FC        ; 1111110x - legacy 6-byte form
        c = b & $01 : extra = 5
      Else                        ; stray continuation byte or $FE/$FF
        Debug "err:"+Str(b)
        c = -1 : extra = 0
      EndIf

      ; Fold in the continuation bytes, 6 bits each.
      While extra > 0 And c >= 0
        If Eof(0)
          Debug "err:"+Str(b)     ; input ends in the middle of a sequence
          c = -1
        Else
          b = ReadAsciiCharacter(0)
          If b & $C0 = $80
            c = (c << 6) | (b & $3F)
          Else
            Debug "err:"+Str(b)
            FileSeek(0, Loc(0) - 1) ; not a continuation byte: re-read it as a lead byte
            c = -1
          EndIf
        EndIf
        extra = extra - 1
      Wend

      ; put data
      If c < 0
        WriteAsciiCharacter(1, '?')   ; malformed input sequence
      ElseIf c < $80
        WriteAsciiCharacter(1, c)     ; ASCII is identical in Windows-1251
      ElseIf c < 65536
        out.l = -1                    ; -1 = no Windows-1251 mapping known yet
        Select c
          Case $A0, $A4, $A6, $A7, $A9, $AB To $AE, $B0, $B1, $B5 To $B7, $BB
            out = c                   ; Latin-1 symbols that keep their value in 1251
          Case $410 To $44F           ; Cyrillic A..YA and a..ya -> $C0..$FF
            out = c - $350
          Case $401                   ; capital IO
            out = $A8
          Case $451                   ; small io
            out = $B8
          Case $2013                  ; en dash
            out = $96
          Case $2014                  ; em dash
            out = $97
          Case $2015                  ; horizontal bar: not in 1251, fall back to '-'
            out = $2D
          Case $2026                  ; horizontal ellipsis
            out = $85
        EndSelect
        If out >= 0
          WriteAsciiCharacter(1, out)
        Else
          Debug Chr(c)+":"+Str(c)
          WriteString(1, "<"+Str(c)+">", #PB_Ascii)
        EndIf
      Else
        ; Outside the BMP: nothing in Windows-1251 corresponds, only report it.
        Debug Chr(c)+"::"+Str(c)
      EndIf
    Wend
    CloseFile(1)
  Else
    Debug "err: cannot create 2.txt"
  EndIf
  CloseFile(0)
Else
  Debug "err: cannot open 1.txt"
EndIf
Решение простое — взять готовую таблицу (она находится в исходнике utf8to1251.c) и сравнить полученный результат с ней. Пока я работаю над заполнением всех данных. Исходники.
Комментарии
Отправить комментарий