Ubuntu下实现UTF8编码转为Unicode编码 C程序_Linux系统教程

Ubuntu下实现UTF8编码转为Unicode编码 C程序：

unicode.c文件：

#include <stdio.h>

#include<string.h>

#include"unicode.h"

/*
 * Scratch buffer filled by one_utf8_to_unicode(): the most recently decoded
 * code point, least-significant byte in output[0]. utf8_to_unicode() reads
 * output[1] and output[0] after each call.
 */
unsigned char output[4];

/*
 * Count the consecutive 1-bits at the top of a byte.
 *
 * For the first byte of a UTF-8 sequence this is the sequence length:
 * 0 means a single ASCII byte (0xxxxxxx), 2..6 a multi-byte lead byte.
 * A continuation byte (10xxxxxx) yields 1; 0xFE and 0xFF yield 7 and 8.
 */
int get_utf8_size(unsigned char Input)
{
    int ones = 0;
    unsigned int mask;

    /* Walk from bit 7 downwards and stop at the first 0 bit. */
    for (mask = 0x80u; mask != 0 && (Input & mask) != 0; mask >>= 1) {
        ones++;
    }
    return ones;
}

/*
 * Decode a single UTF-8 sequence into the file-scope buffer `output`.
 *
 * input     first byte of the sequence; (utfbytes == 0 ? 1 : utfbytes)
 *           bytes must be readable from it.
 * utfbytes  result of get_utf8_size() on the lead byte: 0 for one ASCII
 *           byte, 2..6 for a multi-byte sequence.
 *
 * On success the code point is stored in output[0..3], least-significant
 * byte first, and the number of input bytes consumed is returned (1 for
 * ASCII, otherwise utfbytes).
 *
 * Returns 0 and leaves `output` all zero when input is NULL, when utfbytes
 * is not 0 or 2..6, or when any trailing byte is not of the form 10xxxxxx.
 * Overlong encodings and surrogate code points are not rejected.
 */
int one_utf8_to_unicode(unsigned char* input, int utfbytes)
{
    unsigned long code;
    int i;

    memset(output, 0, sizeof output);

    if (input == NULL)
        return 0;

    if (utfbytes == 0) {
        /* Plain ASCII: the byte is the code point. */
        output[0] = input[0];
        return 1;
    }

    if (utfbytes < 2 || utfbytes > 6)
        return 0;

    /*
     * The lead byte is utfbytes 1-bits, a 0-bit, then (7 - utfbytes) payload
     * bits, so shifting 0x7F right by utfbytes yields the payload mask
     * (0x1F, 0x0F, 0x07, 0x03, 0x01 for lengths 2..6).
     */
    code = input[0] & (0x7Fu >> utfbytes);

    for (i = 1; i < utfbytes; i++) {
        /* Every trailing byte must be 10xxxxxx and carries 6 payload bits. */
        if ((input[i] & 0xC0) != 0x80)
            return 0;
        code = (code << 6) | (unsigned long)(input[i] & 0x3F);
    }

    output[0] = (unsigned char)(code & 0xFF);
    output[1] = (unsigned char)((code >> 8) & 0xFF);
    output[2] = (unsigned char)((code >> 16) & 0xFF);
    output[3] = (unsigned char)((code >> 24) & 0xFF);

    return utfbytes;
}

/*
 * Convert a UTF-8 byte string into a sequence of 16-bit units.
 *
 * inputs   UTF-8 data to convert.
 * outputs  destination; receives two bytes per decoded character, high byte
 *          first. The caller must provide at least 2 bytes for every
 *          character in inputs (2 * inbyte is always enough).
 * inbyte   number of bytes available at inputs.
 *
 * Returns the number of 16-bit units written (0 if either pointer is NULL
 * or inbyte <= 0).
 *
 * Only the low 16 bits of each code point are stored, so characters above
 * U+FFFF are truncated. A malformed sequence, a stray continuation byte, an
 * invalid lead byte (0xFE/0xFF) or a sequence cut off by the end of the
 * input produces the unit 0x0000.
 */
int utf8_to_unicode(unsigned char* inputs,unsigned char* outputs,int inbyte)
{
    int pos = 0;      /* bytes of inputs consumed so far */
    int written = 0;  /* bytes stored in outputs so far  */
    int units = 0;    /* 16-bit units produced           */

    if (inputs == NULL || outputs == NULL)
        return 0;

    while (pos < inbyte) {
        int remaining = inbyte - pos;
        int lead = get_utf8_size(inputs[pos]);
        /* Anything that is not a valid 2..6 byte lead occupies one byte. */
        int need = (lead >= 2 && lead <= 6) ? lead : 1;
        unsigned char hi = 0;
        unsigned char lo = 0;

        if (need > remaining) {
            /* Truncated sequence: never read past the end of inputs. */
            need = remaining;
        } else {
            /* On failure the decoder leaves `output` zeroed, giving 0x0000. */
            one_utf8_to_unicode(inputs + pos, lead);
            hi = output[1];
            lo = output[0];
        }

        outputs[written] = hi;
        outputs[written + 1] = lo;
        written += 2;
        units++;
        pos += need;
    }

    return units;
}

unicode.h文件：

/* UTF-8 to 16-bit Unicode conversion helpers implemented in unicode.c. */
#ifndef UNICODE_H
#define UNICODE_H

/*
 * Number of leading 1-bits in Input: 0 for an ASCII byte, N for the lead
 * byte of an N-byte UTF-8 sequence.
 */
int get_utf8_size(unsigned char Input);

/*
 * Decode the single UTF-8 sequence at input, whose lead byte gave utfbytes
 * from get_utf8_size(), into unicode.c's `output` buffer. Returns the number
 * of input bytes consumed, or 0 if the sequence is rejected.
 */
int one_utf8_to_unicode(unsigned char *input,int utfbytes);

/*
 * Convert inbyte bytes of UTF-8 at inputs into 16-bit units stored high byte
 * first in outputs (two bytes per character). Returns the number of units
 * written.
 */
int utf8_to_unicode(unsigned char* inputs,unsigned char* outputs,int inbyte);

#endif /* UNICODE_H */

main.c文件：

#include <stdio.h>

#include<string.h>

#include"unicode.h"

/* Receives the 16-bit units (high byte first) written by utf8_to_unicode(). */
unsigned char out[1024];

/*
 * Demo: convert a fixed UTF-8 sample and print each resulting 16-bit unit as
 * four hex digits followed by its 1-based index.
 */
int main(void)
{
    int len = 0;
    int j = 0;
    int k;

    /* UTF-8 bytes on the left, expected code point in the comment. */
    unsigned char pss[] = {
        0x74,             /* 0074 t */
        0xe7, 0x9f, 0xa5, /* 77e5 */
        0xe4, 0xb9, 0x8e, /* 4e4e */
        0x49,             /* 0049 I */
        0x4e,             /* 004e N */
        0xe6, 0x97, 0xa5, /* 65e5 */
        0xe6, 0x8a, 0xa5, /* 62a5 */
        0xE8, 0xBF, 0x85, /* 8FC5 */
        0xE8, 0x83, 0xBD, /* 80fd */
        0x58              /* 0058 X */
    };

    /* Pass the real array size so every character above is converted. */
    len = utf8_to_unicode(pss, out, (int)sizeof pss);

    for (k = 0; k < len * 2; k++) {
        printf("%.2x", out[k]);
        /* After the second byte of each unit, finish the line with its index. */
        if (k % 2 > 0)
            printf(" -%d\n", ++j);
    }

    return 0;
}

makefile文件：

# Sources and the name of the executable built from them.
mainFile = main.c unicode.c
object = unicode

# all and clean are commands, not files.
.PHONY: all clean

all: $(object)

# Rebuild when either source file or the shared header changes.
# Recipe lines must begin with a tab character.
$(object): $(mainFile) unicode.h
	gcc -o $(object) $(mainFile) -lm -pthread -lrt

# -f keeps clean from failing when the executable does not exist.
clean:
	rm -f $(object)

代码完成!

在 Ubuntu 下打开终端，进入上述文件所在的目录，输入 make 并回车完成编译，生成可执行文件 unicode；然后输入 ./unicode 并回车运行。

执行结果：

snail@ubuntu:~/桌面/c/utf8-unicode$ make

gcc -o unicode main.c unicode.c -lm -pthread -lrt

snail@ubuntu:~/桌面/c/utf8-unicode$ ./unicode

0074 -1

77e5 -2

4e4e -3

0049 -4

004e -5

65e5 -6

62a5 -7

8fc5 -8

snail@ubuntu:~/桌面/c/utf8-unicode$

本文永久更新地址：http://www.linuxdiyf.com/linux/28328.html

Ubuntu下实现UTF8编码转为Unicode编码 C程序

频道文章

网站推荐文章

推荐教程

热点推荐