// Java source code of CharsConverter.java

/*
 Eteria IRC Client, an RFC 1459 compliant client program written in Java.
 Copyright (C) 2000-2001  Javier Kohen <jkohen at tough.com>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 
 package ar.com.jkohen.irc;
 
 import java.io.ByteArrayOutputStream;
 import java.io.CharConversionException;
 import java.io.UnsupportedEncodingException;
 import java.util.StringTokenizer;
 
 /**
  * Converts between raw byte buffers and Java strings using either a
  * regular charset or one of two heuristic pseudo-charsets:
  * <ul>
  * <li><code>"IRC"</code>: a whole message is treated as UTF-8 when it is
  *     well-formed UTF-8 and as ISO-8859-1 otherwise; on output a string is
  *     written as ISO-8859-1 when every character fits, as UTF-8 otherwise.</li>
  * <li><code>"IRC-mixed"</code>: same rule, but applied separately to every
  *     chunk delimited by a space or a comma, so one message may mix both
  *     encodings.</li>
  * </ul>
  * An empty charset name selects the platform default charset.
  */
 public class CharsConverter
 {
 	/** Pseudo-charset: per-chunk UTF-8 / Latin-1 detection. */
 	private static final String IRC_MIXED = "IRC-mixed";
 	/** Pseudo-charset: per-message UTF-8 / Latin-1 detection. */
 	private static final String IRC = "IRC";
 	/** Charset names handed to the JDK; kept exactly as the original spelled them. */
 	private static final String LATIN1 = "ISO_8859-1";
 	private static final String UTF8 = "UTF8";

 	private final String decoding;
 	private final String encoding;

 	/**
 	 * Creates a converter.
 	 *
 	 * @param dec charset name used by {@link #decode}; may be "IRC",
 	 *            "IRC-mixed" (case-insensitive) or "" for the platform default
 	 * @param enc charset name used by {@link #encode}; same special values
 	 * @throws UnsupportedEncodingException if either name is neither a special
 	 *         value nor a charset supported by this JVM
 	 */
 	public CharsConverter(String dec, String enc) throws UnsupportedEncodingException
 	{
 		// Each name is validated on its own. The original tested dec twice,
 		// so enc was skipped when dec was empty and an empty enc was rejected
 		// whenever dec was not empty.
 		checkCharset(dec);
 		checkCharset(enc);

 		decoding = dec;
 		encoding = enc;
 	}

 	/** Throws if name is a real charset name that this JVM does not support. */
 	private static void checkCharset(String name) throws UnsupportedEncodingException
 	{
 		if (name.equalsIgnoreCase(IRC_MIXED) || name.equalsIgnoreCase(IRC) || name.equals(""))
 			return;

 		/* This throws an exception if charset name is not supported */
 		new String(new byte[0], name);
 	}

 	/**
 	 * Decodes the first <code>num</code> bytes of <code>buffer</code>.
 	 *
 	 * @param buffer raw bytes
 	 * @param num    number of bytes to decode, counted from index 0
 	 * @return the decoded text; when <code>num</code> is not positive or the
 	 *         decoding name is empty, the platform default charset is used
 	 * @throws UnsupportedEncodingException if the configured charset is
 	 *         rejected by the JVM
 	 */
 	public String decode(byte buffer[], int num) throws UnsupportedEncodingException
 	{
 		if (num > 0)
 		{
 			if (decoding.equalsIgnoreCase(IRC_MIXED))
 			{
 				/*
 				** This decoder is similar to "IRC Hybrid"
 				** except that a single message can mix Latin and UTF-8
 				** separated by spaces and commas.
 				*/
 				StringBuffer text = new StringBuffer(num);
 				int start = 0;
 				for (int i = 0; i < num; i++)
 				{
 					boolean last = (i + 1 == num);
 					if (buffer[i] == ' ' || buffer[i] == ',' || last)
 					{
 						int end = last ? num : i;
 						text.append(decodeHybrid(buffer, start, end));

 						// The separator itself opens the next chunk; it is
 						// plain ASCII, so it never affects the detection.
 						start = i;
 					}
 				}
 				return(text.toString());
 			}
 			else if (decoding.equalsIgnoreCase(IRC))
 			{
 				/* Similar to "IRC-Hybrid" on XChat */
 				return(decodeHybrid(buffer, 0, num));
 			}
 			else if (!decoding.equals(""))
 			{
 				return(new String(buffer, 0, num, decoding));
 			}
 		}

 		return(new String(buffer, 0, num));
 	}

 	/** Decodes [start, end) as UTF-8, falling back to ISO-8859-1 when it is not valid UTF-8. */
 	private String decodeHybrid(byte buffer[], int start, int end) throws UnsupportedEncodingException
 	{
 		try
 		{
 			return(UTF8ToLatin(buffer, start, end));
 		}
 		catch (CharConversionException e)
 		{
 			return(new String(buffer, start, end - start, LATIN1));
 		}
 	}

 	/**
 	 * Encodes a string with the configured encoding.
 	 *
 	 * @param str text to encode
 	 * @return the encoded bytes
 	 * @throws UnsupportedEncodingException if the configured charset is
 	 *         rejected by the JVM
 	 */
 	public byte[] encode(String str) throws UnsupportedEncodingException
 	{
 		if (encoding.equalsIgnoreCase(IRC_MIXED))
 		{
 			// Delimiters are returned as tokens too, so they are written out
 			// unchanged between the independently encoded words.
 			ByteArrayOutputStream dest = new ByteArrayOutputStream(str.length());
 			StringTokenizer st = new StringTokenizer(str, " ,", true);
 			while (st.hasMoreTokens())
 			{
 				byte chunk[] = encodeHybrid(st.nextToken());
 				dest.write(chunk, 0, chunk.length);
 			}

 			return(dest.toByteArray());
 		}
 		else if (encoding.equalsIgnoreCase(IRC))
 		{
 			return(encodeHybrid(str));
 		}
 		else if (!encoding.equals(""))
 		{
 			return(str.getBytes(encoding));
 		}

 		return(str.getBytes());
 	}

 	/** Encodes as ISO-8859-1 when every character fits, as UTF-8 otherwise. */
 	private byte[] encodeHybrid(String s) throws UnsupportedEncodingException
 	{
 		return(s.getBytes(isLatin(s) ? LATIN1 : UTF8));
 	}

 	/**
 	 * Tells whether every character is below U+0100 and outside the
 	 * 0x7F-0x9F range (DEL and the C1 controls).
 	 */
 	private boolean isLatin(String str)
 	{
 		for (int i = 0; i < str.length(); i++)
 		{
 			char c = str.charAt(i);
 			if ((c >= 0x7F && c <= 0x9F) || c > 0xFF)
 				return(false);
 		}

 		return(true);
 	}

 	/**
 	 * Tells whether no byte in [start, end) lies in the 0x7F-0x9F range.
 	 *
 	 * @param buffer raw bytes
 	 * @param start  first index, inclusive
 	 * @param end    last index, exclusive
 	 * @return <code>true</code> if no byte is DEL or a C1 control value
 	 */
 	public boolean isLatin(byte buffer[], int start, int end)
 	{
 		for (int i = start; i < end; i++)
 		{
 			int c = buffer[i] & 0xFF;
 			if (c >= 0x7F && c <= 0x9F)
 				return(false);
 		}

 		return(true);
 	}

 	/**
 	 * Strictly decodes [start, end) of <code>buffer</code> as UTF-8.
 	 * Despite its name the result is an ordinary Java string; code points
 	 * above U+FFFF are returned as surrogate pairs.
 	 *
 	 * @param buffer raw bytes
 	 * @param start  first index, inclusive
 	 * @param end    last index, exclusive
 	 * @return the decoded text
 	 * @throws CharConversionException if the range is not well-formed UTF-8:
 	 *         bad lead byte, truncated sequence, bad continuation byte,
 	 *         overlong form, encoded surrogate or value above U+10FFFF
 	 */
 	public String UTF8ToLatin(byte buffer[], int start, int end) throws CharConversionException
 	{
 		StringBuffer decoded = new StringBuffer();
 		int pos = start;
 		while (pos < end)
 		{
 			int lead = buffer[pos++] & 0xFF;
 			int cp = -1;	// stays negative when the sequence is invalid

 			if (lead < 0x80)
 			{
 				cp = lead;
 			}
 			else if (lead < 0xC2)
 			{
 				// 0x80-0xBF: stray continuation byte; 0xC0-0xC1: overlong lead.
 			}
 			else if (lead < 0xE0)
 			{
 				if (pos < end)
 				{
 					int c2 = buffer[pos++] & 0xFF;
 					if (isContinuation(c2))
 						cp = ((lead & 0x1F) << 6) | (c2 & 0x3F);
 				}
 			}
 			else if (lead < 0xF0)
 			{
 				if (pos + 1 < end)
 				{
 					int c2 = buffer[pos++] & 0xFF;
 					int c3 = buffer[pos++] & 0xFF;
 					// Every trail byte is checked on its own: AND-ing them
 					// together lets one bad byte hide behind a good one.
 					if (isContinuation(c2) && isContinuation(c3))
 					{
 						int v = ((lead & 0x0F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F);
 						// Reject overlong forms and encoded surrogates.
 						if (v >= 0x800 && (v < 0xD800 || v > 0xDFFF))
 							cp = v;
 					}
 				}
 			}
 			else if (lead < 0xF5)
 			{
 				if (pos + 2 < end)
 				{
 					int c2 = buffer[pos++] & 0xFF;
 					int c3 = buffer[pos++] & 0xFF;
 					int c4 = buffer[pos++] & 0xFF;
 					if (isContinuation(c2) && isContinuation(c3) && isContinuation(c4))
 					{
 						int v = ((lead & 0x07) << 18) | ((c2 & 0x3F) << 12)
 							| ((c3 & 0x3F) << 6) | (c4 & 0x3F);
 						// Reject overlong forms and values past the Unicode range.
 						if (v >= 0x10000 && v <= 0x10FFFF)
 							cp = v;
 					}
 				}
 			}

 			if (cp < 0)
 				throw new CharConversionException("UTF-8 not valid at " + pos);

 			if (cp < 0x10000)
 			{
 				decoded.append((char)cp);
 			}
 			else
 			{
 				// A supplementary code point does not fit in one char:
 				// emit the UTF-16 surrogate pair instead of truncating.
 				int v = cp - 0x10000;
 				decoded.append((char)(0xD800 + (v >> 10)));
 				decoded.append((char)(0xDC00 + (v & 0x3FF)));
 			}
 		}

 		return(decoded.toString());
 	}

 	/** Tells whether b (0-255) has the 10xxxxxx shape of a UTF-8 continuation byte. */
 	private static boolean isContinuation(int b)
 	{
 		return((b & 0xC0) == 0x80);
 	}
 }