<?php
/**
 * Detect charset
 *
 * Copyright 2003 Alexander Sabournekov
 *
 * This file is part of NOCC. NOCC is free software under the terms of the
 * GNU General Public License. You should have received a copy of the license
 * along with NOCC.  If not, see <http://www.gnu.org/licenses/>.
 *
 * @package    NOCC
 * @subpackage Utilities
 * @license    http://www.gnu.org/licenses/ GNU General Public License
 * @version    SVN: $Id: detect_cyr_charset.php,v 1.1 2014/02/20 15:56:03 cvs Exp $
 */

/*
 * Byte-pair score table consulted by GetCodeScoreAll() for the KOI8-R
 * candidate.
 *
 * Each key is a two-byte string and each value is an integer weight. The
 * weight is added to the KOI8-R score every time that pair is found in a
 * lower-cased word of the text being examined.
 *
 * NOTE(review): the keys are raw 8-bit bytes, presumably KOI8-R encoded
 * letter pairs - keep this file byte-exact (do not re-save it as UTF-8).
 */
$cad_StatsTableKoi = array(
'��' => 1815, '��' => 1446, '��' => 1438, '��' => 1405, '��' => 1207, '��' => 1194, '��' => 1177, '��' => 1140, '��' => 1078,
'��' => 1073, '��' => 1034, '��' => 1008, '��' => 980, '��' => 947, '��' => 922, '��' => 916, '��' => 870, '��' => 849, '��' => 843,
'��' => 840, '��' => 828, '��' => 824, '��' => 797, '��' => 794, '��' => 794, '��' => 761, '��' => 752, '��' => 726, '��' => 722,
'��' => 686, '��' => 681, '��' => 679, '��' => 649, '��' => 633, '��' => 630, '��' => 629, '��' => 627, '��' => 619, '��' => 612,
'��' => 599, '��' => 596, '��' => 595, '��' => 592, '��' => 581, '��' => 578, '��' => 573, '��' => 551, '��' => 548, '��' => 547,
'��' => 543, '��' => 541, '��' => 527, '��' => 526, '��' => 519, '��' => 516, '��' => 515, '��' => 515, '��' => 513, '��' => 479,
'��' => 478, '��' => 464, '��' => 460, '��' => 457, '��' => 448, '��' => 446, '��' => 438, '��' => 437, '��' => 431, '��' => 431,
'��' => 425, '��' => 417, '��' => 386, '��' => 386, '��' => 385, '��' => 383, '��' => 373, '��' => 364, '��' => 361, '��' => 350,
'��' => 347, '��' => 346, '��' => 346, '��' => 344, '��' => 343, '��' => 342, '��' => 341, '��' => 340, '��' => 337, '��' => 335,
'��' => 335, '��' => 327, '��' => 324, '��' => 324, '��' => 316, '��' => 308, '��' => 306, '��' => 296, '��' => 276, '��' => 276,
'��' => 267, '��' => 264, '��' => 263, '��' => 259, '��' => 253, '��' => 253, '��' => 253, '��' => 250, '��' => 243, '��' => 242,
'��' => 242, '��' => 240, '��' => 239, '��' => 238, '��' => 236, '��' => 235, '��' => 234, '��' => 234, '��' => 231, '��' => 227,
'��' => 225, '��' => 225, '��' => 222, '��' => 220, '��' => 219, '��' => 218, '��' => 215, '��' => 214, '��' => 210, '��' => 210,
'��' => 210, '��' => 209, '��' => 208, '��' => 208, '��' => 207, '��' => 206, '��' => 206, '��' => 203, '��' => 201, '��' => 200,
'��' => 196, '��' => 195, '��' => 193, '��' => 192, '��' => 187, '��' => 184, '��' => 184, '��' => 181, '��' => 179, '��' => 175,
'��' => 174, '��' => 171, '��' => 170, '��' => 167, '��' => 166, '��' => 165, '��' => 163, '��' => 162, '��' => 162, '��' => 161,
'��' => 161, '��' => 160, '��' => 160, '��' => 156, '��' => 155, '��' => 154, '��' => 153, '��' => 151, '��' => 150, '��' => 149,
'��' => 148, '��' => 148, '��' => 147, '��' => 147, '��' => 145, '��' => 145, '��' => 144, '��' => 144, '��' => 144, '��' => 144,
'��' => 142, '��' => 139, '��' => 139, '��' => 136, '��' => 136, '��' => 136, '��' => 135, '��' => 132, '��' => 132, '��' => 131,
'��' => 131, '��' => 127, '��' => 127, '��' => 127, '��' => 125, '��' => 125, '��' => 124, '��' => 121, '��' => 119, '��' => 117,
'��' => 116, '��' => 114, '��' => 113, '��' => 113, '��' => 111, '��' => 111, '��' => 107, '��' => 105, '��' => 105, '��' => 104,
'��' => 103, '��' => 103, '��' => 103, '��' => 102, '��' => 100, '��' => 100, '��' => 99, '��' => 99, '��' => 98, '��' => 97,
'��' => 97, '��' => 96, '��' => 91, '��' => 90, '��' => 90, '��' => 90, '��' => 88, '��' => 88, '��' => 88, '��' => 87, '��' => 86,
'��' => 86, '��' => 86, '��' => 84, '��' => 84, '��' => 83, '��' => 83, '��' => 83, '��' => 82, '��' => 80, '��' => 78, '��' => 78,
'��' => 75, '��' => 75, '��' => 75, '��' => 74, '��' => 73, '��' => 73, '��' => 72, '��' => 72, '��' => 70, '��' => 69, '��' => 69,
'��' => 69, '��' => 68, '��' => 68, '��' => 67, '��' => 66, '��' => 65, '��' => 65, '��' => 64, '��' => 64, '��' => 63, '��' => 63,
'��' => 62, '��' => 62, '��' => 62, '��' => 61, '��' => 61, '��' => 59, '��' => 59, '��' => 58, '��' => 57, '��' => 57, '��' => 56,
'��' => 56, '��' => 54, '��' => 54, '��' => 53, '��' => 53, '��' => 52, '��' => 52, '��' => 51, '��' => 51, '��' => 51, '��' => 50,
'��' => 50, '��' => 50, '��' => 49, '��' => 49, '��' => 48, '��' => 48, '��' => 48, '��' => 48, '��' => 47, '��' => 46, '��' => 46,
'��' => 46, '��' => 45, '��' => 44, '��' => 44, '��' => 44, '��' => 43, '��' => 40, '��' => 40, '��' => 40, '��' => 40, '��' => 40,
'��' => 40, '��' => 40, '��' => 40, '��' => 39, '��' => 39, '��' => 39, '��' => 38, '��' => 37, '��' => 37, '��' => 36, '��' => 36,
'��' => 36, '��' => 36, '��' => 35, '��' => 35, '��' => 35, '��' => 35, '��' => 34, '��' => 34, '��' => 34, '��' => 34, '��' => 33,
'��' => 33, '��' => 33, '��' => 33, '��' => 33, '��' => 33, '��' => 33, '��' => 32, '��' => 32, '��' => 31, '��' => 31, '��' => 31,
'��' => 31, '��' => 31, '��' => 30, '��' => 30, '��' => 30, '��' => 29, '��' => 29, '��' => 28, '��' => 28, '��' => 27, '��' => 27,
'��' => 26, '��' => 26, '��' => 26, '��' => 26, '��' => 26, '��' => 25, '��' => 25, '��' => 25, '��' => 25, '��' => 24, '��' => 24,
'��' => 24, '��' => 23, '��' => 23, '��' => 23, '��' => 23, '��' => 23, '��' => 23, '��' => 21, '��' => 21, '��' => 21, '��' => 20,
'��' => 20, '��' => 20, '��' => 20, '��' => 20, '��' => 20, '��' => 20, '��' => 19, '��' => 19, '��' => 19, '��' => 19, '��' => 18,
'��' => 18, '��' => 18, '��' => 18, '��' => 18, '��' => 18, '��' => 17, '��' => 17, '��' => 17, '��' => 16, '��' => 16, '��' => 16,
'��' => 15, '��' => 15, '��' => 15, '��' => 15, '��' => 15, '��' => 15, '��' => 15, '��' => 14, '��' => 14, '��' => 14, '��' => 14,
'��' => 13, '��' => 13, '��' => 13, '��' => 13, '��' => 13, '��' => 13, '��' => 13, '��' => 13, '��' => 12, '��' => 12, '��' => 12,
'��' => 12, '��' => 12, '��' => 12, '��' => 12, '��' => 12, '��' => 12, '��' => 12, '��' => 12, '��' => 12, '��' => 12, '��' => 12,
'��' => 11, '��' => 11, '��' => 11, '��' => 11, '��' => 11, '��' => 11, '��' => 10, '��' => 10, '��' => 10, '��' => 9, '��' => 9,
'��' => 9, '��' => 9, '��' => 9, '��' => 9, '��' => 9, '��' => 9, '��' => 8, '��' => 8, '��' => 8, '��' => 8, '��' => 8, '��' => 8,
'��' => 8, '��' => 8, '��' => 8, '��' => 8, '��' => 8, '��' => 8, '��' => 7, '��' => 7, '��' => 7, '��' => 7, '��' => 7, '��' => 7,
'��' => 7, '��' => 7, '��' => 7, '��' => 7, '��' => 6, '��' => 6, '��' => 6, '��' => 6, '��' => 6, '��' => 6, '��' => 6, '��' => 6,
'��' => 6, '��' => 6, '��' => 6, '��' => 6, '��' => 5, '��' => 5, '��' => 5, '��' => 5, '��' => 5, '��' => 5, '��' => 5, '��' => 5,
'��' => 5, '��' => 5, '��' => 5, '��' => 5, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4,
'��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4,
'��' => 3,  '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3,
'��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 2,
'��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2,
'��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2,
'��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1,
'��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1,
'��' => 1,  '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1,
'��' => 1,  '��' => 1, '��' => 1, '��' => 1, '��' => 1
);

/*
 * Byte-pair score table consulted by GetCodeScoreAll() for the
 * Windows-1251 candidate.
 *
 * Each key is a two-byte string and each value is an integer weight. The
 * weight is added to the Windows-1251 score every time that pair is found
 * in a lower-cased word of the text being examined.
 *
 * NOTE(review): the keys are raw 8-bit bytes, presumably Windows-1251
 * encoded letter pairs - keep this file byte-exact (do not re-save it as
 * UTF-8).
 */
$cad_StatsTableWin = array(
'��' => 1815, '��' => 1446, '��' => 1438, '��' => 1405, '��' => 1207, '��' => 1194, '��' => 1177, '��' => 1140, '��' => 1078, '��' => 1073,
'��' => 1034, '��' => 1008, '��' => 980, '��' => 947, '��' => 922, '��' => 916, '��' => 870, '��' => 849, '��' => 843, '��' => 840, '��' => 828,
'��' => 824, '��' => 797, '��' => 794, '��' => 794, '��' => 761, '��' => 752, '��' => 726, '��' => 722, '��' => 686, '��' => 681, '��' => 679,
'��' => 649, '��' => 633, '��' => 630, '��' => 629, '��' => 627, '��' => 619, '��' => 612, '��' => 599, '��' => 596, '��' => 595, '��' => 592,
'��' => 581, '��' => 578, '��' => 573, '��' => 551, '��' => 548, '��' => 547, '��' => 543, '��' => 541, '��' => 527, '��' => 526, '��' => 519,
'��' => 516, '��' => 515, '��' => 515, '��' => 513, '��' => 479, '��' => 478, '��' => 464, '��' => 460, '��' => 457, '��' => 448, '��' => 446,
'��' => 438, '��' => 437, '��' => 431, '��' => 425, '��' => 417, '��' => 386, '��' => 386, '��' => 385, '��' => 383, '��' => 373, '��' => 364,
'��' => 361, '��' => 350, '��' => 347, '��' => 346, '��' => 346, '��' => 344, '��' => 343, '��' => 342, '��' => 341, '��' => 340, '��' => 337,
'��' => 335, '��' => 335, '��' => 327, '��' => 324, '��' => 324, '��' => 316, '��' => 308, '��' => 306, '��' => 296, '��' => 276, '��' => 276,
'��' => 267, '��' => 264, '��' => 263, '��' => 259, '��' => 253, '��' => 253, '��' => 253, '��' => 250, '��' => 243, '��' => 242, '��' => 242,
'��' => 240, '��' => 239, '��' => 238, '��' => 236, '��' => 235, '��' => 234, '��' => 234, '��' => 231, '��' => 227, '��' => 225, '��' => 225,
'��' => 222, '��' => 220, '��' => 219, '��' => 218, '��' => 215, '��' => 214, '��' => 210, '��' => 210, '��' => 210, '��' => 209, '��' => 208,
'��' => 208, '��' => 207, '��' => 206, '��' => 206, '��' => 203, '��' => 201, '��' => 200, '��' => 196, '��' => 195, '��' => 193, '��' => 192,
'��' => 187, '��' => 184, '��' => 184, '��' => 181, '��' => 179, '��' => 175, '��' => 174, '��' => 171, '��' => 170, '��' => 167, '��' => 166,
'��' => 165, '��' => 163, '��' => 162, '��' => 162, '��' => 161, '��' => 161, '��' => 160, '��' => 160, '��' => 156, '��' => 155, '��' => 154,
'��' => 153, '��' => 151, '��' => 150, '��' => 149, '��' => 148, '��' => 148, '��' => 147, '��' => 147, '��' => 145, '��' => 145, '��' => 144,
'��' => 144, '��' => 144, '��' => 144, '��' => 142, '��' => 139, '��' => 139, '��' => 136, '��' => 136, '��' => 136, '��' => 135, '��' => 132,
'��' => 132, '��' => 131, '��' => 131, '��' => 127, '��' => 127, '��' => 127, '��' => 125, '��' => 125, '��' => 124, '��' => 121, '��' => 119,
'��' => 117, '��' => 116, '��' => 114, '��' => 113, '��' => 113, '��' => 111, '��' => 111, '��' => 107, '��' => 105, '��' => 105, '��' => 104,
'��' => 103, '��' => 103, '��' => 103, '��' => 102, '��' => 100, '��' => 100, '��' => 99, '��' => 99, '��' => 98, '��' => 97, '��' => 97,
'��' => 96, '��' => 91, '��' => 90, '��' => 90, '��' => 90, '��' => 88, '��' => 88, '��' => 88, '��' => 87, '��' => 86, '��' => 86, '��' => 86,
'��' => 84, '��' => 84, '��' => 83, '��' => 83, '��' => 83, '��' => 82, '��' => 80, '��' => 78, '��' => 78, '��' => 75, '��' => 75, '��' => 75,
'��' => 74, '��' => 73, '��' => 73, '��' => 72, '��' => 72, '��' => 70, '��' => 69, '��' => 69, '��' => 69, '��' => 68, '��' => 68, '��' => 67,
'��' => 66, '��' => 65, '��' => 65, '��' => 64, '��' => 64, '��' => 63, '��' => 63, '��' => 62, '��' => 62, '��' => 62, '��' => 61, '��' => 61,
'��' => 59, '��' => 59, '��' => 58, '��' => 57, '��' => 57, '��' => 56, '��' => 56, '��' => 54, '��' => 54, '��' => 53, '��' => 53, '��' => 52,
'��' => 52, '��' => 51, '��' => 51, '��' => 51, '��' => 50, '��' => 50, '��' => 50, '��' => 49, '��' => 49, '��' => 48, '��' => 48, '��' => 48,
'��' => 48, '��' => 47, '��' => 46, '��' => 46, '��' => 46, '��' => 45, '��' => 44, '��' => 44, '��' => 44, '��' => 43, '��' => 40, '��' => 40,
'��' => 40, '��' => 40, '��' => 40, '��' => 40, '��' => 40, '��' => 40, '��' => 39, '��' => 39, '��' => 39, '��' => 38, '��' => 37, '��' => 37,
'��' => 36, '��' => 36, '��' => 36, '��' => 36, '��' => 35, '��' => 35, '��' => 35, '��' => 35, '��' => 34, '��' => 34, '��' => 34, '��' => 34,
'��' => 33, '��' => 33, '��' => 33, '��' => 33, '��' => 33, '��' => 33, '��' => 33, '��' => 32, '��' => 32, '��' => 31, '��' => 31, '��' => 31,
'��' => 31, '��' => 31, '��' => 30, '��' => 30, '��' => 30, '��' => 29, '��' => 29, '��' => 28, '��' => 28, '��' => 27, '��' => 27, '��' => 26,
'��' => 26, '��' => 26, '��' => 26, '��' => 26, '��' => 25, '��' => 25, '��' => 25, '��' => 25, '��' => 24, '��' => 24, '��' => 24, '��' => 23,
'��' => 23, '��' => 23, '��' => 23, '��' => 23, '��' => 23, '��' => 21, '��' => 21, '��' => 21, '��' => 20, '��' => 20, '��' => 20, '��' => 20,
'��' => 20, '��' => 20, '��' => 20, '��' => 19, '��' => 19, '��' => 19, '��' => 19, '��' => 18, '��' => 18, '��' => 18, '��' => 18, '��' => 18,
'��' => 18, '��' => 17, '��' => 17, '��' => 17, '��' => 16, '��' => 16, '��' => 16, '��' => 15, '��' => 15, '��' => 15, '��' => 15, '��' => 15,
'��' => 15, '��' => 15, '��' => 14, '��' => 14, '��' => 14, '��' => 14, '��' => 13, '��' => 13, '��' => 13, '��' => 13, '��' => 13, '��' => 13,
'��' => 13, '��' => 13, '��' => 12, '��' => 12, '��' => 12, '��' => 12, '��' => 12, '��' => 12, '��' => 12, '��' => 12, '��' => 12, '��' => 12,
'��' => 12, '��' => 12, '��' => 12, '��' => 12, '��' => 11, '��' => 11, '��' => 11, '��' => 11, '��' => 11, '��' => 11, '��' => 10, '��' => 10,
'��' => 10, '��' => 9, '��' => 9, '��' => 9, '��' => 9, '��' => 9, '��' => 9, '��' => 9, '��' => 9, '��' => 8, '��' => 8, '��' => 8, '��' => 8,
'��' => 8, '��' => 8, '��' => 8, '��' => 8, '��' => 8, '��' => 8, '��' => 8, '��' => 8, '��' => 7, '��' => 7, '��' => 7, '��' => 7, '��' => 7,
'��' => 7, '��' => 7, '��' => 7, '��' => 7, '��' => 7, '��' => 6, '��' => 6, '��' => 6, '��' => 6, '��' => 6, '��' => 6, '��' => 6, '��' => 6,
'��' => 6, '��' => 6, '��' => 6, '��' => 6, '��' => 5, '��' => 5, '��' => 5, '��' => 5, '��' => 5, '��' => 5, '��' => 5, '��' => 5, '��' => 5,
'��' => 5, '��' => 5, '��' => 5, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4,
'��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 4, '��' => 3, '��' => 3, '��' => 3,
'��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3,
'��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 3, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2,
'��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2,
'��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 2, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1,
'��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1,
'��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1,
'��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1, '��' => 1
);




/**
 * Guess which 8-bit Cyrillic charset a piece of text is written in.
 *
 * lxnt: patched to return charset names that iconv() understands.
 *
 * The text is treated as plain ASCII when it contains no byte in the range
 * 0x7F-0xFF. Otherwise it is scored against both byte-pair tables, starting
 * at the first such byte, and the candidate with the larger score ratio is
 * returned (KOI8-R wins a tie).
 *
 * @param string $Data   Raw text to examine (works on short pieces such as
 *                       sender or subject lines).
 * @param int    $dbg_fl Not used by this function.
 *
 * @return string 'US-ASCII', 'koi8-r' or 'windows-1251'; an empty string
 *                when neither table produced a positive score.
 */
function detect_charset($Data,$dbg_fl = 0) {
    // Locate the first non-ASCII byte; the ungreedy match captures it in group 2.
    if (!preg_match("/(.*)([\x7F-\xFF]+)/xU", $Data, $hit)) {
        return 'US-ASCII';
    }
    $firstHighByte = strpos($Data, $hit[2]);

    $marks = GetCodeScoreAll($Data, $firstHighByte);
    $koiScore = $marks[0];
    $winScore = $marks[1];

    // The +1 keeps the division defined when the other score is zero.
    $ratios = array(
        'koi8-r'       => $koiScore / ($winScore + 1),
        'windows-1251' => $winScore / ($koiScore + 1),
    );

    $winner = max_from_ratio($ratios);
    return $winner[1];
}

/**
 * Pick the entry with the largest value from an array.
 *
 * Only values strictly greater than zero are considered, and on a tie the
 * entry that comes first in the array is kept.
 *
 * @param array $ar Map of key => numeric value.
 *
 * @return array array($value, $key) of the winning entry, or array(0, "")
 *               when no value is greater than zero.
 */
function max_from_ratio($ar) {
    $best = array(0, "");
    foreach ($ar as $name => $score) {
        if ($score > $best[0]) {
            $best = array($score, $name);
        }
    }
    return $best;
}

/**
 * Score the beginning of a text against the KOI8-R and Windows-1251
 * byte-pair tables.
 *
 * At most 100 bytes of $Data, starting at $beg_charset, are examined. CR/LF
 * are removed, the rest is split into words on punctuation, whitespace and
 * digits, and every word containing a byte in 0x7F-0xFF has the weights of
 * its byte pairs summed from $cad_StatsTableKoi and $cad_StatsTableWin.
 *
 * LC_CTYPE is switched to ru_RU.KOI8-R while scoring and restored to its
 * previous value before returning, so the caller's locale is not changed.
 * NOTE(review): since PHP 8.2 strtolower()/strtoupper()/ucfirst() are
 * ASCII-only, so the locale switch only matters on older runtimes.
 *
 * Scoring details kept as in the original implementation:
 *  - words of two bytes or less are ignored and do not count towards the limit;
 *  - the word counter is tested with ">" before it is incremented, so up to
 *    $max_detect_limit + 1 words are scored;
 *  - the pair loop runs strlen - 2 times, so the last pair of a word is not
 *    scored.
 *
 * @param string $Data        Raw text to examine.
 * @param int    $beg_charset Byte offset at which scoring starts.
 *
 * @return array array($koiScore, $winScore)
 */
function GetCodeScoreAll($Data,$beg_charset) {
    global $cad_StatsTableWin, $cad_StatsTableKoi;

    $PairSize = 2;
    $max_detect_limit = 10;

    $Data = substr($Data, $beg_charset, 100);
    $Data = preg_replace('/[\n\r]/', '', $Data);

    // Passing "0" only queries the current locale; remember it for the restore.
    $old_locale = setlocale(LC_CTYPE, '0');
    setlocale(LC_CTYPE, 'ru_RU.KOI8-R');

    $Mark_koi = 0;
    $Mark_win = 0;
    $cnt = 0;

    try {
        $sp = preg_split('/[\.\,\-\s\:\;\?\!\'\"\(\)\d<>]+/', $Data);
        foreach ($sp as $val2) {
            // Only words with at least one non-ASCII byte are of interest
            // (useful for word lists taken from a message body).
            if (!preg_match('/[\x7F-\xFF]/', $val2)) {
                continue;
            }
            if ($cnt > $max_detect_limit) {
                break;
            }
            $dlina = strlen($val2) - $PairSize;
            if ($dlina < 1) {
                // Too short to score; does not use up a slot of the word limit.
                continue;
            }
            $cnt++;

            $val3 = strtolower($val2);

            // A word that equals its "First upper, rest lower" form doubles
            // its KOI8-R score; one that equals its "first lower, REST UPPER"
            // form doubles its Windows-1251 score.
            // NOTE(review): presumably because the two charsets keep upper
            // and lower case letters in swapped byte ranges - confirm.
            $scaleK = (ucfirst($val3) === $val2) ? 2 : 1;
            $scaleW = (substr($val3, 0, 1) . strtoupper(substr($val2, 1)) === $val2) ? 2 : 1;

            $Cur_mark_koi = 0;
            $Cur_mark_win = 0;
            for ($i = 0; $i < $dlina; $i++) {
                $pp = substr($val3, $i, $PairSize);
                if (isset($cad_StatsTableKoi[$pp])) {
                    $Cur_mark_koi += $cad_StatsTableKoi[$pp];
                }
                if (isset($cad_StatsTableWin[$pp])) {
                    $Cur_mark_win += $cad_StatsTableWin[$pp];
                }
            }
            $Mark_koi += $Cur_mark_koi * $scaleK;
            $Mark_win += $Cur_mark_win * $scaleW;
        }
    } finally {
        // Never leave the process-wide locale switched to KOI8-R.
        if (is_string($old_locale)) {
            setlocale(LC_CTYPE, $old_locale);
        }
    }

    return array($Mark_koi, $Mark_win);
}

?>