1 files changed, 1355 insertions, 0 deletions
diff --git a/plat/gxb/crypto/bignum.c b/plat/gxb/crypto/bignum.c
new file mode 100644
index 0000000..ac0c811
--- /dev/null
+++ b/plat/gxb/crypto/bignum.c
@@ -0,0 +1,1355 @@
+/*
+ *  Multi-precision integer library
+ *
+ *  Copyright (C) 2006-2010, Brainspark B.V.
+ *
+ *  This file is part of PolarSSL (http://www.polarssl.org)
+ *  Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org>
+ *
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+/*
+ *  This MPI implementation is based on:
+ *
+ *  http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf
+ *  http://www.stillhq.com/extracted/gnupg-api/mpi/
+ *  http://math.libtomcrypt.com/files/tommath.pdf
+ */
+
+#include "common.h"
+#include <string.h>
+#include <rsa_config.h>
+#include <stdio.h>
+
+
+#if defined(POLARSSL_BIGNUM_C)
+
+#include "bignum.h"
+#include "bn_mul.h"
+
+#define ciL    (sizeof(t_uint))         /* chars in limb  */
+#define biL    (ciL << 3)               /* bits  in limb  */
+#define biH    (ciL << 2)               /* half limb size */
+
+/*
+ * Convert between bits/chars and number of limbs
+ */
+#define BITS_TO_LIMBS(i)  (((i) + biL - 1) / biL)
+#define CHARS_TO_LIMBS(i) (((i) + ciL - 1) / ciL)
+
+
+#define CONFIG_MALLOC_MAX	(128 * 1024)
+
+//char malloc_buffer[CONFIG_MALLOC_MAX];
+//static unsigned char * malloc_buffer = (unsigned char *)(0x10400000);
+static int malloc_addr = 0;
+
+void *mymalloc(int size)
+{
+	void *p;
+
+	if (malloc_addr + size > CONFIG_MALLOC_MAX)
+		p = NULL;
+	else
+		//p = (char *)&malloc_buffer[0] + malloc_addr;
+		p = (char *)(unsigned long)(0x1700000+ malloc_addr);
+
+	if (p)
+		malloc_addr += size;
+	//ss_printf("malloc size %d/%d addr %p\n", size, malloc_addr, p);
+
+	//printf("aml log : malloc_buffer=%p malloc_addr=0x%x p=%p size=%d\n",
+	//	malloc_buffer,malloc_addr,p,size);
+
+	return p;
+}
+
+void myfree(void *ptr)
+{
+}
+
+void mymallocreset()
+{
+	malloc_addr = 0;
+}
+
+
+/*
+ * Initialize one MPI
+ */
+void mpi_init( mpi *X )
+{
+    if ( X == NULL )
+        return;
+
+    X->s = 1;
+    X->n = 0;
+    X->p = NULL;
+}
+
+/*
+ * Unallocate one MPI
+ */
+void mpi_free( mpi *X )
+{
+    if ( X == NULL )
+        return;
+
+    if ( X->p != NULL )
+    {
+        memset( X->p, 0, X->n * ciL );
+        myfree( X->p );
+    }
+
+    X->s = 1;
+    X->n = 0;
+    X->p = NULL;
+}
+
+/*
+ * Enlarge to the specified number of limbs
+ */
+int mpi_grow( mpi *X, size_t nblimbs )
+{
+    t_uint *p;
+
+    if ( nblimbs > POLARSSL_MPI_MAX_LIMBS )
+        return( POLARSSL_ERR_MPI_MALLOC_FAILED );
+
+
+    if ( X->n < nblimbs )
+    {
+
+        if ( ( p = (t_uint *) mymalloc( nblimbs * ciL ) ) == NULL )
+            return( POLARSSL_ERR_MPI_MALLOC_FAILED );
+
+        memset( p, 0, nblimbs * ciL );
+
+        if ( X->p != NULL )
+        {
+            memcpy( p, X->p, X->n * ciL );
+            memset( X->p, 0, X->n * ciL );
+            myfree( X->p );
+        }
+
+        X->n = nblimbs;
+        X->p = p;
+    }
+
+    return( 0 );
+}
+
+/*
+ * Copy the contents of Y into X
+ */
+int mpi_copy( mpi *X, const mpi *Y )
+{
+    int ret;
+    size_t i;
+
+    if ( X == Y )
+        return( 0 );
+
+    for ( i = Y->n - 1; i > 0; i-- )
+        if ( Y->p[i] != 0 )
+            break;
+    i++;
+
+    X->s = Y->s;
+
+    MPI_CHK( mpi_grow( X, i ) );
+
+    memset( X->p, 0, X->n * ciL );
+    memcpy( X->p, Y->p, i * ciL );
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Swap the contents of X and Y
+ */
+void mpi_swap( mpi *X, mpi *Y )
+{
+    mpi T;
+
+    memcpy( &T,  X, sizeof( mpi ) );
+    memcpy(  X,  Y, sizeof( mpi ) );
+    memcpy(  Y, &T, sizeof( mpi ) );
+}
+
+/*
+ * Set value from integer
+ */
+int mpi_lset( mpi *X, t_sint z )
+{
+    int ret;
+
+    MPI_CHK( mpi_grow( X, 1 ) );
+    memset( X->p, 0, X->n * ciL );
+
+    X->p[0] = ( z < 0 ) ? -z : z;
+    X->s    = ( z < 0 ) ? -1 : 1;
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Get a specific bit
+ */
+int mpi_get_bit( const mpi *X, size_t pos )
+{
+    if ( X->n * biL <= pos )
+        return( 0 );
+
+    return ( X->p[pos / biL] >> ( pos % biL ) ) & 0x01;
+}
+
+/*
+ * Set a bit to a specific value of 0 or 1
+ */
+int mpi_set_bit( mpi *X, size_t pos, unsigned char val )
+{
+    int ret = 0;
+    size_t off = pos / biL;
+    size_t idx = pos % biL;
+
+    if ( val != 0 && val != 1 )
+        return POLARSSL_ERR_MPI_BAD_INPUT_DATA;
+
+    if ( X->n * biL <= pos )
+    {
+        if ( val == 0 )
+            return ( 0 );
+
+        MPI_CHK( mpi_grow( X, off + 1 ) );
+    }
+
+    X->p[off] = ( X->p[off] & ~( 0x01 << idx ) ) | ( val << idx );
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Return the number of least significant bits
+ */
+size_t mpi_lsb( const mpi *X )
+{
+    size_t i, j, count = 0;
+
+    for ( i = 0; i < X->n; i++ )
+        for ( j = 0; j < biL; j++, count++ )
+            if ( ( ( X->p[i] >> j ) & 1 ) != 0 )
+                return( count );
+
+    return( 0 );
+}
+
+/*
+ * Return the number of most significant bits
+ */
+size_t mpi_msb( const mpi *X )
+{
+    size_t i, j;
+
+    for ( i = X->n - 1; i > 0; i-- )
+        if ( X->p[i] != 0 )
+            break;
+
+    for ( j = biL; j > 0; j-- )
+        if ( ( ( X->p[i] >> ( j - 1 ) ) & 1 ) != 0 )
+            break;
+
+    return( ( i * biL ) + j );
+}
+
+/*
+ * Return the total size in bytes
+ */
+size_t mpi_size( const mpi *X )
+{
+    return( ( mpi_msb( X ) + 7 ) >> 3 );
+}
+
+
+/*
+ * Import X from unsigned binary data, big endian
+ */
+int mpi_read_binary( mpi *X, const unsigned char *buf, size_t buflen )
+{
+    int ret;
+    size_t i, j, n;
+
+    for ( n = 0; n < buflen; n++ )
+        if ( buf[n] != 0 )
+            break;
+    MPI_CHK( mpi_grow( X, CHARS_TO_LIMBS( buflen - n ) ) );
+
+    MPI_CHK( mpi_lset( X, 0 ) );
+
+
+    for ( i = buflen, j = 0; i > n; i--, j++ )
+        X->p[j / ciL] |= ((t_uint) buf[i - 1]) << ((j % ciL) << 3);
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Export X into unsigned binary data, big endian
+ */
+int mpi_write_binary( const mpi *X, unsigned char *buf, size_t buflen )
+{
+    size_t i, j, n;
+
+    n = mpi_size( X );
+
+    if ( buflen < n )
+        return( POLARSSL_ERR_MPI_BUFFER_TOO_SMALL );
+
+    memset( buf, 0, buflen );
+
+    for ( i = buflen - 1, j = 0; n > 0; i--, j++, n-- )
+        buf[i] = (unsigned char)( X->p[j / ciL] >> ((j % ciL) << 3) );
+
+    return( 0 );
+}
+
+/*
+ * Left-shift: X <<= count
+ */
+int mpi_shift_l( mpi *X, size_t count )
+{
+    int ret;
+    size_t i, v0, t1;
+    t_uint r0 = 0, r1;
+
+    v0 = count / (biL    );
+    t1 = count & (biL - 1);
+
+    i = mpi_msb( X ) + count;
+
+    if ( X->n * biL < i )
+        MPI_CHK( mpi_grow( X, BITS_TO_LIMBS( i ) ) );
+
+    ret = 0;
+
+    /*
+     * shift by count / limb_size
+     */
+    if ( v0 > 0 )
+    {
+        for ( i = X->n; i > v0; i-- )
+            X->p[i - 1] = X->p[i - v0 - 1];
+
+        for ( ; i > 0; i-- )
+            X->p[i - 1] = 0;
+    }
+
+    /*
+     * shift by count % limb_size
+     */
+    if ( t1 > 0 )
+    {
+        for ( i = v0; i < X->n; i++ )
+        {
+            r1 = X->p[i] >> (biL - t1);
+            X->p[i] <<= t1;
+            X->p[i] |= r0;
+            r0 = r1;
+        }
+    }
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Right-shift: X >>= count
+ */
+int mpi_shift_r( mpi *X, size_t count )
+{
+    size_t i, v0, v1;
+    t_uint r0 = 0, r1;
+
+    v0 = count /  biL;
+    v1 = count & (biL - 1);
+
+    if ( v0 > X->n || ( v0 == X->n && v1 > 0 ) )
+        return mpi_lset( X, 0 );
+
+    /*
+     * shift by count / limb_size
+     */
+    if ( v0 > 0 )
+    {
+        for ( i = 0; i < X->n - v0; i++ )
+            X->p[i] = X->p[i + v0];
+
+        for ( ; i < X->n; i++ )
+            X->p[i] = 0;
+    }
+
+    /*
+     * shift by count % limb_size
+     */
+    if ( v1 > 0 )
+    {
+        for ( i = X->n; i > 0; i-- )
+        {
+            r1 = X->p[i - 1] << (biL - v1);
+            X->p[i - 1] >>= v1;
+            X->p[i - 1] |= r0;
+            r0 = r1;
+        }
+    }
+
+    return( 0 );
+}
+
+/*
+ * Compare unsigned values
+ */
+int mpi_cmp_abs( const mpi *X, const mpi *Y )
+{
+    size_t i, j;
+
+    for ( i = X->n; i > 0; i-- )
+        if ( X->p[i - 1] != 0 )
+            break;
+
+    for ( j = Y->n; j > 0; j-- )
+        if ( Y->p[j - 1] != 0 )
+            break;
+
+    if ( i == 0 && j == 0 )
+        return( 0 );
+
+    if ( i > j ) return(  1 ) ;
+    if ( j > i ) return( -1 ) ;
+
+    for ( ; i > 0; i-- )
+    {
+        if ( X->p[i - 1] > Y->p[i - 1] ) return(  1 ) ;
+        if ( X->p[i - 1] < Y->p[i - 1] ) return( -1 ) ;
+    }
+
+    return( 0 );
+}
+
+/*
+ * Compare signed values
+ */
+int mpi_cmp_mpi( const mpi *X, const mpi *Y )
+{
+    size_t i, j;
+
+    for ( i = X->n; i > 0; i-- )
+        if ( X->p[i - 1] != 0 )
+            break;
+
+    for ( j = Y->n; j > 0; j-- )
+        if ( Y->p[j - 1] != 0 )
+            break;
+
+    if ( i == 0 && j == 0 )
+        return( 0 );
+
+    if ( i > j ) return(  X->s ) ;
+    if ( j > i ) return( -Y->s ) ;
+
+    if ( X->s > 0 && Y->s < 0 ) return(  1 ) ;
+    if ( Y->s > 0 && X->s < 0 ) return( -1 ) ;
+
+    for ( ; i > 0; i-- )
+    {
+        if ( X->p[i - 1] > Y->p[i - 1] ) return(  X->s ) ;
+        if ( X->p[i - 1] < Y->p[i - 1] ) return( -X->s ) ;
+    }
+
+    return( 0 );
+}
+
+/*
+ * Compare signed values
+ */
+int mpi_cmp_int( const mpi *X, t_sint z )
+{
+    mpi Y;
+    t_uint p[1];
+
+    *p  = ( z < 0 ) ? -z : z;
+    Y.s = ( z < 0 ) ? -1 : 1;
+    Y.n = 1;
+    Y.p = p;
+
+    return( mpi_cmp_mpi( X, &Y ) );
+}
+
+/*
+ * Unsigned addition: X = |A| + |B|  (HAC 14.7)
+ */
+int mpi_add_abs( mpi *X, const mpi *A, const mpi *B )
+{
+    int ret;
+    size_t i, j;
+    t_uint *o, *p, c;
+
+    if ( X == B )
+    {
+        const mpi *T = A; A = X; B = T;
+    }
+
+    if ( X != A )
+        MPI_CHK( mpi_copy( X, A ) );
+
+    /*
+     * X should always be positive as a result of unsigned additions.
+     */
+    X->s = 1;
+
+    for ( j = B->n; j > 0; j-- )
+        if ( B->p[j - 1] != 0 )
+            break;
+
+    MPI_CHK( mpi_grow( X, j ) );
+
+    o = B->p; p = X->p; c = 0;
+
+    for ( i = 0; i < j; i++, o++, p++ )
+    {
+        *p +=  c; c  = ( *p <  c );
+        *p += *o; c += ( *p < *o );
+    }
+
+    while ( c != 0 )
+    {
+        if ( i >= X->n )
+        {
+            MPI_CHK( mpi_grow( X, i + 1 ) );
+            p = X->p + i;
+        }
+
+        *p += c; c = ( *p < c ); i++; p++;
+    }
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Helper for mpi substraction
+ */
+static void mpi_sub_hlp( size_t n, t_uint *s, t_uint *d )
+{
+    size_t i;
+    t_uint c, z;
+
+    for ( i = c = 0; i < n; i++, s++, d++ )
+    {
+        z = ( *d <  c );     *d -=  c;
+        c = ( *d < *s ) + z; *d -= *s;
+    }
+
+    while ( c != 0 )
+    {
+        z = ( *d < c ); *d -= c;
+        c = z; i++; d++;
+    }
+}
+
+/*
+ * Unsigned substraction: X = |A| - |B|  (HAC 14.9)
+ */
+int mpi_sub_abs( mpi *X, const mpi *A, const mpi *B )
+{
+    mpi TB;
+    int ret;
+    size_t n;
+
+    if ( mpi_cmp_abs( A, B ) < 0 )
+        return( POLARSSL_ERR_MPI_NEGATIVE_VALUE );
+
+    mpi_init( &TB );
+
+    if ( X == B )
+    {
+        MPI_CHK( mpi_copy( &TB, B ) );
+        B = &TB;
+    }
+
+    if ( X != A )
+        MPI_CHK( mpi_copy( X, A ) );
+
+    /*
+     * X should always be positive as a result of unsigned substractions.
+     */
+    X->s = 1;
+
+    ret = 0;
+
+    for ( n = B->n; n > 0; n-- )
+        if ( B->p[n - 1] != 0 )
+            break;
+
+    mpi_sub_hlp( n, B->p, X->p );
+
+cleanup:
+
+    mpi_free( &TB );
+
+    return( ret );
+}
+
+/*
+ * Signed addition: X = A + B
+ */
+int mpi_add_mpi( mpi *X, const mpi *A, const mpi *B )
+{
+    int ret, s = A->s;
+
+    if ( A->s * B->s < 0 )
+    {
+        if ( mpi_cmp_abs( A, B ) >= 0 )
+        {
+            MPI_CHK( mpi_sub_abs( X, A, B ) );
+            X->s =  s;
+        }
+        else
+        {
+            MPI_CHK( mpi_sub_abs( X, B, A ) );
+            X->s = -s;
+        }
+    }
+    else
+    {
+        MPI_CHK( mpi_add_abs( X, A, B ) );
+        X->s = s;
+    }
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Signed substraction: X = A - B
+ */
+int mpi_sub_mpi( mpi *X, const mpi *A, const mpi *B )
+{
+    int ret, s = A->s;
+
+    if ( A->s * B->s > 0 )
+    {
+        if ( mpi_cmp_abs( A, B ) >= 0 )
+        {
+            MPI_CHK( mpi_sub_abs( X, A, B ) );
+            X->s =  s;
+        }
+        else
+        {
+            MPI_CHK( mpi_sub_abs( X, B, A ) );
+            X->s = -s;
+        }
+    }
+    else
+    {
+        MPI_CHK( mpi_add_abs( X, A, B ) );
+        X->s = s;
+    }
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Signed addition: X = A + b
+ */
+int mpi_add_int( mpi *X, const mpi *A, t_sint b )
+{
+    mpi _B;
+    t_uint p[1];
+
+    p[0] = ( b < 0 ) ? -b : b;
+    _B.s = ( b < 0 ) ? -1 : 1;
+    _B.n = 1;
+    _B.p = p;
+
+    return( mpi_add_mpi( X, A, &_B ) );
+}
+
+/*
+ * Signed substraction: X = A - b
+ */
+int mpi_sub_int( mpi *X, const mpi *A, t_sint b )
+{
+    mpi _B;
+    t_uint p[1];
+
+    p[0] = ( b < 0 ) ? -b : b;
+    _B.s = ( b < 0 ) ? -1 : 1;
+    _B.n = 1;
+    _B.p = p;
+
+    return( mpi_sub_mpi( X, A, &_B ) );
+}
+
+/*
+ * Helper for mpi multiplication
+ */
+static void mpi_mul_hlp( size_t i, t_uint *s, t_uint *d, t_uint b )
+{
+    t_uint c = 0, t = 0;
+
+#if defined(MULADDC_HUIT)
+    for ( ; i >= 8; i -= 8 )
+    {
+        MULADDC_INIT
+        MULADDC_HUIT
+        MULADDC_STOP
+    }
+
+    for ( ; i > 0; i-- )
+    {
+        MULADDC_INIT
+        MULADDC_CORE
+        MULADDC_STOP
+    }
+#else
+    for ( ; i >= 16; i -= 16 )
+    {
+        MULADDC_INIT
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_STOP
+    }
+
+    for ( ; i >= 8; i -= 8 )
+    {
+        MULADDC_INIT
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_STOP
+    }
+
+    for ( ; i > 0; i-- )
+    {
+        MULADDC_INIT
+        MULADDC_CORE
+        MULADDC_STOP
+    }
+#endif
+
+    t++;
+
+    do {
+        *d += c; c = ( *d < c ); d++;
+    }
+    while ( c != 0 ) ;
+}
+
+/*
+ * Baseline multiplication: X = A * B  (HAC 14.12)
+ */
+int mpi_mul_mpi( mpi *X, const mpi *A, const mpi *B )
+{
+    int ret;
+    size_t i, j;
+    mpi TA, TB;
+
+    mpi_init( &TA ); mpi_init( &TB );
+
+    if ( X == A ) { MPI_CHK( mpi_copy( &TA, A ) ) ; A = &TA; }
+    if ( X == B ) { MPI_CHK( mpi_copy( &TB, B ) ) ; B = &TB; }
+
+    for ( i = A->n; i > 0; i-- )
+        if ( A->p[i - 1] != 0 )
+            break;
+
+    for ( j = B->n; j > 0; j-- )
+        if ( B->p[j - 1] != 0 )
+            break;
+
+    MPI_CHK( mpi_grow( X, i + j ) );
+    MPI_CHK( mpi_lset( X, 0 ) );
+
+    for ( i++; j > 0; j-- )
+        mpi_mul_hlp( i - 1, A->p, X->p + j - 1, B->p[j - 1] );
+
+    X->s = A->s * B->s;
+
+cleanup:
+
+    mpi_free( &TB ); mpi_free( &TA );
+
+    return( ret );
+}
+
+/*
+ * Baseline multiplication: X = A * b
+ */
+int mpi_mul_int( mpi *X, const mpi *A, t_sint b )
+{
+    mpi _B;
+    t_uint p[1];
+
+    _B.s = 1;
+    _B.n = 1;
+    _B.p = p;
+    p[0] = b;
+
+    return( mpi_mul_mpi( X, A, &_B ) );
+}
+
+/*
+ * Division by mpi: A = Q * B + R  (HAC 14.20)
+ */
+int mpi_div_mpi( mpi *Q, mpi *R, const mpi *A, const mpi *B )
+{
+    int ret;
+    size_t i, n, t, k;
+    mpi X, Y, Z, T1, T2;
+
+    if ( mpi_cmp_int( B, 0 ) == 0 )
+        return( POLARSSL_ERR_MPI_DIVISION_BY_ZERO );
+
+    mpi_init( &X ); mpi_init( &Y ); mpi_init( &Z );
+    mpi_init( &T1 ); mpi_init( &T2 );
+
+    if ( mpi_cmp_abs( A, B ) < 0 )
+    {
+        if ( Q != NULL ) MPI_CHK( mpi_lset( Q, 0 ) ) ;
+        if ( R != NULL ) MPI_CHK( mpi_copy( R, A ) ) ;
+        return( 0 );
+    }
+
+    MPI_CHK( mpi_copy( &X, A ) );
+    MPI_CHK( mpi_copy( &Y, B ) );
+    X.s = Y.s = 1;
+
+    MPI_CHK( mpi_grow( &Z, A->n + 2 ) );
+    MPI_CHK( mpi_lset( &Z,  0 ) );
+    MPI_CHK( mpi_grow( &T1, 2 ) );
+    MPI_CHK( mpi_grow( &T2, 3 ) );
+
+    k = mpi_msb( &Y ) % biL;
+    if ( k < biL - 1 )
+    {
+        k = biL - 1 - k;
+        MPI_CHK( mpi_shift_l( &X, k ) );
+        MPI_CHK( mpi_shift_l( &Y, k ) );
+    }
+    else k = 0;
+
+    n = X.n - 1;
+    t = Y.n - 1;
+    MPI_CHK( mpi_shift_l( &Y, biL * (n - t) ) );
+
+    while ( mpi_cmp_mpi( &X, &Y ) >= 0 )
+    {
+        Z.p[n - t]++;
+        mpi_sub_mpi( &X, &X, &Y );
+    }
+    mpi_shift_r( &Y, biL * (n - t) );
+
+    for ( i = n; i > t ; i-- )
+    {
+        if ( X.p[i] >= Y.p[t] )
+            Z.p[i - t - 1] = ~0;
+        else
+        {
+#if defined(POLARSSL_HAVE_UDBL)
+            t_udbl r;
+
+            r  = (t_udbl) X.p[i] << biL;
+            r |= (t_udbl) X.p[i - 1];
+            r /= Y.p[t];
+            if ( r > ((t_udbl) 1 << biL) - 1)
+                r = ((t_udbl) 1 << biL) - 1;
+
+            Z.p[i - t - 1] = (t_uint) r;
+#else
+            /*
+             * __udiv_qrnnd_c, from gmp/longlong.h
+             */
+            t_uint q0, q1, r0, r1;
+            t_uint d0, d1, d, m;
+
+            d  = Y.p[t];
+            d0 = ( d << biH ) >> biH;
+            d1 = ( d >> biH );
+
+            q1 = X.p[i] / d1;
+            r1 = X.p[i] - d1 * q1;
+            r1 <<= biH;
+            r1 |= ( X.p[i - 1] >> biH );
+
+            m = q1 * d0;
+            if ( r1 < m )
+            {
+                q1--, r1 += d;
+                while ( r1 >= d && r1 < m )
+                    q1--, r1 += d;
+            }
+            r1 -= m;
+
+            q0 = r1 / d1;
+            r0 = r1 - d1 * q0;
+            r0 <<= biH;
+            r0 |= ( X.p[i - 1] << biH ) >> biH;
+
+            m = q0 * d0;
+            if ( r0 < m )
+            {
+                q0--, r0 += d;
+                while ( r0 >= d && r0 < m )
+                    q0--, r0 += d;
+            }
+            r0 -= m;
+
+            Z.p[i - t - 1] = ( q1 << biH ) | q0;
+#endif
+        }
+
+        Z.p[i - t - 1]++;
+        do
+        {
+            Z.p[i - t - 1]--;
+
+            MPI_CHK( mpi_lset( &T1, 0 ) );
+            T1.p[0] = (t < 1) ? 0 : Y.p[t - 1];
+            T1.p[1] = Y.p[t];
+            MPI_CHK( mpi_mul_int( &T1, &T1, Z.p[i - t - 1] ) );
+
+            MPI_CHK( mpi_lset( &T2, 0 ) );
+            T2.p[0] = (i < 2) ? 0 : X.p[i - 2];
+            T2.p[1] = (i < 1) ? 0 : X.p[i - 1];
+            T2.p[2] = X.p[i];
+        }
+        while ( mpi_cmp_mpi( &T1, &T2 ) > 0 ) ;
+
+        MPI_CHK( mpi_mul_int( &T1, &Y, Z.p[i - t - 1] ) );
+        MPI_CHK( mpi_shift_l( &T1,  biL * (i - t - 1) ) );
+        MPI_CHK( mpi_sub_mpi( &X, &X, &T1 ) );
+
+        if ( mpi_cmp_int( &X, 0 ) < 0 )
+        {
+            MPI_CHK( mpi_copy( &T1, &Y ) );
+            MPI_CHK( mpi_shift_l( &T1, biL * (i - t - 1) ) );
+            MPI_CHK( mpi_add_mpi( &X, &X, &T1 ) );
+            Z.p[i - t - 1]--;
+        }
+    }
+
+    if ( Q != NULL )
+    {
+        mpi_copy( Q, &Z );
+        Q->s = A->s * B->s;
+    }
+
+    if ( R != NULL )
+    {
+        mpi_shift_r( &X, k );
+        X.s = A->s;
+        mpi_copy( R, &X );
+
+        if ( mpi_cmp_int( R, 0 ) == 0 )
+            R->s = 1;
+    }
+
+cleanup:
+
+    mpi_free( &X ); mpi_free( &Y ); mpi_free( &Z );
+    mpi_free( &T1 ); mpi_free( &T2 );
+
+    return( ret );
+}
+/*
+ * Modulo: R = A mod B
+ */
+int mpi_mod_mpi( mpi *R, const mpi *A, const mpi *B )
+{
+    int ret;
+
+    if ( mpi_cmp_int( B, 0 ) < 0 )
+        return POLARSSL_ERR_MPI_NEGATIVE_VALUE;
+
+    MPI_CHK( mpi_div_mpi( NULL, R, A, B ) );
+
+    while ( mpi_cmp_int( R, 0 ) < 0 )
+      MPI_CHK( mpi_add_mpi( R, R, B ) );
+
+    while ( mpi_cmp_mpi( R, B ) >= 0 )
+      MPI_CHK( mpi_sub_mpi( R, R, B ) );
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Modulo: r = A mod b
+ */
+int mpi_mod_int( t_uint *r, const mpi *A, t_sint b )
+{
+    size_t i;
+    t_uint x, y, z;
+
+    if ( b == 0 )
+        return( POLARSSL_ERR_MPI_DIVISION_BY_ZERO );
+
+    if ( b < 0 )
+        return POLARSSL_ERR_MPI_NEGATIVE_VALUE;
+
+    /*
+     * handle trivial cases
+     */
+    if ( b == 1 )
+    {
+        *r = 0;
+        return( 0 );
+    }
+
+    if ( b == 2 )
+    {
+        *r = A->p[0] & 1;
+        return( 0 );
+    }
+
+    /*
+     * general case
+     */
+    for ( i = A->n, y = 0; i > 0; i-- )
+    {
+        x  = A->p[i - 1];
+        y  = ( y << biH ) | ( x >> biH );
+        z  = y / b;
+        y -= z * b;
+
+        x <<= biH;
+        y  = ( y << biH ) | ( x >> biH );
+        z  = y / b;
+        y -= z * b;
+    }
+
+    /*
+     * If A is negative, then the current y represents a negative value.
+     * Flipping it to the positive side.
+     */
+    if ( A->s < 0 && y != 0 )
+        y = b - y;
+
+    *r = y;
+
+    return( 0 );
+}
+
+/*
+ * Fast Montgomery initialization (thanks to Tom St Denis)
+ */
+static void mpi_montg_init( t_uint *mm, const mpi *N )
+{
+    t_uint x, m0 = N->p[0];
+
+    x  = m0;
+    x += ( ( m0 + 2 ) & 4 ) << 1;
+    x *= ( 2 - ( m0 * x ) );
+
+    if ( biL >= 16 ) x *= ( 2 - ( m0 * x ) ) ;
+    if ( biL >= 32 ) x *= ( 2 - ( m0 * x ) ) ;
+    if ( biL >= 64 ) x *= ( 2 - ( m0 * x ) ) ;
+
+    *mm = ~x + 1;
+}
+
+/*
+ * Montgomery multiplication: A = A * B * R^-1 mod N  (HAC 14.36)
+ */
+static void mpi_montmul( mpi *A, const mpi *B, const mpi *N, t_uint mm, const mpi *T )
+{
+    size_t i, n, m;
+    t_uint u0, u1, *d;
+
+    memset( T->p, 0, T->n * ciL );
+
+    d = T->p;
+    n = N->n;
+    m = ( B->n < n ) ? B->n : n;
+
+    for ( i = 0; i < n; i++ )
+    {
+        /*
+         * T = (T + u0*B + u1*N) / 2^biL
+         */
+        u0 = A->p[i];
+        u1 = ( d[0] + u0 * B->p[0] ) * mm;
+
+        mpi_mul_hlp( m, B->p, d, u0 );
+        mpi_mul_hlp( n, N->p, d, u1 );
+
+        *d++ = u0; d[n + 1] = 0;
+    }
+
+    memcpy( A->p, d, (n + 1) * ciL );
+
+    if ( mpi_cmp_abs( A, N ) >= 0 )
+        mpi_sub_hlp( n, N->p, A->p );
+    else
+        /* prevent timing attacks */
+        mpi_sub_hlp( n, A->p, T->p );
+}
+
+/*
+ * Montgomery reduction: A = A * R^-1 mod N
+ */
+static void mpi_montred( mpi *A, const mpi *N, t_uint mm, const mpi *T )
+{
+    t_uint z = 1;
+    mpi U;
+
+    U.n = U.s = (int) z;
+    U.p = &z;
+
+    mpi_montmul( A, &U, N, mm, T );
+}
+
+/*
+ * Sliding-window exponentiation: X = A^E mod N  (HAC 14.85)
+ */
+int mpi_exp_mod( mpi *X, const mpi *A, const mpi *E, const mpi *N, mpi *_RR )
+{
+    int ret;
+    size_t wbits, wsize, one = 1;
+    size_t i, j, nblimbs;
+    size_t bufsize, nbits;
+    t_uint ei, mm, state;
+    mpi RR, T, W[ 2 << POLARSSL_MPI_WINDOW_SIZE ], Apos;
+    int neg;
+
+    if ( mpi_cmp_int( N, 0 ) < 0 || ( N->p[0] & 1 ) == 0 )
+        return( POLARSSL_ERR_MPI_BAD_INPUT_DATA );
+
+    if ( mpi_cmp_int( E, 0 ) < 0 )
+        return( POLARSSL_ERR_MPI_BAD_INPUT_DATA );
+
+    /*
+     * Init temps and window size
+     */
+    mpi_montg_init( &mm, N );
+    mpi_init( &RR ); mpi_init( &T );
+    memset( W, 0, sizeof( W ) );
+
+    i = mpi_msb( E );
+
+    wsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 :
+            ( i >  79 ) ? 4 : ( i >  23 ) ? 3 : 1;
+
+    if ( wsize > POLARSSL_MPI_WINDOW_SIZE )
+        wsize = POLARSSL_MPI_WINDOW_SIZE;
+
+    j = N->n + 1;
+    MPI_CHK( mpi_grow( X, j ) );
+    MPI_CHK( mpi_grow( &W[1],  j ) );
+    MPI_CHK( mpi_grow( &T, j * 2 ) );
+
+    /*
+     * Compensate for negative A (and correct at the end)
+     */
+    neg = ( A->s == -1 );
+
+    mpi_init( &Apos );
+    if ( neg )
+    {
+        MPI_CHK( mpi_copy( &Apos, A ) );
+        Apos.s = 1;
+        A = &Apos;
+    }
+
+    /*
+     * If 1st call, pre-compute R^2 mod N
+     */
+    if ( _RR == NULL || _RR->p == NULL )
+    {
+        MPI_CHK( mpi_lset( &RR, 1 ) );
+        MPI_CHK( mpi_shift_l( &RR, N->n * 2 * biL ) );
+        MPI_CHK( mpi_mod_mpi( &RR, &RR, N ) );
+
+        if ( _RR != NULL )
+            memcpy( _RR, &RR, sizeof( mpi ) );
+    }
+    else
+        memcpy( &RR, _RR, sizeof( mpi ) );
+
+    /*
+     * W[1] = A * R^2 * R^-1 mod N = A * R mod N
+     */
+    if ( mpi_cmp_mpi( A, N ) >= 0 )
+        mpi_mod_mpi( &W[1], A, N );
+    else   mpi_copy( &W[1], A );
+
+    mpi_montmul( &W[1], &RR, N, mm, &T );
+
+    /*
+     * X = R^2 * R^-1 mod N = R mod N
+     */
+    MPI_CHK( mpi_copy( X, &RR ) );
+    mpi_montred( X, N, mm, &T );
+
+    if ( wsize > 1 )
+    {
+        /*
+         * W[1 << (wsize - 1)] = W[1] ^ (wsize - 1)
+         */
+        j =  one << (wsize - 1);
+
+        MPI_CHK( mpi_grow( &W[j], N->n + 1 ) );
+        MPI_CHK( mpi_copy( &W[j], &W[1]    ) );
+
+        for ( i = 0; i < wsize - 1; i++ )
+            mpi_montmul( &W[j], &W[j], N, mm, &T );
+
+        /*
+         * W[i] = W[i - 1] * W[1]
+         */
+        for ( i = j + 1; i < (one << wsize); i++ )
+        {
+            MPI_CHK( mpi_grow( &W[i], N->n + 1 ) );
+            MPI_CHK( mpi_copy( &W[i], &W[i - 1] ) );
+
+            mpi_montmul( &W[i], &W[1], N, mm, &T );
+        }
+    }
+
+    nblimbs = E->n;
+    bufsize = 0;
+    nbits   = 0;
+    wbits   = 0;
+    state   = 0;
+
+    while ( 1 )
+    {
+        if ( bufsize == 0 )
+        {
+            if ( nblimbs-- == 0 )
+                break;
+
+            bufsize = sizeof( t_uint ) << 3;
+        }
+
+        bufsize--;
+
+        ei = (E->p[nblimbs] >> bufsize) & 1;
+
+        /*
+         * skip leading 0s
+         */
+        if ( ei == 0 && state == 0 )
+            continue;
+
+        if ( ei == 0 && state == 1 )
+        {
+            /*
+             * out of window, square X
+             */
+            mpi_montmul( X, X, N, mm, &T );
+            continue;
+        }
+
+        /*
+         * add ei to current window
+         */
+        state = 2;
+
+        nbits++;
+        wbits |= (ei << (wsize - nbits));
+
+        if ( nbits == wsize )
+        {
+            /*
+             * X = X^wsize R^-1 mod N
+             */
+            for ( i = 0; i < wsize; i++ )
+                mpi_montmul( X, X, N, mm, &T );
+
+            /*
+             * X = X * W[wbits] R^-1 mod N
+             */
+            mpi_montmul( X, &W[wbits], N, mm, &T );
+
+            state--;
+            nbits = 0;
+            wbits = 0;
+        }
+    }
+
+    /*
+     * process the remaining bits
+     */
+    for ( i = 0; i < nbits; i++ )
+    {
+        mpi_montmul( X, X, N, mm, &T );
+
+        wbits <<= 1;
+
+        if ( (wbits & (one << wsize)) != 0 )
+            mpi_montmul( X, &W[1], N, mm, &T );
+    }
+
+    /*
+     * X = A^E * R * R^-1 mod N = A^E mod N
+     */
+    mpi_montred( X, N, mm, &T );
+
+    if ( neg )
+    {
+        X->s = -1;
+        mpi_add_mpi( X, N, X );
+    }
+
+cleanup:
+
+    for ( i = (one << (wsize - 1)); i < (one << wsize); i++ )
+        mpi_free( &W[i] );
+
+    mpi_free( &W[1] ); mpi_free( &T ); mpi_free( &Apos );
+
+    if ( _RR == NULL )
+        mpi_free( &RR );
+
+    return( ret );
+}
+
+#endif