View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math.linear;
19  
20  import java.io.Serializable;
21  
22  import org.apache.commons.math.Field;
23  import org.apache.commons.math.FieldElement;
24  import org.apache.commons.math.MathRuntimeException;
25  
26  /**
27   * Cache-friendly implementation of FieldMatrix using a flat arrays to store
28   * square blocks of the matrix.
29   * <p>
30   * This implementation is specially designed to be cache-friendly. Square blocks are
31   * stored as small arrays and allow efficient traversal of data both in row major direction
32   * and columns major direction, one block at a time. This greatly increases performances
33   * for algorithms that use crossed directions loops like multiplication or transposition.
34   * </p>
35   * <p>
36   * The size of square blocks is a static parameter. It may be tuned according to the cache
37   * size of the target computer processor. As a rule of thumbs, it should be the largest
38   * value that allows three blocks to be simultaneously cached (this is necessary for example
39   * for matrix multiplication). The default value is to use 36x36 blocks.
40   * </p>
41   * <p>
42   * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
43   * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
44   * blocks are flattened in row major order in single dimension arrays which are therefore
45   * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
46   * organized in row major order.
47   * </p>
48   * <p>
49   * As an example, for a block size of 36x36, a 100x60 matrix would be stored in 6 blocks.
50   * Block 0 would be a Field[1296] array holding the upper left 36x36 square, block 1 would be
51   * a Field[1296] array holding the upper center 36x36 square, block 2 would be a Field[1008]
52   * array holding the upper right 36x28 rectangle, block 3 would be a Field[864] array holding
53   * the lower left 24x36 rectangle, block 4 would be a Field[864] array holding the lower center
54   * 24x36 rectangle and block 5 would be a Field[672] array holding the lower right 24x28
55   * rectangle.
56   * </p>
57   * <p>
58   * The layout complexity overhead versus simple mapping of matrices to java
59   * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
60   * to up to 3-fold improvements for matrices of moderate to large size.
61   * </p>
62   * @param <T> the type of the field elements
63   * @version $Revision: 783741 $ $Date: 2009-06-11 08:35:42 -0400 (Thu, 11 Jun 2009) $
64   * @since 2.0
65   */
66  public class BlockFieldMatrix<T extends FieldElement<T>> extends AbstractFieldMatrix<T> implements Serializable {
67      
68      /** Serializable version identifier */
69      private static final long serialVersionUID = -4602336630143123183L;
70  
71      /** Block size. */
72      public static final int BLOCK_SIZE = 36;
73  
74      /** Blocks of matrix entries. */
75      private final T blocks[][];
76  
77      /** Number of rows of the matrix. */
78      private final int rows;
79  
80      /** Number of columns of the matrix. */
81      private final int columns;
82  
83      /** Number of block rows of the matrix. */
84      private final int blockRows;
85  
86      /** Number of block columns of the matrix. */
87      private final int blockColumns;
88  
89      /**
90       * Create a new matrix with the supplied row and column dimensions.
91       *
92       * @param field field to which the elements belong
93       * @param rows  the number of rows in the new matrix
94       * @param columns  the number of columns in the new matrix
95       * @throws IllegalArgumentException if row or column dimension is not
96       *  positive
97       */
98      public BlockFieldMatrix(final Field<T> field, final int rows, final int columns)
99          throws IllegalArgumentException {
100 
101         super(field, rows, columns);
102         this.rows    = rows;
103         this.columns = columns;
104 
105         // number of blocks
106         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
107         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
108 
109         // allocate storage blocks, taking care of smaller ones at right and bottom
110         blocks = createBlocksLayout(field, rows, columns);
111 
112     }
113 
114     /**
115      * Create a new dense matrix copying entries from raw layout data.
116      * <p>The input array <em>must</em> already be in raw layout.</p>
117      * <p>Calling this constructor is equivalent to call:
118      * <pre>matrix = new BlockFieldMatrix<T>(getField(), rawData.length, rawData[0].length,
119      *                                   toBlocksLayout(rawData), false);</pre>
120      * </p>
121      * @param rawData data for new matrix, in raw layout
122      *
123      * @exception IllegalArgumentException if <code>blockData</code> shape is
124      * inconsistent with block layout
125      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
126      */
127     public BlockFieldMatrix(final T[][] rawData)
128         throws IllegalArgumentException {
129         this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
130     }
131 
132     /**
133      * Create a new dense matrix copying entries from block layout data.
134      * <p>The input array <em>must</em> already be in blocks layout.</p>
135      * @param rows  the number of rows in the new matrix
136      * @param columns  the number of columns in the new matrix
137      * @param blockData data for new matrix
138      * @param copyArray if true, the input array will be copied, otherwise
139      * it will be referenced
140      *
141      * @exception IllegalArgumentException if <code>blockData</code> shape is
142      * inconsistent with block layout
143      * @see #createBlocksLayout(Field, int, int)
144      * @see #toBlocksLayout(FieldElement[][])
145      * @see #BlockFieldMatrix(FieldElement[][])
146      */
147     public BlockFieldMatrix(final int rows, final int columns,
148                             final T[][] blockData, final boolean copyArray)
149         throws IllegalArgumentException {
150 
151         super(extractField(blockData), rows, columns);
152         this.rows    = rows;
153         this.columns = columns;
154 
155         // number of blocks
156         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
157         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
158 
159         if (copyArray) {
160             // allocate storage blocks, taking care of smaller ones at right and bottom
161             blocks = buildArray(getField(), blockRows * blockColumns, -1);
162         } else {
163             // reference existing array
164             blocks = blockData;
165         }
166 
167         int index = 0;
168         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
169             final int iHeight = blockHeight(iBlock);
170             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
171                 if (blockData[index].length != iHeight * blockWidth(jBlock)) {
172                     throw MathRuntimeException.createIllegalArgumentException(
173                             "wrong array shape (block length = {0}, expected {1})",
174                             blockData[index].length, iHeight * blockWidth(jBlock));
175                 }
176                 if (copyArray) {
177                     blocks[index] = blockData[index].clone();
178                 }
179             }
180         }
181 
182     }
183 
184     /**
185      * Convert a data array from raw layout to blocks layout.
186      * <p>
187      * Raw layout is the straightforward layout where element at row i and
188      * column j is in array element <code>rawData[i][j]</code>. Blocks layout
189      * is the layout used in {@link BlockFieldMatrix} instances, where the matrix
190      * is split in square blocks (except at right and bottom side where blocks may
191      * be rectangular to fit matrix size) and each block is stored in a flattened
192      * one-dimensional array.
193      * </p>
194      * <p>
195      * This method creates an array in blocks layout from an input array in raw layout.
196      * It can be used to provide the array argument of the {@link
197      * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
198      * constructor.
199      * </p>
200      * @param <T> the type of the field elements
201      * @param rawData data array in raw layout
202      * @return a new data array containing the same entries but in blocks layout
203      * @exception IllegalArgumentException if <code>rawData</code> is not rectangular
204      *  (not all rows have the same length)
205      * @see #createBlocksLayout(Field, int, int)
206      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
207      */
208     public static <T extends FieldElement<T>> T[][] toBlocksLayout(final T[][] rawData)
209         throws IllegalArgumentException {
210 
211         final int rows         = rawData.length;
212         final int columns      = rawData[0].length;
213         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
214         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
215 
216         // safety checks
217         for (int i = 0; i < rawData.length; ++i) {
218             final int length = rawData[i].length;
219             if (length != columns) {
220                 throw MathRuntimeException.createIllegalArgumentException(
221                         "some rows have length {0} while others have length {1}",
222                         columns, length); 
223             }
224         }
225 
226         // convert array
227         final Field<T> field = extractField(rawData);
228         final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
229         for (int iBlock = 0, blockIndex = 0; iBlock < blockRows; ++iBlock) {
230             final int pStart  = iBlock * BLOCK_SIZE;
231             final int pEnd    = Math.min(pStart + BLOCK_SIZE, rows);
232             final int iHeight = pEnd - pStart;
233             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++blockIndex) {
234                 final int qStart = jBlock * BLOCK_SIZE;
235                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
236                 final int jWidth = qEnd - qStart;
237 
238                 // allocate new block
239                 final T[] block = buildArray(field, iHeight * jWidth);
240                 blocks[blockIndex] = block;
241 
242                 // copy data
243                 for (int p = pStart, index = 0; p < pEnd; ++p, index += jWidth) {
244                     System.arraycopy(rawData[p], qStart, block, index, jWidth);
245                 }
246 
247             }
248         }
249 
250         return blocks;
251 
252     }
253 
254     /**
255      * Create a data array in blocks layout.
256      * <p>
257      * This method can be used to create the array argument of the {@link
258      * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
259      * constructor.
260      * </p>
261      * @param <T> the type of the field elements
262      * @param field field to which the elements belong
263      * @param rows  the number of rows in the new matrix
264      * @param columns  the number of columns in the new matrix
265      * @return a new data array in blocks layout
266      * @see #toBlocksLayout(FieldElement[][])
267      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
268      */
269     public static <T extends FieldElement<T>> T[][] createBlocksLayout(final Field<T> field,
270                                                                        final int rows, final int columns) {
271 
272         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
273         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
274 
275         final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
276         for (int iBlock = 0, blockIndex = 0; iBlock < blockRows; ++iBlock) {
277             final int pStart  = iBlock * BLOCK_SIZE;
278             final int pEnd    = Math.min(pStart + BLOCK_SIZE, rows);
279             final int iHeight = pEnd - pStart;
280             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++blockIndex) {
281                 final int qStart = jBlock * BLOCK_SIZE;
282                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
283                 final int jWidth = qEnd - qStart;
284                 blocks[blockIndex] = buildArray(field, iHeight * jWidth);
285             }
286         }
287 
288         return blocks;
289 
290     }
291 
292     /** {@inheritDoc} */
293     @Override
294     public FieldMatrix<T> createMatrix(final int rowDimension, final int columnDimension)
295         throws IllegalArgumentException {
296         return new BlockFieldMatrix<T>(getField(), rowDimension, columnDimension);
297     }
298 
299     /** {@inheritDoc} */
300     @Override
301     public FieldMatrix<T> copy() {
302 
303         // create an empty matrix
304         BlockFieldMatrix<T> copied = new BlockFieldMatrix<T>(getField(), rows, columns);
305 
306         // copy the blocks
307         for (int i = 0; i < blocks.length; ++i) {
308             System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
309         }
310 
311         return copied;
312 
313     }
314 
315     /** {@inheritDoc} */
316     @Override
317     public FieldMatrix<T> add(final FieldMatrix<T> m)
318         throws IllegalArgumentException {
319         try {
320             return add((BlockFieldMatrix<T>) m);
321         } catch (ClassCastException cce) {
322 
323             // safety check
324             checkAdditionCompatible(m);
325 
326             final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
327 
328             // perform addition block-wise, to ensure good cache behavior
329             int blockIndex = 0;
330             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
331                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
332 
333                     // perform addition on the current block
334                     final T[] outBlock = out.blocks[blockIndex];
335                     final T[] tBlock   = blocks[blockIndex];
336                     final int      pStart   = iBlock * BLOCK_SIZE;
337                     final int      pEnd     = Math.min(pStart + BLOCK_SIZE, rows);
338                     final int      qStart   = jBlock * BLOCK_SIZE;
339                     final int      qEnd     = Math.min(qStart + BLOCK_SIZE, columns);
340                     for (int p = pStart, k = 0; p < pEnd; ++p) {
341                         for (int q = qStart; q < qEnd; ++q, ++k) {
342                             outBlock[k] = tBlock[k].add(m.getEntry(p, q));
343                         }
344                     }
345 
346                     // go to next block
347                     ++blockIndex;
348 
349                 }
350             }
351 
352             return out;
353 
354         }
355     }
356 
357     /**
358      * Compute the sum of this and <code>m</code>.
359      *
360      * @param m    matrix to be added
361      * @return     this + m
362      * @throws  IllegalArgumentException if m is not the same size as this
363      */
364     public BlockFieldMatrix<T> add(final BlockFieldMatrix<T> m)
365         throws IllegalArgumentException {
366 
367         // safety check
368         checkAdditionCompatible(m);
369 
370         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
371 
372         // perform addition block-wise, to ensure good cache behavior
373         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
374             final T[] outBlock = out.blocks[blockIndex];
375             final T[] tBlock   = blocks[blockIndex];
376             final T[] mBlock   = m.blocks[blockIndex];
377             for (int k = 0; k < outBlock.length; ++k) {
378                 outBlock[k] = tBlock[k].add(mBlock[k]);
379             }
380         }
381 
382         return out;
383 
384     }
385 
386     /** {@inheritDoc} */
387     @Override
388     public FieldMatrix<T> subtract(final FieldMatrix<T> m)
389         throws IllegalArgumentException {
390         try {
391             return subtract((BlockFieldMatrix<T>) m);
392         } catch (ClassCastException cce) {
393 
394             // safety check
395             checkSubtractionCompatible(m);
396 
397             final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
398 
399             // perform subtraction block-wise, to ensure good cache behavior
400             int blockIndex = 0;
401             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
402                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
403 
404                     // perform subtraction on the current block
405                     final T[] outBlock = out.blocks[blockIndex];
406                     final T[] tBlock   = blocks[blockIndex];
407                     final int      pStart   = iBlock * BLOCK_SIZE;
408                     final int      pEnd     = Math.min(pStart + BLOCK_SIZE, rows);
409                     final int      qStart   = jBlock * BLOCK_SIZE;
410                     final int      qEnd     = Math.min(qStart + BLOCK_SIZE, columns);
411                     for (int p = pStart, k = 0; p < pEnd; ++p) {
412                         for (int q = qStart; q < qEnd; ++q, ++k) {
413                             outBlock[k] = tBlock[k].subtract(m.getEntry(p, q));
414                         }
415                     }
416 
417                     // go to next block
418                     ++blockIndex;
419 
420                 }
421             }
422 
423             return out;
424 
425         }
426     }
427 
428     /**
429      * Compute this minus <code>m</code>.
430      *
431      * @param m    matrix to be subtracted
432      * @return     this - m
433      * @throws  IllegalArgumentException if m is not the same size as this
434      */
435     public BlockFieldMatrix<T> subtract(final BlockFieldMatrix<T> m)
436         throws IllegalArgumentException {
437 
438         // safety check
439         checkSubtractionCompatible(m);
440 
441         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
442 
443         // perform subtraction block-wise, to ensure good cache behavior
444         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
445             final T[] outBlock = out.blocks[blockIndex];
446             final T[] tBlock   = blocks[blockIndex];
447             final T[] mBlock   = m.blocks[blockIndex];
448             for (int k = 0; k < outBlock.length; ++k) {
449                 outBlock[k] = tBlock[k].subtract(mBlock[k]);
450             }
451         }
452 
453         return out;
454 
455     }
456 
457     /** {@inheritDoc} */
458     @Override
459     public FieldMatrix<T> scalarAdd(final T d)
460         throws IllegalArgumentException {
461 
462         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
463 
464         // perform subtraction block-wise, to ensure good cache behavior
465         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
466             final T[] outBlock = out.blocks[blockIndex];
467             final T[] tBlock   = blocks[blockIndex];
468             for (int k = 0; k < outBlock.length; ++k) {
469                 outBlock[k] = tBlock[k].add(d);
470             }
471         }
472 
473         return out;
474 
475     }
476 
477     /** {@inheritDoc} */
478     @Override
479     public FieldMatrix<T> scalarMultiply(final T d)
480         throws IllegalArgumentException {
481 
482         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
483 
484         // perform subtraction block-wise, to ensure good cache behavior
485         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
486             final T[] outBlock = out.blocks[blockIndex];
487             final T[] tBlock   = blocks[blockIndex];
488             for (int k = 0; k < outBlock.length; ++k) {
489                 outBlock[k] = tBlock[k].multiply(d);
490             }
491         }
492 
493         return out;
494 
495     }
496 
497     /** {@inheritDoc} */
498     @Override
499     public FieldMatrix<T> multiply(final FieldMatrix<T> m)
500         throws IllegalArgumentException {
501         try {
502             return multiply((BlockFieldMatrix<T>) m);
503         } catch (ClassCastException cce) {
504 
505             // safety check
506             checkMultiplicationCompatible(m);
507 
508             final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.getColumnDimension());
509             final T zero = getField().getZero();
510 
511             // perform multiplication block-wise, to ensure good cache behavior
512             int blockIndex = 0;
513             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
514 
515                 final int pStart = iBlock * BLOCK_SIZE;
516                 final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
517 
518                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
519 
520                     final int qStart = jBlock * BLOCK_SIZE;
521                     final int qEnd   = Math.min(qStart + BLOCK_SIZE, m.getColumnDimension());
522 
523                     // select current block
524                     final T[] outBlock = out.blocks[blockIndex];
525 
526                     // perform multiplication on current block
527                     for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
528                         final int kWidth      = blockWidth(kBlock);
529                         final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
530                         final int rStart      = kBlock * BLOCK_SIZE;
531                         for (int p = pStart, k = 0; p < pEnd; ++p) {
532                             final int lStart = (p - pStart) * kWidth;
533                             final int lEnd   = lStart + kWidth;
534                             for (int q = qStart; q < qEnd; ++q) {
535                                 T sum = zero;
536                                 for (int l = lStart, r = rStart; l < lEnd; ++l, ++r) {
537                                     sum = sum.add(tBlock[l].multiply(m.getEntry(r, q)));
538                                 }
539                                 outBlock[k] = outBlock[k].add(sum);
540                                 ++k;
541                             }
542                         }
543                     }
544 
545                     // go to next block
546                     ++blockIndex;
547 
548                 }
549             }
550 
551             return out;
552 
553         }
554     }
555 
556     /**
557      * Returns the result of postmultiplying this by m.
558      *
559      * @param m    matrix to postmultiply by
560      * @return     this * m
561      * @throws     IllegalArgumentException
562      *             if columnDimension(this) != rowDimension(m)
563      */
564     public BlockFieldMatrix<T> multiply(BlockFieldMatrix<T> m) throws IllegalArgumentException {
565 
566         // safety check
567         checkMultiplicationCompatible(m);
568 
569         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.columns);
570         final T zero = getField().getZero();
571 
572         // perform multiplication block-wise, to ensure good cache behavior
573         int blockIndex = 0;
574         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
575 
576             final int pStart = iBlock * BLOCK_SIZE;
577             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
578 
579             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
580                 final int jWidth = out.blockWidth(jBlock);
581                 final int jWidth2 = jWidth  + jWidth;
582                 final int jWidth3 = jWidth2 + jWidth;
583                 final int jWidth4 = jWidth3 + jWidth;
584 
585                 // select current block
586                 final T[] outBlock = out.blocks[blockIndex];
587 
588                 // perform multiplication on current block
589                 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
590                     final int kWidth = blockWidth(kBlock);
591                     final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
592                     final T[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
593                     for (int p = pStart, k = 0; p < pEnd; ++p) {
594                         final int lStart = (p - pStart) * kWidth;
595                         final int lEnd   = lStart + kWidth;
596                         for (int nStart = 0; nStart < jWidth; ++nStart) {
597                             T sum = zero;
598                             int l = lStart;
599                             int n = nStart;
600                             while (l < lEnd - 3) {
601                                 sum = sum.
602                                       add(tBlock[l].multiply(mBlock[n])).
603                                       add(tBlock[l + 1].multiply(mBlock[n + jWidth])).
604                                       add(tBlock[l + 2].multiply(mBlock[n + jWidth2])).
605                                       add(tBlock[l + 3].multiply(mBlock[n + jWidth3]));
606                                 l += 4;
607                                 n += jWidth4;
608                             }
609                             while (l < lEnd) {
610                                 sum = sum.add(tBlock[l++].multiply(mBlock[n]));
611                                 n += jWidth;
612                             }
613                             outBlock[k] = outBlock[k].add(sum);
614                             ++k;
615                         }
616                     }
617                 }
618 
619                 // go to next block
620                 ++blockIndex;
621 
622             }
623         }
624 
625         return out;
626 
627     }
628 
629     /** {@inheritDoc} */
630     @Override
631     public T[][] getData() {
632 
633         final T[][] data = buildArray(getField(), getRowDimension(), getColumnDimension());
634         final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
635 
636         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
637             final int pStart = iBlock * BLOCK_SIZE;
638             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
639             int regularPos   = 0;
640             int lastPos      = 0;
641             for (int p = pStart; p < pEnd; ++p) {
642                 final T[] dataP = data[p];
643                 int blockIndex = iBlock * blockColumns;
644                 int dataPos    = 0;
645                 for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
646                     System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
647                     dataPos += BLOCK_SIZE;
648                 }
649                 System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
650                 regularPos += BLOCK_SIZE;
651                 lastPos    += lastColumns;
652             }
653         }
654 
655         return data;
656         
657     }
658 
659     /** {@inheritDoc} */
660     @Override
661     public FieldMatrix<T> getSubMatrix(final int startRow, final int endRow,
662                                    final int startColumn, final int endColumn)
663         throws MatrixIndexException {
664 
665         // safety checks
666         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
667 
668         // create the output matrix
669         final BlockFieldMatrix<T> out =
670             new BlockFieldMatrix<T>(getField(), endRow - startRow + 1, endColumn - startColumn + 1);
671 
672         // compute blocks shifts
673         final int blockStartRow    = startRow    / BLOCK_SIZE;
674         final int rowsShift        = startRow    % BLOCK_SIZE;
675         final int blockStartColumn = startColumn / BLOCK_SIZE;
676         final int columnsShift     = startColumn % BLOCK_SIZE;
677 
678         // perform extraction block-wise, to ensure good cache behavior
679         for (int iBlock = 0, pBlock = blockStartRow; iBlock < out.blockRows; ++iBlock, ++pBlock) {
680             final int iHeight = out.blockHeight(iBlock);
681             for (int jBlock = 0, qBlock = blockStartColumn; jBlock < out.blockColumns; ++jBlock, ++qBlock) {
682                 final int jWidth = out.blockWidth(jBlock);
683 
684                 // handle one block of the output matrix
685                 final int      outIndex = iBlock * out.blockColumns + jBlock;
686                 final T[] outBlock = out.blocks[outIndex];
687                 final int      index    = pBlock * blockColumns + qBlock;
688                 final int      width    = blockWidth(qBlock);
689 
690                 final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
691                 final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
692                 if (heightExcess > 0) {
693                     // the submatrix block spans on two blocks rows from the original matrix
694                     if (widthExcess > 0) {
695                         // the submatrix block spans on two blocks columns from the original matrix
696                         final int width2 = blockWidth(qBlock + 1);
697                         copyBlockPart(blocks[index], width,
698                                       rowsShift, BLOCK_SIZE,
699                                       columnsShift, BLOCK_SIZE,
700                                       outBlock, jWidth, 0, 0);
701                         copyBlockPart(blocks[index + 1], width2,
702                                       rowsShift, BLOCK_SIZE,
703                                       0, widthExcess,
704                                       outBlock, jWidth, 0, jWidth - widthExcess);
705                         copyBlockPart(blocks[index + blockColumns], width,
706                                       0, heightExcess,
707                                       columnsShift, BLOCK_SIZE,
708                                       outBlock, jWidth, iHeight - heightExcess, 0);
709                         copyBlockPart(blocks[index + blockColumns + 1], width2,
710                                       0, heightExcess,
711                                       0, widthExcess,
712                                       outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
713                     } else {
714                         // the submatrix block spans on one block column from the original matrix
715                         copyBlockPart(blocks[index], width,
716                                       rowsShift, BLOCK_SIZE,
717                                       columnsShift, jWidth + columnsShift,
718                                       outBlock, jWidth, 0, 0);
719                         copyBlockPart(blocks[index + blockColumns], width,
720                                       0, heightExcess,
721                                       columnsShift, jWidth + columnsShift,
722                                       outBlock, jWidth, iHeight - heightExcess, 0);
723                     }
724                 } else {
725                     // the submatrix block spans on one block row from the original matrix
726                     if (widthExcess > 0) {
727                         // the submatrix block spans on two blocks columns from the original matrix
728                         final int width2 = blockWidth(qBlock + 1);
729                         copyBlockPart(blocks[index], width,
730                                       rowsShift, iHeight + rowsShift,
731                                       columnsShift, BLOCK_SIZE,
732                                       outBlock, jWidth, 0, 0);
733                         copyBlockPart(blocks[index + 1], width2,
734                                       rowsShift, iHeight + rowsShift,
735                                       0, widthExcess,
736                                       outBlock, jWidth, 0, jWidth - widthExcess);
737                     } else {
738                         // the submatrix block spans on one block column from the original matrix
739                         copyBlockPart(blocks[index], width,
740                                       rowsShift, iHeight + rowsShift,
741                                       columnsShift, jWidth + columnsShift,
742                                       outBlock, jWidth, 0, 0);
743                     }
744                }
745 
746             }
747         }
748 
749         return out;
750 
751     }
752 
753     /**
754      * Copy a part of a block into another one
755      * <p>This method can be called only when the specified part fits in both
756      * blocks, no verification is done here.</p>
757      * @param srcBlock source block
758      * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
759      * @param srcStartRow start row in the source block
760      * @param srcEndRow end row (exclusive) in the source block
761      * @param srcStartColumn start column in the source block
762      * @param srcEndColumn end column (exclusive) in the source block
763      * @param dstBlock destination block
764      * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
765      * @param dstStartRow start row in the destination block
766      * @param dstStartColumn start column in the destination block
767      */
768     private void copyBlockPart(final T[] srcBlock, final int srcWidth,
769                                final int srcStartRow, final int srcEndRow,
770                                final int srcStartColumn, final int srcEndColumn,
771                                final T[] dstBlock, final int dstWidth,
772                                final int dstStartRow, final int dstStartColumn) {
773         final int length = srcEndColumn - srcStartColumn;
774         int srcPos = srcStartRow * srcWidth + srcStartColumn;
775         int dstPos = dstStartRow * dstWidth + dstStartColumn;
776         for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
777             System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
778             srcPos += srcWidth;
779             dstPos += dstWidth;
780         }
781     }
782 
783     /** {@inheritDoc} */
784     @Override
785     public void setSubMatrix(final T[][] subMatrix, final int row, final int column)
786         throws MatrixIndexException {
787 
788         // safety checks
789         final int refLength = subMatrix[0].length;
790         if (refLength < 1) {
791             throw MathRuntimeException.createIllegalArgumentException("matrix must have at least one column");             
792         }
793         final int endRow    = row + subMatrix.length - 1;
794         final int endColumn = column + refLength - 1;
795         checkSubMatrixIndex(row, endRow, column, endColumn);
796         for (final T[] subRow : subMatrix) {
797             if (subRow.length != refLength) {
798                 throw MathRuntimeException.createIllegalArgumentException(
799                         "some rows have length {0} while others have length {1}",
800                         refLength, subRow.length); 
801             }
802         }
803 
804         // compute blocks bounds
805         final int blockStartRow    = row / BLOCK_SIZE;
806         final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
807         final int blockStartColumn = column / BLOCK_SIZE;
808         final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
809 
810         // perform copy block-wise, to ensure good cache behavior
811         for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
812             final int iHeight  = blockHeight(iBlock);
813             final int firstRow = iBlock * BLOCK_SIZE;
814             final int iStart   = Math.max(row,    firstRow);
815             final int iEnd     = Math.min(endRow + 1, firstRow + iHeight);
816 
817             for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
818                 final int jWidth      = blockWidth(jBlock);
819                 final int firstColumn = jBlock * BLOCK_SIZE;
820                 final int jStart      = Math.max(column,    firstColumn);
821                 final int jEnd        = Math.min(endColumn + 1, firstColumn + jWidth);
822                 final int jLength     = jEnd - jStart;
823 
824                 // handle one block, row by row
825                 final T[] block = blocks[iBlock * blockColumns + jBlock];
826                 for (int i = iStart; i < iEnd; ++i) {
827                     System.arraycopy(subMatrix[i - row], jStart - column,
828                                      block, (i - firstRow) * jWidth + (jStart - firstColumn),
829                                      jLength);
830                 }
831 
832             }
833         }
834     }
835 
836     /** {@inheritDoc} */
837     @Override
838     public FieldMatrix<T> getRowMatrix(final int row)
839         throws MatrixIndexException {
840 
841         checkRowIndex(row);
842         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), 1, columns);
843 
844         // perform copy block-wise, to ensure good cache behavior
845         final int iBlock  = row / BLOCK_SIZE;
846         final int iRow    = row - iBlock * BLOCK_SIZE;
847         int outBlockIndex = 0;
848         int outIndex      = 0;
849         T[] outBlock = out.blocks[outBlockIndex];
850         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
851             final int jWidth     = blockWidth(jBlock);
852             final T[] block = blocks[iBlock * blockColumns + jBlock];
853             final int available  = outBlock.length - outIndex;
854             if (jWidth > available) {
855                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
856                 outBlock = out.blocks[++outBlockIndex];
857                 System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
858                 outIndex = jWidth - available;
859             } else {
860                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
861                 outIndex += jWidth;
862             }
863         }
864 
865         return out;
866 
867     }
868 
869     /** {@inheritDoc} */
870     @Override
871     public void setRowMatrix(final int row, final FieldMatrix<T> matrix)
872         throws MatrixIndexException, InvalidMatrixException {
873         try {
874             setRowMatrix(row, (BlockFieldMatrix<T>) matrix);
875         } catch (ClassCastException cce) {
876             super.setRowMatrix(row, matrix);
877         }
878     }
879 
880     /**
881      * Sets the entries in row number <code>row</code>
882      * as a row matrix.  Row indices start at 0.
883      *
884      * @param row the row to be set
885      * @param matrix row matrix (must have one row and the same number of columns
886      * as the instance)
887      * @throws MatrixIndexException if the specified row index is invalid
888      * @throws InvalidMatrixException if the matrix dimensions do not match one
889      * instance row
890      */
891     public void setRowMatrix(final int row, final BlockFieldMatrix<T> matrix)
892         throws MatrixIndexException, InvalidMatrixException {
893 
894         checkRowIndex(row);
895         final int nCols = getColumnDimension();
896         if ((matrix.getRowDimension() != 1) ||
897             (matrix.getColumnDimension() != nCols)) {
898             throw new InvalidMatrixException(
899                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
900                     matrix.getRowDimension(), matrix.getColumnDimension(),
901                     1, nCols);
902         }
903 
904         // perform copy block-wise, to ensure good cache behavior
905         final int iBlock = row / BLOCK_SIZE;
906         final int iRow   = row - iBlock * BLOCK_SIZE;
907         int mBlockIndex  = 0;
908         int mIndex       = 0;
909         T[] mBlock  = matrix.blocks[mBlockIndex];
910         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
911             final int jWidth     = blockWidth(jBlock);
912             final T[] block = blocks[iBlock * blockColumns + jBlock];
913             final int available  = mBlock.length - mIndex;
914             if (jWidth > available) {
915                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
916                 mBlock = matrix.blocks[++mBlockIndex];
917                 System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
918                 mIndex = jWidth - available;
919             } else {
920                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
921                 mIndex += jWidth;
922            }
923         }
924 
925     }
926     
927     /** {@inheritDoc} */
928     @Override
929     public FieldMatrix<T> getColumnMatrix(final int column)
930         throws MatrixIndexException {
931 
932         checkColumnIndex(column);
933         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, 1);
934 
935         // perform copy block-wise, to ensure good cache behavior
936         final int jBlock  = column / BLOCK_SIZE;
937         final int jColumn = column - jBlock * BLOCK_SIZE;
938         final int jWidth  = blockWidth(jBlock);
939         int outBlockIndex = 0;
940         int outIndex      = 0;
941         T[] outBlock = out.blocks[outBlockIndex];
942         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
943             final int iHeight = blockHeight(iBlock);
944             final T[] block = blocks[iBlock * blockColumns + jBlock];
945             for (int i = 0; i < iHeight; ++i) {
946                 if (outIndex >= outBlock.length) {
947                     outBlock = out.blocks[++outBlockIndex];
948                     outIndex = 0;
949                 }
950                 outBlock[outIndex++] = block[i * jWidth + jColumn];
951             }
952         }
953 
954         return out;
955 
956     }
957 
958     /** {@inheritDoc} */
959     @Override
960     public void setColumnMatrix(final int column, final FieldMatrix<T> matrix)
961         throws MatrixIndexException, InvalidMatrixException {
962         try {
963             setColumnMatrix(column, (BlockFieldMatrix<T>) matrix);
964         } catch (ClassCastException cce) {
965             super.setColumnMatrix(column, matrix);
966         }
967     }
968 
969     /**
970      * Sets the entries in column number <code>column</code>
971      * as a column matrix.  Column indices start at 0.
972      *
973      * @param column the column to be set
974      * @param matrix column matrix (must have one column and the same number of rows
975      * as the instance)
976      * @throws MatrixIndexException if the specified column index is invalid
977      * @throws InvalidMatrixException if the matrix dimensions do not match one
978      * instance column
979      */
980     void setColumnMatrix(final int column, final BlockFieldMatrix<T> matrix)
981         throws MatrixIndexException, InvalidMatrixException {
982 
983         checkColumnIndex(column);
984         final int nRows = getRowDimension();
985         if ((matrix.getRowDimension() != nRows) ||
986             (matrix.getColumnDimension() != 1)) {
987             throw new InvalidMatrixException(
988                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
989                     matrix.getRowDimension(), matrix.getColumnDimension(),
990                     nRows, 1);
991         }
992 
993         // perform copy block-wise, to ensure good cache behavior
994         final int jBlock  = column / BLOCK_SIZE;
995         final int jColumn = column - jBlock * BLOCK_SIZE;
996         final int jWidth  = blockWidth(jBlock);
997         int mBlockIndex = 0;
998         int mIndex      = 0;
999         T[] mBlock = matrix.blocks[mBlockIndex];
1000         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1001             final int iHeight = blockHeight(iBlock);
1002             final T[] block = blocks[iBlock * blockColumns + jBlock];
1003             for (int i = 0; i < iHeight; ++i) {
1004                 if (mIndex >= mBlock.length) {
1005                     mBlock = matrix.blocks[++mBlockIndex];
1006                     mIndex = 0;
1007                 }
1008                 block[i * jWidth + jColumn] = mBlock[mIndex++];
1009             }
1010         }
1011 
1012     }
1013 
1014     /** {@inheritDoc} */
1015     @Override
1016     public FieldVector<T> getRowVector(final int row)
1017         throws MatrixIndexException {
1018 
1019         checkRowIndex(row);
1020         final T[] outData = buildArray(getField(), columns);
1021 
1022         // perform copy block-wise, to ensure good cache behavior
1023         final int iBlock  = row / BLOCK_SIZE;
1024         final int iRow    = row - iBlock * BLOCK_SIZE;
1025         int outIndex      = 0;
1026         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1027             final int jWidth     = blockWidth(jBlock);
1028             final T[] block = blocks[iBlock * blockColumns + jBlock];
1029             System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1030             outIndex += jWidth;
1031         }
1032 
1033         return new ArrayFieldVector<T>(outData, false);
1034 
1035     }
1036 
1037     /** {@inheritDoc} */
1038     @Override
1039     public void setRowVector(final int row, final FieldVector<T> vector)
1040         throws MatrixIndexException, InvalidMatrixException {
1041         try {
1042             setRow(row, ((ArrayFieldVector<T>) vector).getDataRef());
1043         } catch (ClassCastException cce) {
1044             super.setRowVector(row, vector);
1045         }
1046     }
1047 
1048     /** {@inheritDoc} */
1049     @Override
1050     public FieldVector<T> getColumnVector(final int column)
1051         throws MatrixIndexException {
1052 
1053         checkColumnIndex(column);
1054         final T[] outData = buildArray(getField(), rows);
1055 
1056         // perform copy block-wise, to ensure good cache behavior
1057         final int jBlock  = column / BLOCK_SIZE;
1058         final int jColumn = column - jBlock * BLOCK_SIZE;
1059         final int jWidth  = blockWidth(jBlock);
1060         int outIndex      = 0;
1061         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1062             final int iHeight = blockHeight(iBlock);
1063             final T[] block = blocks[iBlock * blockColumns + jBlock];
1064             for (int i = 0; i < iHeight; ++i) {
1065                 outData[outIndex++] = block[i * jWidth + jColumn];
1066             }
1067         }
1068 
1069         return new ArrayFieldVector<T>(outData, false);
1070 
1071     }
1072 
1073     /** {@inheritDoc} */
1074     @Override
1075     public void setColumnVector(final int column, final FieldVector<T> vector)
1076         throws MatrixIndexException, InvalidMatrixException {
1077         try {
1078             setColumn(column, ((ArrayFieldVector<T>) vector).getDataRef());
1079         } catch (ClassCastException cce) {
1080             super.setColumnVector(column, vector);
1081         }
1082     }
1083 
1084     /** {@inheritDoc} */
1085     @Override
1086     public T[] getRow(final int row)
1087         throws MatrixIndexException {
1088 
1089         checkRowIndex(row);
1090         final T[] out = buildArray(getField(), columns);
1091 
1092         // perform copy block-wise, to ensure good cache behavior
1093         final int iBlock  = row / BLOCK_SIZE;
1094         final int iRow    = row - iBlock * BLOCK_SIZE;
1095         int outIndex      = 0;
1096         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1097             final int jWidth     = blockWidth(jBlock);
1098             final T[] block = blocks[iBlock * blockColumns + jBlock];
1099             System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1100             outIndex += jWidth;
1101         }
1102 
1103         return out;
1104 
1105     }
1106 
1107     /** {@inheritDoc} */
1108     @Override
1109     public void setRow(final int row, final T[] array)
1110         throws MatrixIndexException, InvalidMatrixException {
1111 
1112         checkRowIndex(row);
1113         final int nCols = getColumnDimension();
1114         if (array.length != nCols) {
1115             throw new InvalidMatrixException(
1116                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1117                     1, array.length, 1, nCols);
1118         }
1119 
1120         // perform copy block-wise, to ensure good cache behavior
1121         final int iBlock  = row / BLOCK_SIZE;
1122         final int iRow    = row - iBlock * BLOCK_SIZE;
1123         int outIndex      = 0;
1124         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1125             final int jWidth     = blockWidth(jBlock);
1126             final T[] block = blocks[iBlock * blockColumns + jBlock];
1127             System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1128             outIndex += jWidth;
1129         }
1130 
1131     }
1132 
1133     /** {@inheritDoc} */
1134     @Override
1135     public T[] getColumn(final int column)
1136         throws MatrixIndexException {
1137 
1138         checkColumnIndex(column);
1139         final T[] out = buildArray(getField(), rows);
1140 
1141         // perform copy block-wise, to ensure good cache behavior
1142         final int jBlock  = column / BLOCK_SIZE;
1143         final int jColumn = column - jBlock * BLOCK_SIZE;
1144         final int jWidth  = blockWidth(jBlock);
1145         int outIndex      = 0;
1146         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1147             final int iHeight = blockHeight(iBlock);
1148             final T[] block = blocks[iBlock * blockColumns + jBlock];
1149             for (int i = 0; i < iHeight; ++i) {
1150                 out[outIndex++] = block[i * jWidth + jColumn];
1151             }
1152         }
1153 
1154         return out;
1155 
1156     }
1157 
1158     /** {@inheritDoc} */
1159     @Override
1160     public void setColumn(final int column, final T[] array)
1161         throws MatrixIndexException, InvalidMatrixException {
1162 
1163         checkColumnIndex(column);
1164         final int nRows = getRowDimension();
1165         if (array.length != nRows) {
1166             throw new InvalidMatrixException(
1167                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1168                     array.length, 1, nRows, 1);
1169         }
1170 
1171         // perform copy block-wise, to ensure good cache behavior
1172         final int jBlock  = column / BLOCK_SIZE;
1173         final int jColumn = column - jBlock * BLOCK_SIZE;
1174         final int jWidth  = blockWidth(jBlock);
1175         int outIndex      = 0;
1176         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1177             final int iHeight = blockHeight(iBlock);
1178             final T[] block = blocks[iBlock * blockColumns + jBlock];
1179             for (int i = 0; i < iHeight; ++i) {
1180                 block[i * jWidth + jColumn] = array[outIndex++];
1181             }
1182         }
1183 
1184     }
1185 
1186     /** {@inheritDoc} */
1187     @Override
1188     public T getEntry(final int row, final int column)
1189         throws MatrixIndexException {
1190         try {
1191             final int iBlock = row    / BLOCK_SIZE;
1192             final int jBlock = column / BLOCK_SIZE;
1193             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1194                                (column - jBlock * BLOCK_SIZE);
1195             return blocks[iBlock * blockColumns + jBlock][k];
1196         } catch (ArrayIndexOutOfBoundsException e) {
1197             throw new MatrixIndexException(
1198                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1199                     row, column, getRowDimension(), getColumnDimension());
1200         }
1201     }
1202 
1203     /** {@inheritDoc} */
1204     @Override
1205     public void setEntry(final int row, final int column, final T value)
1206         throws MatrixIndexException {
1207         try {
1208             final int iBlock = row    / BLOCK_SIZE;
1209             final int jBlock = column / BLOCK_SIZE;
1210             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1211                                (column - jBlock * BLOCK_SIZE);
1212             blocks[iBlock * blockColumns + jBlock][k] = value;
1213         } catch (ArrayIndexOutOfBoundsException e) {
1214             throw new MatrixIndexException(
1215                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1216                     row, column, getRowDimension(), getColumnDimension());
1217         }
1218     }
1219 
1220     /** {@inheritDoc} */
1221     @Override
1222     public void addToEntry(final int row, final int column, final T increment)
1223         throws MatrixIndexException {
1224         try {
1225             final int iBlock = row    / BLOCK_SIZE;
1226             final int jBlock = column / BLOCK_SIZE;
1227             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1228                                (column - jBlock * BLOCK_SIZE);
1229             final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1230             blockIJ[k] = blockIJ[k].add(increment);
1231         } catch (ArrayIndexOutOfBoundsException e) {
1232             throw new MatrixIndexException(
1233                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1234                     row, column, getRowDimension(), getColumnDimension());
1235         }
1236     }
1237 
1238     /** {@inheritDoc} */
1239     @Override
1240     public void multiplyEntry(final int row, final int column, final T factor)
1241         throws MatrixIndexException {
1242         try {
1243             final int iBlock = row    / BLOCK_SIZE;
1244             final int jBlock = column / BLOCK_SIZE;
1245             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1246                                (column - jBlock * BLOCK_SIZE);
1247             final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1248             blockIJ[k] = blockIJ[k].multiply(factor);
1249         } catch (ArrayIndexOutOfBoundsException e) {
1250             throw new MatrixIndexException(
1251                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1252                     row, column, getRowDimension(), getColumnDimension());
1253         }
1254     }
1255 
1256     /** {@inheritDoc} */
1257     @Override
1258     public FieldMatrix<T> transpose() {
1259 
1260         final int nRows = getRowDimension();
1261         final int nCols = getColumnDimension();
1262         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), nCols, nRows);
1263 
1264         // perform transpose block-wise, to ensure good cache behavior
1265         int blockIndex = 0;
1266         for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1267             for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1268 
1269                 // transpose current block
1270                 final T[] outBlock = out.blocks[blockIndex];
1271                 final T[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1272                 final int      pStart   = iBlock * BLOCK_SIZE;
1273                 final int      pEnd     = Math.min(pStart + BLOCK_SIZE, columns);
1274                 final int      qStart   = jBlock * BLOCK_SIZE;
1275                 final int      qEnd     = Math.min(qStart + BLOCK_SIZE, rows);
1276                 for (int p = pStart, k = 0; p < pEnd; ++p) {
1277                     final int lInc = pEnd - pStart;
1278                     for (int q = qStart, l = p - pStart; q < qEnd; ++q, l+= lInc) {
1279                         outBlock[k++] = tBlock[l];
1280                     }
1281                 }
1282 
1283                 // go to next block
1284                 ++blockIndex;
1285 
1286             }
1287         }
1288 
1289         return out;
1290 
1291     }
1292 
1293     /** {@inheritDoc} */
1294     @Override
1295     public int getRowDimension() {
1296         return rows;
1297     }
1298 
1299     /** {@inheritDoc} */
1300     @Override
1301     public int getColumnDimension() {
1302         return columns;
1303     }
1304 
1305     /** {@inheritDoc} */
1306     @Override
1307     public T[] operate(final T[] v)
1308         throws IllegalArgumentException {
1309 
1310         if (v.length != columns) {
1311             throw MathRuntimeException.createIllegalArgumentException(
1312                     "vector length mismatch: got {0} but expected {1}",
1313                     v.length, columns);
1314         }
1315         final T[] out = buildArray(getField(), rows);
1316         final T zero = getField().getZero();
1317 
1318         // perform multiplication block-wise, to ensure good cache behavior
1319         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1320             final int pStart = iBlock * BLOCK_SIZE;
1321             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1322             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1323                 final T[] block  = blocks[iBlock * blockColumns + jBlock];
1324                 final int      qStart = jBlock * BLOCK_SIZE;
1325                 final int      qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1326                 for (int p = pStart, k = 0; p < pEnd; ++p) {
1327                     T sum = zero;
1328                     int q = qStart;
1329                     while (q < qEnd - 3) {
1330                         sum = sum.
1331                               add(block[k].multiply(v[q])).
1332                               add(block[k + 1].multiply(v[q + 1])).
1333                               add(block[k + 2].multiply(v[q + 2])).
1334                               add(block[k + 3].multiply(v[q + 3]));
1335                         k += 4;
1336                         q += 4;
1337                     }
1338                     while (q < qEnd) {
1339                         sum = sum.add(block[k++].multiply(v[q++]));
1340                     }
1341                     out[p] = out[p].add(sum);
1342                 }
1343             }
1344         }
1345 
1346         return out;
1347 
1348     }
1349 
1350     /** {@inheritDoc} */
1351     @Override
1352     public T[] preMultiply(final T[] v)
1353         throws IllegalArgumentException {
1354 
1355         if (v.length != rows) {
1356             throw MathRuntimeException.createIllegalArgumentException(
1357                     "vector length mismatch: got {0} but expected {1}",
1358                     v.length, rows);
1359         }
1360         final T[] out = buildArray(getField(), columns);
1361         final T zero = getField().getZero();
1362 
1363         // perform multiplication block-wise, to ensure good cache behavior
1364         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1365             final int jWidth  = blockWidth(jBlock);
1366             final int jWidth2 = jWidth  + jWidth;
1367             final int jWidth3 = jWidth2 + jWidth;
1368             final int jWidth4 = jWidth3 + jWidth;
1369             final int qStart = jBlock * BLOCK_SIZE;
1370             final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1371             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1372                 final T[] block  = blocks[iBlock * blockColumns + jBlock];
1373                 final int      pStart = iBlock * BLOCK_SIZE;
1374                 final int      pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1375                 for (int q = qStart; q < qEnd; ++q) {
1376                     int k = q - qStart;
1377                     T sum = zero;
1378                     int p = pStart;
1379                     while (p < pEnd - 3) {
1380                         sum = sum.
1381                               add(block[k].multiply(v[p])).
1382                               add(block[k + jWidth].multiply(v[p + 1])).
1383                               add(block[k + jWidth2].multiply(v[p + 2])).
1384                               add(block[k + jWidth3].multiply(v[p + 3]));
1385                         k += jWidth4;
1386                         p += 4;
1387                     }
1388                     while (p < pEnd) {
1389                         sum = sum.add(block[k].multiply(v[p++]));
1390                         k += jWidth;
1391                     }
1392                     out[q] = out[q].add(sum);
1393                 }
1394             }
1395         }
1396 
1397         return out;
1398 
1399     }
1400 
1401     /** {@inheritDoc} */
1402     @Override
1403     public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor)
1404         throws MatrixVisitorException {
1405         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1406         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1407             final int pStart = iBlock * BLOCK_SIZE;
1408             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1409             for (int p = pStart; p < pEnd; ++p) {
1410                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1411                     final int jWidth = blockWidth(jBlock);
1412                     final int qStart = jBlock * BLOCK_SIZE;
1413                     final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1414                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1415                     for (int q = qStart, k = (p - pStart) * jWidth; q < qEnd; ++q, ++k) {
1416                         block[k] = visitor.visit(p, q, block[k]);
1417                     }
1418                 }
1419              }
1420         }
1421         return visitor.end();
1422     }
1423 
1424     /** {@inheritDoc} */
1425     @Override
1426     public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor)
1427         throws MatrixVisitorException {
1428         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1429         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1430             final int pStart = iBlock * BLOCK_SIZE;
1431             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1432             for (int p = pStart; p < pEnd; ++p) {
1433                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1434                     final int jWidth = blockWidth(jBlock);
1435                     final int qStart = jBlock * BLOCK_SIZE;
1436                     final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1437                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1438                     for (int q = qStart, k = (p - pStart) * jWidth; q < qEnd; ++q, ++k) {
1439                         visitor.visit(p, q, block[k]);
1440                     }
1441                 }
1442              }
1443         }
1444         return visitor.end();
1445     }
1446 
1447     /** {@inheritDoc} */
1448     @Override
1449     public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor,
1450                                  final int startRow, final int endRow,
1451                                  final int startColumn, final int endColumn)
1452         throws MatrixIndexException, MatrixVisitorException {
1453         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1454         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1455         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1456             final int p0     = iBlock * BLOCK_SIZE;
1457             final int pStart = Math.max(startRow, p0);
1458             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1459             for (int p = pStart; p < pEnd; ++p) {
1460                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1461                     final int jWidth = blockWidth(jBlock);
1462                     final int q0     = jBlock * BLOCK_SIZE;
1463                     final int qStart = Math.max(startColumn, q0);
1464                     final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1465                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1466                     for (int q = qStart, k = (p - p0) * jWidth + qStart - q0; q < qEnd; ++q, ++k) {
1467                         block[k] = visitor.visit(p, q, block[k]);
1468                     }
1469                 }
1470              }
1471         }
1472         return visitor.end();
1473     }
1474 
1475     /** {@inheritDoc} */
1476     @Override
1477     public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor,
1478                                  final int startRow, final int endRow,
1479                                  final int startColumn, final int endColumn)
1480         throws MatrixIndexException, MatrixVisitorException {
1481         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1482         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1483         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1484             final int p0     = iBlock * BLOCK_SIZE;
1485             final int pStart = Math.max(startRow, p0);
1486             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1487             for (int p = pStart; p < pEnd; ++p) {
1488                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1489                     final int jWidth = blockWidth(jBlock);
1490                     final int q0     = jBlock * BLOCK_SIZE;
1491                     final int qStart = Math.max(startColumn, q0);
1492                     final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1493                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1494                     for (int q = qStart, k = (p - p0) * jWidth + qStart - q0; q < qEnd; ++q, ++k) {
1495                         visitor.visit(p, q, block[k]);
1496                     }
1497                 }
1498              }
1499         }
1500         return visitor.end();
1501     }
1502 
1503     /** {@inheritDoc} */
1504     @Override
1505     public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor)
1506         throws MatrixVisitorException {
1507         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1508         for (int iBlock = 0, blockIndex = 0; iBlock < blockRows; ++iBlock) {
1509             final int pStart = iBlock * BLOCK_SIZE;
1510             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1511             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++blockIndex) {
1512                 final int qStart = jBlock * BLOCK_SIZE;
1513                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1514                 final T[] block = blocks[blockIndex];
1515                 for (int p = pStart, k = 0; p < pEnd; ++p) {
1516                     for (int q = qStart; q < qEnd; ++q, ++k) {
1517                         block[k] = visitor.visit(p, q, block[k]);
1518                     }
1519                 }
1520             }
1521         }
1522         return visitor.end();
1523     }
1524 
1525     /** {@inheritDoc} */
1526     @Override
1527     public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor)
1528         throws MatrixVisitorException {
1529         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1530         for (int iBlock = 0, blockIndex = 0; iBlock < blockRows; ++iBlock) {
1531             final int pStart = iBlock * BLOCK_SIZE;
1532             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1533             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++blockIndex) {
1534                 final int qStart = jBlock * BLOCK_SIZE;
1535                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1536                 final T[] block = blocks[blockIndex];
1537                 for (int p = pStart, k = 0; p < pEnd; ++p) {
1538                     for (int q = qStart; q < qEnd; ++q, ++k) {
1539                         visitor.visit(p, q, block[k]);
1540                     }
1541                 }
1542             }
1543         }
1544         return visitor.end();
1545     }
1546 
1547     /** {@inheritDoc} */
1548     @Override
1549     public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor,
1550                                        final int startRow, final int endRow,
1551                                        final int startColumn, final int endColumn)
1552         throws MatrixIndexException, MatrixVisitorException {
1553         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1554         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1555         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1556             final int p0     = iBlock * BLOCK_SIZE;
1557             final int pStart = Math.max(startRow, p0);
1558             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1559             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1560                 final int jWidth = blockWidth(jBlock);
1561                 final int q0     = jBlock * BLOCK_SIZE;
1562                 final int qStart = Math.max(startColumn, q0);
1563                 final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1564                 final T[] block = blocks[iBlock * blockColumns + jBlock];
1565                 for (int p = pStart; p < pEnd; ++p) {
1566                     for (int q = qStart, k = (p - p0) * jWidth + qStart - q0; q < qEnd; ++q, ++k) {
1567                         block[k] = visitor.visit(p, q, block[k]);
1568                     }
1569                 }
1570             }
1571         }
1572         return visitor.end();
1573     }
1574 
1575     /** {@inheritDoc} */
1576     @Override
1577     public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor,
1578                                        final int startRow, final int endRow,
1579                                        final int startColumn, final int endColumn)
1580         throws MatrixIndexException, MatrixVisitorException {
1581         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1582         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1583         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1584             final int p0     = iBlock * BLOCK_SIZE;
1585             final int pStart = Math.max(startRow, p0);
1586             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1587             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1588                 final int jWidth = blockWidth(jBlock);
1589                 final int q0     = jBlock * BLOCK_SIZE;
1590                 final int qStart = Math.max(startColumn, q0);
1591                 final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1592                 final T[] block = blocks[iBlock * blockColumns + jBlock];
1593                 for (int p = pStart; p < pEnd; ++p) {
1594                     for (int q = qStart, k = (p - p0) * jWidth + qStart - q0; q < qEnd; ++q, ++k) {
1595                         visitor.visit(p, q, block[k]);
1596                     }
1597                 }
1598             }
1599         }
1600         return visitor.end();
1601     }
1602 
1603     /**
1604      * Get the height of a block.
1605      * @param blockRow row index (in block sense) of the block
1606      * @return height (number of rows) of the block
1607      */
1608     private int blockHeight(final int blockRow) {
1609         return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1610     }
1611 
1612     /**
1613      * Get the width of a block.
1614      * @param blockColumn column index (in block sense) of the block
1615      * @return width (number of columns) of the block
1616      */
1617     private int blockWidth(final int blockColumn) {
1618         return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1619     }
1620 
1621 }