org.apache.sysml.api.mlcontext

Class MLContextConversionUtil



  • public class MLContextConversionUtil
    extends Object
    Utility class containing methods to perform data conversions.
    • Method Summary

      All Methods Static Methods Concrete Methods 
      Modifier and Type Method and Description
      static org.apache.sysml.runtime.controlprogram.caching.FrameObject binaryBlocksToFrameObject(String variableName, org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> binaryBlocks)
      Convert a JavaPairRDD<Long, FrameBlock> to a FrameObject.
      static org.apache.sysml.runtime.controlprogram.caching.FrameObject binaryBlocksToFrameObject(String variableName, org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> binaryBlocks, FrameMetadata frameMetadata)
      Convert a JavaPairRDD<Long, FrameBlock> to a FrameObject.
      static MatrixBlock binaryBlocksToMatrixBlock(org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> binaryBlocks, MatrixMetadata matrixMetadata)
      Convert a JavaPairRDD<MatrixIndexes, MatrixBlock> to a MatrixBlock
      static org.apache.sysml.runtime.controlprogram.caching.MatrixObject binaryBlocksToMatrixObject(String variableName, org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> binaryBlocks)
      Convert a JavaPairRDD<MatrixIndexes, MatrixBlock> to a MatrixObject.
      static org.apache.sysml.runtime.controlprogram.caching.MatrixObject binaryBlocksToMatrixObject(String variableName, org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> binaryBlocks, MatrixMetadata matrixMetadata)
      Convert a JavaPairRDD<MatrixIndexes, MatrixBlock> to a MatrixObject.
      static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> dataFrameToFrameBinaryBlocks(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame, FrameMetadata frameMetadata)
      Convert a DataFrame to a JavaPairRDD<Long, FrameBlock> binary-block frame.
      static org.apache.sysml.runtime.controlprogram.caching.FrameObject dataFrameToFrameObject(String variableName, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame)
      Convert a DataFrame to a FrameObject.
      static org.apache.sysml.runtime.controlprogram.caching.FrameObject dataFrameToFrameObject(String variableName, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame, FrameMetadata frameMetadata)
      Convert a DataFrame to a FrameObject.
      static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> dataFrameToMatrixBinaryBlocks(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame)
      Convert a DataFrame to a JavaPairRDD<MatrixIndexes, MatrixBlock> binary-block matrix.
      static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> dataFrameToMatrixBinaryBlocks(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame, MatrixMetadata matrixMetadata)
      Convert a DataFrame to a JavaPairRDD<MatrixIndexes, MatrixBlock> binary-block matrix.
      static org.apache.sysml.runtime.controlprogram.caching.MatrixObject dataFrameToMatrixObject(String variableName, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame)
      Convert a DataFrame to a MatrixObject.
      static org.apache.sysml.runtime.controlprogram.caching.MatrixObject dataFrameToMatrixObject(String variableName, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame, MatrixMetadata matrixMetadata)
      Convert a DataFrame to a MatrixObject.
      static void determineFrameFormatIfNeeded(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame, FrameMetadata frameMetadata)
      If the FrameFormat of the DataFrame has not been explicitly specified, attempt to determine the proper FrameFormat.
      static void determineMatrixFormatIfNeeded(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame, MatrixMetadata matrixMetadata)
      If the MatrixFormat of the DataFrame has not been explicitly specified, attempt to determine the proper MatrixFormat.
      static org.apache.sysml.runtime.controlprogram.caching.MatrixObject doubleMatrixToMatrixObject(String variableName, double[][] doubleMatrix)
      Convert a two-dimensional double array to a MatrixObject.
      static org.apache.sysml.runtime.controlprogram.caching.MatrixObject doubleMatrixToMatrixObject(String variableName, double[][] doubleMatrix, MatrixMetadata matrixMetadata)
      Convert a two-dimensional double array to a MatrixObject.
      static org.apache.sysml.runtime.controlprogram.caching.FrameObject frameBlockToFrameObject(String variableName, FrameBlock frameBlock, FrameMetadata frameMetadata)
      Convert a FrameBlock to a FrameObject.
      static String[][] frameObjectTo2DStringArray(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject)
      Convert a FrameObject to a two-dimensional string array.
      static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> frameObjectToBinaryBlocks(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject, org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext sparkExecutionContext)
      Convert a FrameObject to a JavaPairRDD<Long, FrameBlock>.
      static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> frameObjectToDataFrame(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject, org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext sparkExecutionContext)
      Convert a FrameObject to a DataFrame.
      static org.apache.spark.api.java.JavaRDD<String> frameObjectToJavaRDDStringCSV(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject, String delimiter)
      Convert a FrameObject to a JavaRDD<String> in CSV format.
      static org.apache.spark.api.java.JavaRDD<String> frameObjectToJavaRDDStringIJV(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject)
      Convert a FrameObject to a JavaRDD<String> in IJV format.
      static List<String> frameObjectToListStringCSV(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject, String delimiter)
      Convert a FrameObject to a List<String> in CSV format.
      static List<String> frameObjectToListStringIJV(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject)
      Convert a FrameObject to a List<String> in IJV format.
      static org.apache.spark.rdd.RDD<String> frameObjectToRDDStringCSV(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject, String delimiter)
      Convert a FrameObject to a RDD<String> in CSV format.
      static org.apache.spark.rdd.RDD<String> frameObjectToRDDStringIJV(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject)
      Convert a FrameObject to a RDD<String> in IJV format.
      static boolean isDataFrameWithIDColumn(FrameMetadata frameMetadata)
      Return whether or not the DataFrame has an ID column.
      static boolean isDataFrameWithIDColumn(MatrixMetadata matrixMetadata)
      Return whether or not the DataFrame has an ID column.
      static boolean isVectorBasedDataFrame(MatrixMetadata matrixMetadata)
      Return whether or not the DataFrame is vector-based.
      static org.apache.sysml.runtime.controlprogram.caching.FrameObject javaRDDStringCSVToFrameObject(String variableName, org.apache.spark.api.java.JavaRDD<String> javaRDD)
      Convert a JavaRDD<String> in CSV format to a FrameObject
      static org.apache.sysml.runtime.controlprogram.caching.FrameObject javaRDDStringCSVToFrameObject(String variableName, org.apache.spark.api.java.JavaRDD<String> javaRDD, FrameMetadata frameMetadata)
      Convert a JavaRDD<String> in CSV format to a FrameObject
      static org.apache.sysml.runtime.controlprogram.caching.MatrixObject javaRDDStringCSVToMatrixObject(String variableName, org.apache.spark.api.java.JavaRDD<String> javaRDD)
      Convert a JavaRDD<String> in CSV format to a MatrixObject
      static org.apache.sysml.runtime.controlprogram.caching.MatrixObject javaRDDStringCSVToMatrixObject(String variableName, org.apache.spark.api.java.JavaRDD<String> javaRDD, MatrixMetadata matrixMetadata)
      Convert a JavaRDD<String> in CSV format to a MatrixObject
      static org.apache.sysml.runtime.controlprogram.caching.FrameObject javaRDDStringIJVToFrameObject(String variableName, org.apache.spark.api.java.JavaRDD<String> javaRDD, FrameMetadata frameMetadata)
      Convert a JavaRDD<String> in IJV format to a FrameObject.
      static org.apache.sysml.runtime.controlprogram.caching.MatrixObject javaRDDStringIJVToMatrixObject(String variableName, org.apache.spark.api.java.JavaRDD<String> javaRDD, MatrixMetadata matrixMetadata)
      Convert a JavaRDD<String> in IJV format to a MatrixObject.
      static org.apache.spark.api.java.JavaSparkContext jsc()
      Obtain JavaSparkContext from MLContextProxy.
      static org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixBlockToMatrixObject(String variableName, MatrixBlock matrixBlock, MatrixMetadata matrixMetadata)
      Convert a MatrixBlock to a MatrixObject.
      static double[][] matrixObjectTo2DDoubleArray(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject)
      Convert a MatrixObject to a two-dimensional double array.
      static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> matrixObjectToBinaryBlocks(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject, org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext sparkExecutionContext)
      Convert a MatrixObject to a JavaPairRDD<MatrixIndexes, MatrixBlock>.
      static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> matrixObjectToDataFrame(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject, org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext sparkExecutionContext, boolean isVectorDF)
      Convert a MatrixObject to a DataFrame.
      static org.apache.spark.api.java.JavaRDD<String> matrixObjectToJavaRDDStringCSV(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject)
      Convert a MatrixObject to a JavaRDD<String> in CSV format.
      static org.apache.spark.api.java.JavaRDD<String> matrixObjectToJavaRDDStringIJV(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject)
      Convert a MatrixObject to a JavaRDD<String> in IJV format.
      static List<String> matrixObjectToListStringCSV(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject)
      Convert a MatrixObject to a List<String> in CSV format.
      static List<String> matrixObjectToListStringIJV(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject)
      Convert a MatrixObject to a List<String> in IJV format.
      static org.apache.spark.rdd.RDD<String> matrixObjectToRDDStringCSV(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject)
      Convert a MatrixObject to a RDD<String> in CSV format.
      static org.apache.spark.rdd.RDD<String> matrixObjectToRDDStringIJV(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject)
      Convert a MatrixObject to a RDD<String> in IJV format.
      static org.apache.sysml.runtime.controlprogram.caching.FrameObject rddStringCSVToFrameObject(String variableName, org.apache.spark.rdd.RDD<String> rdd)
      Convert a RDD<String> in CSV format to a FrameObject
      static org.apache.sysml.runtime.controlprogram.caching.FrameObject rddStringCSVToFrameObject(String variableName, org.apache.spark.rdd.RDD<String> rdd, FrameMetadata frameMetadata)
      Convert a RDD<String> in CSV format to a FrameObject
      static org.apache.sysml.runtime.controlprogram.caching.MatrixObject rddStringCSVToMatrixObject(String variableName, org.apache.spark.rdd.RDD<String> rdd)
      Convert a RDD<String> in CSV format to a MatrixObject
      static org.apache.sysml.runtime.controlprogram.caching.MatrixObject rddStringCSVToMatrixObject(String variableName, org.apache.spark.rdd.RDD<String> rdd, MatrixMetadata matrixMetadata)
      Convert a RDD<String> in CSV format to a MatrixObject
      static org.apache.sysml.runtime.controlprogram.caching.FrameObject rddStringIJVToFrameObject(String variableName, org.apache.spark.rdd.RDD<String> rdd, FrameMetadata frameMetadata)
      Convert a RDD<String> in IJV format to a FrameObject.
      static org.apache.sysml.runtime.controlprogram.caching.MatrixObject rddStringIJVToMatrixObject(String variableName, org.apache.spark.rdd.RDD<String> rdd, MatrixMetadata matrixMetadata)
      Convert a RDD<String> in IJV format to a MatrixObject.
      static org.apache.spark.SparkContext sc()
      Obtain SparkContext from MLContextProxy.
      static org.apache.spark.sql.SparkSession spark()
      Obtain SparkSession from MLContextProxy.
      static org.apache.sysml.runtime.controlprogram.caching.MatrixObject urlToMatrixObject(String variableName, URL url, MatrixMetadata matrixMetadata)
      Convert a matrix at a URL to a MatrixObject.
    • Constructor Detail

      • MLContextConversionUtil

        public MLContextConversionUtil()
    • Method Detail

      • doubleMatrixToMatrixObject

        public static org.apache.sysml.runtime.controlprogram.caching.MatrixObject doubleMatrixToMatrixObject(String variableName,
                                                                                                              double[][] doubleMatrix)
        Convert a two-dimensional double array to a MatrixObject.
        Parameters:
        variableName - name of the variable associated with the matrix
        doubleMatrix - matrix of double values
        Returns:
        the two-dimensional double matrix converted to a MatrixObject
      • doubleMatrixToMatrixObject

        public static org.apache.sysml.runtime.controlprogram.caching.MatrixObject doubleMatrixToMatrixObject(String variableName,
                                                                                                              double[][] doubleMatrix,
                                                                                                              MatrixMetadata matrixMetadata)
        Convert a two-dimensional double array to a MatrixObject.
        Parameters:
        variableName - name of the variable associated with the matrix
        doubleMatrix - matrix of double values
        matrixMetadata - the matrix metadata
        Returns:
        the two-dimensional double matrix converted to a MatrixObject
      • urlToMatrixObject

        public static org.apache.sysml.runtime.controlprogram.caching.MatrixObject urlToMatrixObject(String variableName,
                                                                                                     URL url,
                                                                                                     MatrixMetadata matrixMetadata)
        Convert a matrix at a URL to a MatrixObject.
        Parameters:
        variableName - name of the variable associated with the matrix
        url - the URL to a matrix (in CSV or IJV format)
        matrixMetadata - the matrix metadata
        Returns:
        the matrix at a URL converted to a MatrixObject
      • matrixBlockToMatrixObject

        public static org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixBlockToMatrixObject(String variableName,
                                                                                                             MatrixBlock matrixBlock,
                                                                                                             MatrixMetadata matrixMetadata)
        Convert a MatrixBlock to a MatrixObject.
        Parameters:
        variableName - name of the variable associated with the matrix
        matrixBlock - matrix as a MatrixBlock
        matrixMetadata - the matrix metadata
        Returns:
        the MatrixBlock converted to a MatrixObject
      • frameBlockToFrameObject

        public static org.apache.sysml.runtime.controlprogram.caching.FrameObject frameBlockToFrameObject(String variableName,
                                                                                                          FrameBlock frameBlock,
                                                                                                          FrameMetadata frameMetadata)
        Convert a FrameBlock to a FrameObject.
        Parameters:
        variableName - name of the variable associated with the frame
        frameBlock - frame as a FrameBlock
        frameMetadata - the frame metadata
        Returns:
        the FrameBlock converted to a FrameObject
      • binaryBlocksToMatrixObject

        public static org.apache.sysml.runtime.controlprogram.caching.MatrixObject binaryBlocksToMatrixObject(String variableName,
                                                                                                              org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> binaryBlocks)
        Convert a JavaPairRDD<MatrixIndexes, MatrixBlock> to a MatrixObject.
        Parameters:
        variableName - name of the variable associated with the matrix
        binaryBlocks - JavaPairRDD<MatrixIndexes, MatrixBlock> representation of a binary-block matrix
        Returns:
        the JavaPairRDD<MatrixIndexes, MatrixBlock> matrix converted to a MatrixObject
      • binaryBlocksToMatrixObject

        public static org.apache.sysml.runtime.controlprogram.caching.MatrixObject binaryBlocksToMatrixObject(String variableName,
                                                                                                              org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> binaryBlocks,
                                                                                                              MatrixMetadata matrixMetadata)
        Convert a JavaPairRDD<MatrixIndexes, MatrixBlock> to a MatrixObject.
        Parameters:
        variableName - name of the variable associated with the matrix
        binaryBlocks - JavaPairRDD<MatrixIndexes, MatrixBlock> representation of a binary-block matrix
        matrixMetadata - the matrix metadata
        Returns:
        the JavaPairRDD<MatrixIndexes, MatrixBlock> matrix converted to a MatrixObject
      • binaryBlocksToMatrixBlock

        public static MatrixBlock binaryBlocksToMatrixBlock(org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> binaryBlocks,
                                                            MatrixMetadata matrixMetadata)
        Convert a JavaPairRDD<MatrixIndexes, MatrixBlock> to a MatrixBlock
        Parameters:
        binaryBlocks - JavaPairRDD<MatrixIndexes, MatrixBlock> representation of a binary-block matrix
        matrixMetadata - the matrix metadata
        Returns:
        the JavaPairRDD<MatrixIndexes, MatrixBlock> matrix converted to a MatrixBlock
      • binaryBlocksToFrameObject

        public static org.apache.sysml.runtime.controlprogram.caching.FrameObject binaryBlocksToFrameObject(String variableName,
                                                                                                            org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> binaryBlocks)
        Convert a JavaPairRDD<Long, FrameBlock> to a FrameObject.
        Parameters:
        variableName - name of the variable associated with the frame
        binaryBlocks - JavaPairRDD<Long, FrameBlock> representation of a binary-block frame
        Returns:
        the JavaPairRDD<Long, FrameBlock> frame converted to a FrameObject
      • binaryBlocksToFrameObject

        public static org.apache.sysml.runtime.controlprogram.caching.FrameObject binaryBlocksToFrameObject(String variableName,
                                                                                                            org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> binaryBlocks,
                                                                                                            FrameMetadata frameMetadata)
        Convert a JavaPairRDD<Long, FrameBlock> to a FrameObject.
        Parameters:
        variableName - name of the variable associated with the frame
        binaryBlocks - JavaPairRDD<Long, FrameBlock> representation of a binary-block frame
        frameMetadata - the frame metadata
        Returns:
        the JavaPairRDD<Long, FrameBlock> frame converted to a FrameObject
      • dataFrameToMatrixObject

        public static org.apache.sysml.runtime.controlprogram.caching.MatrixObject dataFrameToMatrixObject(String variableName,
                                                                                                           org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame)
        Convert a DataFrame to a MatrixObject.
        Parameters:
        variableName - name of the variable associated with the matrix
        dataFrame - the Spark DataFrame
        Returns:
        the DataFrame matrix converted to a MatrixObject
      • dataFrameToMatrixObject

        public static org.apache.sysml.runtime.controlprogram.caching.MatrixObject dataFrameToMatrixObject(String variableName,
                                                                                                           org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame,
                                                                                                           MatrixMetadata matrixMetadata)
        Convert a DataFrame to a MatrixObject.
        Parameters:
        variableName - name of the variable associated with the matrix
        dataFrame - the Spark DataFrame
        matrixMetadata - the matrix metadata
        Returns:
        the DataFrame matrix converted to a MatrixObject
      • dataFrameToFrameObject

        public static org.apache.sysml.runtime.controlprogram.caching.FrameObject dataFrameToFrameObject(String variableName,
                                                                                                         org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame)
        Convert a DataFrame to a FrameObject.
        Parameters:
        variableName - name of the variable associated with the frame
        dataFrame - the Spark DataFrame
        Returns:
        the DataFrame frame converted to a FrameObject
      • dataFrameToFrameObject

        public static org.apache.sysml.runtime.controlprogram.caching.FrameObject dataFrameToFrameObject(String variableName,
                                                                                                         org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame,
                                                                                                         FrameMetadata frameMetadata)
        Convert a DataFrame to a FrameObject.
        Parameters:
        variableName - name of the variable associated with the frame
        dataFrame - the Spark DataFrame
        frameMetadata - the frame metadata
        Returns:
        the DataFrame frame converted to a FrameObject
      • dataFrameToMatrixBinaryBlocks

        public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> dataFrameToMatrixBinaryBlocks(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame)
        Convert a DataFrame to a JavaPairRDD<MatrixIndexes, MatrixBlock> binary-block matrix.
        Parameters:
        dataFrame - the Spark DataFrame
        Returns:
        the DataFrame matrix converted to a JavaPairRDD<MatrixIndexes, MatrixBlock> binary-block matrix
      • dataFrameToMatrixBinaryBlocks

        public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> dataFrameToMatrixBinaryBlocks(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame,
                                                                                                                     MatrixMetadata matrixMetadata)
        Convert a DataFrame to a JavaPairRDD<MatrixIndexes, MatrixBlock> binary-block matrix.
        Parameters:
        dataFrame - the Spark DataFrame
        matrixMetadata - the matrix metadata
        Returns:
        the DataFrame matrix converted to a JavaPairRDD<MatrixIndexes, MatrixBlock> binary-block matrix
      • dataFrameToFrameBinaryBlocks

        public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> dataFrameToFrameBinaryBlocks(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame,
                                                                                                          FrameMetadata frameMetadata)
        Convert a DataFrame to a JavaPairRDD<Long, FrameBlock> binary-block frame.
        Parameters:
        dataFrame - the Spark DataFrame
        frameMetadata - the frame metadata
        Returns:
        the DataFrame frame converted to a JavaPairRDD<Long, FrameBlock> binary-block frame
      • determineMatrixFormatIfNeeded

        public static void determineMatrixFormatIfNeeded(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame,
                                                         MatrixMetadata matrixMetadata)
        If the MatrixFormat of the DataFrame has not been explicitly specified, attempt to determine the proper MatrixFormat.
        Parameters:
        dataFrame - the Spark DataFrame
        matrixMetadata - the matrix metadata, if available
      • determineFrameFormatIfNeeded

        public static void determineFrameFormatIfNeeded(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataFrame,
                                                        FrameMetadata frameMetadata)
        If the FrameFormat of the DataFrame has not been explicitly specified, attempt to determine the proper FrameFormat.
        Parameters:
        dataFrame - the Spark DataFrame
        frameMetadata - the frame metadata, if available
      • isDataFrameWithIDColumn

        public static boolean isDataFrameWithIDColumn(MatrixMetadata matrixMetadata)
        Return whether or not the DataFrame has an ID column.
        Parameters:
        matrixMetadata - the matrix metadata
        Returns:
        true if the DataFrame has an ID column, false otherwise.
      • isDataFrameWithIDColumn

        public static boolean isDataFrameWithIDColumn(FrameMetadata frameMetadata)
        Return whether or not the DataFrame has an ID column.
        Parameters:
        frameMetadata - the frame metadata
        Returns:
        true if the DataFrame has an ID column, false otherwise.
      • isVectorBasedDataFrame

        public static boolean isVectorBasedDataFrame(MatrixMetadata matrixMetadata)
        Return whether or not the DataFrame is vector-based.
        Parameters:
        matrixMetadata - the matrix metadata
        Returns:
        true if the DataFrame is vector-based, false otherwise.
      • javaRDDStringCSVToMatrixObject

        public static org.apache.sysml.runtime.controlprogram.caching.MatrixObject javaRDDStringCSVToMatrixObject(String variableName,
                                                                                                                  org.apache.spark.api.java.JavaRDD<String> javaRDD)
        Convert a JavaRDD<String> in CSV format to a MatrixObject
        Parameters:
        variableName - name of the variable associated with the matrix
        javaRDD - the Java RDD of strings
        Returns:
        the JavaRDD<String> converted to a MatrixObject
      • javaRDDStringCSVToMatrixObject

        public static org.apache.sysml.runtime.controlprogram.caching.MatrixObject javaRDDStringCSVToMatrixObject(String variableName,
                                                                                                                  org.apache.spark.api.java.JavaRDD<String> javaRDD,
                                                                                                                  MatrixMetadata matrixMetadata)
        Convert a JavaRDD<String> in CSV format to a MatrixObject
        Parameters:
        variableName - name of the variable associated with the matrix
        javaRDD - the Java RDD of strings
        matrixMetadata - matrix metadata
        Returns:
        the JavaRDD<String> converted to a MatrixObject
      • javaRDDStringCSVToFrameObject

        public static org.apache.sysml.runtime.controlprogram.caching.FrameObject javaRDDStringCSVToFrameObject(String variableName,
                                                                                                                org.apache.spark.api.java.JavaRDD<String> javaRDD)
        Convert a JavaRDD<String> in CSV format to a FrameObject
        Parameters:
        variableName - name of the variable associated with the frame
        javaRDD - the Java RDD of strings
        Returns:
        the JavaRDD<String> converted to a FrameObject
      • javaRDDStringCSVToFrameObject

        public static org.apache.sysml.runtime.controlprogram.caching.FrameObject javaRDDStringCSVToFrameObject(String variableName,
                                                                                                                org.apache.spark.api.java.JavaRDD<String> javaRDD,
                                                                                                                FrameMetadata frameMetadata)
        Convert a JavaRDD<String> in CSV format to a FrameObject
        Parameters:
        variableName - name of the variable associated with the frame
        javaRDD - the Java RDD of strings
        frameMetadata - frame metadata
        Returns:
        the JavaRDD<String> converted to a FrameObject
      • javaRDDStringIJVToMatrixObject

        public static org.apache.sysml.runtime.controlprogram.caching.MatrixObject javaRDDStringIJVToMatrixObject(String variableName,
                                                                                                                  org.apache.spark.api.java.JavaRDD<String> javaRDD,
                                                                                                                  MatrixMetadata matrixMetadata)
        Convert a JavaRDD<String> in IJV format to a MatrixObject. Note that metadata is required for IJV format.
        Parameters:
        variableName - name of the variable associated with the matrix
        javaRDD - the Java RDD of strings
        matrixMetadata - matrix metadata
        Returns:
        the JavaRDD<String> converted to a MatrixObject
      • javaRDDStringIJVToFrameObject

        public static org.apache.sysml.runtime.controlprogram.caching.FrameObject javaRDDStringIJVToFrameObject(String variableName,
                                                                                                                org.apache.spark.api.java.JavaRDD<String> javaRDD,
                                                                                                                FrameMetadata frameMetadata)
        Convert a JavaRDD<String> in IJV format to a FrameObject. Note that metadata is required for IJV format.
        Parameters:
        variableName - name of the variable associated with the frame
        javaRDD - the Java RDD of strings
        frameMetadata - frame metadata
        Returns:
        the JavaRDD<String> converted to a FrameObject
      • rddStringCSVToMatrixObject

        public static org.apache.sysml.runtime.controlprogram.caching.MatrixObject rddStringCSVToMatrixObject(String variableName,
                                                                                                              org.apache.spark.rdd.RDD<String> rdd)
        Convert a RDD<String> in CSV format to a MatrixObject
        Parameters:
        variableName - name of the variable associated with the matrix
        rdd - the RDD of strings
        Returns:
        the RDD<String> converted to a MatrixObject
      • rddStringCSVToMatrixObject

        public static org.apache.sysml.runtime.controlprogram.caching.MatrixObject rddStringCSVToMatrixObject(String variableName,
                                                                                                              org.apache.spark.rdd.RDD<String> rdd,
                                                                                                              MatrixMetadata matrixMetadata)
        Convert an RDD<String> in CSV format to a MatrixObject.
        Parameters:
        variableName - name of the variable associated with the matrix
        rdd - the RDD of strings
        matrixMetadata - matrix metadata
        Returns:
        the RDD<String> converted to a MatrixObject
      • rddStringCSVToFrameObject

        public static org.apache.sysml.runtime.controlprogram.caching.FrameObject rddStringCSVToFrameObject(String variableName,
                                                                                                            org.apache.spark.rdd.RDD<String> rdd)
        Convert an RDD<String> in CSV format to a FrameObject.
        Parameters:
        variableName - name of the variable associated with the frame
        rdd - the RDD of strings
        Returns:
        the RDD<String> converted to a FrameObject
      • rddStringCSVToFrameObject

        public static org.apache.sysml.runtime.controlprogram.caching.FrameObject rddStringCSVToFrameObject(String variableName,
                                                                                                            org.apache.spark.rdd.RDD<String> rdd,
                                                                                                            FrameMetadata frameMetadata)
        Convert an RDD<String> in CSV format to a FrameObject.
        Parameters:
        variableName - name of the variable associated with the frame
        rdd - the RDD of strings
        frameMetadata - frame metadata
        Returns:
        the RDD<String> converted to a FrameObject
      • rddStringIJVToMatrixObject

        public static org.apache.sysml.runtime.controlprogram.caching.MatrixObject rddStringIJVToMatrixObject(String variableName,
                                                                                                              org.apache.spark.rdd.RDD<String> rdd,
                                                                                                              MatrixMetadata matrixMetadata)
        Convert an RDD<String> in IJV format to a MatrixObject. Note that metadata is required for IJV format.
        Parameters:
        variableName - name of the variable associated with the matrix
        rdd - the RDD of strings
        matrixMetadata - matrix metadata
        Returns:
        the RDD<String> converted to a MatrixObject
      • rddStringIJVToFrameObject

        public static org.apache.sysml.runtime.controlprogram.caching.FrameObject rddStringIJVToFrameObject(String variableName,
                                                                                                            org.apache.spark.rdd.RDD<String> rdd,
                                                                                                            FrameMetadata frameMetadata)
        Convert an RDD<String> in IJV format to a FrameObject. Note that metadata is required for IJV format.
        Parameters:
        variableName - name of the variable associated with the frame
        rdd - the RDD of strings
        frameMetadata - frame metadata
        Returns:
        the RDD<String> converted to a FrameObject
      • matrixObjectToJavaRDDStringCSV

        public static org.apache.spark.api.java.JavaRDD<String> matrixObjectToJavaRDDStringCSV(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject)
        Convert a MatrixObject to a JavaRDD<String> in CSV format.
        Parameters:
        matrixObject - the MatrixObject
        Returns:
        the MatrixObject converted to a JavaRDD<String>
      • frameObjectToJavaRDDStringCSV

        public static org.apache.spark.api.java.JavaRDD<String> frameObjectToJavaRDDStringCSV(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject,
                                                                                              String delimiter)
        Convert a FrameObject to a JavaRDD<String> in CSV format.
        Parameters:
        frameObject - the FrameObject
        delimiter - the delimiter
        Returns:
        the FrameObject converted to a JavaRDD<String>
      • matrixObjectToJavaRDDStringIJV

        public static org.apache.spark.api.java.JavaRDD<String> matrixObjectToJavaRDDStringIJV(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject)
        Convert a MatrixObject to a JavaRDD<String> in IJV format.
        Parameters:
        matrixObject - the MatrixObject
        Returns:
        the MatrixObject converted to a JavaRDD<String>
      • frameObjectToJavaRDDStringIJV

        public static org.apache.spark.api.java.JavaRDD<String> frameObjectToJavaRDDStringIJV(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject)
        Convert a FrameObject to a JavaRDD<String> in IJV format.
        Parameters:
        frameObject - the FrameObject
        Returns:
        the FrameObject converted to a JavaRDD<String>
      • matrixObjectToRDDStringIJV

        public static org.apache.spark.rdd.RDD<String> matrixObjectToRDDStringIJV(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject)
        Convert a MatrixObject to an RDD<String> in IJV format.
        Parameters:
        matrixObject - the MatrixObject
        Returns:
        the MatrixObject converted to an RDD<String>
      • frameObjectToRDDStringIJV

        public static org.apache.spark.rdd.RDD<String> frameObjectToRDDStringIJV(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject)
        Convert a FrameObject to an RDD<String> in IJV format.
        Parameters:
        frameObject - the FrameObject
        Returns:
        the FrameObject converted to an RDD<String>
      • matrixObjectToRDDStringCSV

        public static org.apache.spark.rdd.RDD<String> matrixObjectToRDDStringCSV(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject)
        Convert a MatrixObject to an RDD<String> in CSV format.
        Parameters:
        matrixObject - the MatrixObject
        Returns:
        the MatrixObject converted to an RDD<String>
      • frameObjectToRDDStringCSV

        public static org.apache.spark.rdd.RDD<String> frameObjectToRDDStringCSV(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject,
                                                                                 String delimiter)
        Convert a FrameObject to an RDD<String> in CSV format.
        Parameters:
        frameObject - the FrameObject
        delimiter - the delimiter
        Returns:
        the FrameObject converted to an RDD<String>
      • matrixObjectToListStringCSV

        public static List<String> matrixObjectToListStringCSV(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject)
        Convert a MatrixObject to a List<String> in CSV format.
        Parameters:
        matrixObject - the MatrixObject
        Returns:
        the MatrixObject converted to a List<String>
      • frameObjectToListStringCSV

        public static List<String> frameObjectToListStringCSV(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject,
                                                              String delimiter)
        Convert a FrameObject to a List<String> in CSV format.
        Parameters:
        frameObject - the FrameObject
        delimiter - the delimiter
        Returns:
        the FrameObject converted to a List<String>
      • matrixObjectToListStringIJV

        public static List<String> matrixObjectToListStringIJV(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject)
        Convert a MatrixObject to a List<String> in IJV format.
        Parameters:
        matrixObject - the MatrixObject
        Returns:
        the MatrixObject converted to a List<String>
      • frameObjectToListStringIJV

        public static List<String> frameObjectToListStringIJV(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject)
        Convert a FrameObject to a List<String> in IJV format.
        Parameters:
        frameObject - the FrameObject
        Returns:
        the FrameObject converted to a List<String>
      • matrixObjectTo2DDoubleArray

        public static double[][] matrixObjectTo2DDoubleArray(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject)
        Convert a MatrixObject to a two-dimensional double array.
        Parameters:
        matrixObject - the MatrixObject
        Returns:
        the MatrixObject converted to a double[][]
      • matrixObjectToDataFrame

        public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> matrixObjectToDataFrame(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject,
                                                                                                     org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext sparkExecutionContext,
                                                                                                     boolean isVectorDF)
        Convert a MatrixObject to a DataFrame.
        Parameters:
        matrixObject - the MatrixObject
        sparkExecutionContext - the Spark execution context
        isVectorDF - is the DataFrame a vector DataFrame?
        Returns:
        the MatrixObject converted to a DataFrame
      • frameObjectToDataFrame

        public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> frameObjectToDataFrame(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject,
                                                                                                    org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext sparkExecutionContext)
        Convert a FrameObject to a DataFrame.
        Parameters:
        frameObject - the FrameObject
        sparkExecutionContext - the Spark execution context
        Returns:
        the FrameObject converted to a DataFrame
      • matrixObjectToBinaryBlocks

        public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> matrixObjectToBinaryBlocks(org.apache.sysml.runtime.controlprogram.caching.MatrixObject matrixObject,
                                                                                                                  org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext sparkExecutionContext)
        Convert a MatrixObject to a JavaPairRDD<MatrixIndexes, MatrixBlock>.
        Parameters:
        matrixObject - the MatrixObject
        sparkExecutionContext - the Spark execution context
        Returns:
        the MatrixObject converted to a JavaPairRDD<MatrixIndexes, MatrixBlock>
      • frameObjectToBinaryBlocks

        public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> frameObjectToBinaryBlocks(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject,
                                                                                                       org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext sparkExecutionContext)
        Convert a FrameObject to a JavaPairRDD<Long, FrameBlock>.
        Parameters:
        frameObject - the FrameObject
        sparkExecutionContext - the Spark execution context
        Returns:
        the FrameObject converted to a JavaPairRDD<Long, FrameBlock>
      • frameObjectTo2DStringArray

        public static String[][] frameObjectTo2DStringArray(org.apache.sysml.runtime.controlprogram.caching.FrameObject frameObject)
        Convert a FrameObject to a two-dimensional string array.
        Parameters:
        frameObject - the FrameObject
        Returns:
        the FrameObject converted to a String[][]
      • jsc

        public static org.apache.spark.api.java.JavaSparkContext jsc()
        Obtain JavaSparkContext from MLContextProxy.
        Returns:
        the Java Spark Context
      • sc

        public static org.apache.spark.SparkContext sc()
        Obtain SparkContext from MLContextProxy.
        Returns:
        the Spark Context
      • spark

        public static org.apache.spark.sql.SparkSession spark()
        Obtain SparkSession from MLContextProxy.
        Returns:
        the Spark Session

Copyright © 2017 The Apache Software Foundation. All rights reserved.