12 changes: 12 additions & 0 deletions bin/systemds-standalone.sh
Contributor
This file should not be part of the PR. You can keep it locally but you should untrack it and not add it to your commits. You could use git rm --cached bin/systemds-standalone.sh.

@@ -0,0 +1,12 @@
#!/bin/bash
# Standalone launcher for SystemDS

SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
JAR_FILE="$SCRIPT_DIR/../target/systemds-3.4.0-SNAPSHOT.jar"

if [ ! -f "$JAR_FILE" ]; then
echo "ERROR: Standalone JAR nicht gefunden: $JAR_FILE"
exit 1
fi

java -cp "$JAR_FILE" org.apache.sysds.api.DMLScript "$@"
@@ -55,16 +55,16 @@ public class CompressionSettings {
/**
* The sampling ratio used when choosing ColGroups. Note that the default behavior is to use the exact estimator
* if the number of elements is below 1000.
*
* DEPRECATED
*/
public final double samplingRatio;

/**
* The sampling ratio power to use when choosing the sample size. It is used according to the function:
*
* sampleSize += nRows^samplePower;
*
* The value is bounded to the range 0 to 1, where 1 gives a sample size of everything and 0 adds 1.
*/
public final double samplePower;
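For intuition, a minimal sketch of the sample-size rule described in the javadoc above; the helper name, the base size, and the clamping are illustrative assumptions, not the estimator's actual code:

// Illustrative only: sampleSize += nRows^samplePower, with samplePower bounded to [0, 1].
static int sampleSize(int nRows, double samplePower, int baseSize) {
    double p = Math.max(0.0, Math.min(1.0, samplePower)); // bounded per the javadoc
    double size = baseSize + Math.pow(nRows, p);          // 0 adds 1 row, 1 adds all rows
    return (int) Math.min(nRows, size);
}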
@@ -114,8 +114,9 @@ public class CompressionSettings {
/**
* Transpose input matrix, to optimize access when extracting bitmaps. This setting is changed inside the script
* based on the transposeInput setting.
*
* This is intentionally left as a mutable value, since the transposition of the input matrix is decided in phase
* 3.
*/
public boolean transposed = false;

@@ -135,6 +136,19 @@ public class CompressionSettings {

public final boolean preferDeltaEncoding;

// Target-loss handling for piecewise linear compression

private double piecewiseTargetLoss = Double.NaN;

public void setPiecewiseTargetLoss(double piecewiseTargetLoss) {
this.piecewiseTargetLoss = piecewiseTargetLoss;
}

public double getPiecewiseTargetLoss() {
return piecewiseTargetLoss;
}

protected CompressionSettings(double samplingRatio, double samplePower, boolean allowSharedDictionary,
String transposeInput, int seed, boolean lossy, EnumSet<CompressionType> validCompressions,
boolean sortValuesByLength, PartitionerType columnPartitioner, int maxColGroupCoCode, double coCodePercentage,
@@ -161,7 +175,7 @@ protected CompressionSettings(double samplingRatio, double samplePower, boolean
this.sdcSortType = sdcSortType;
this.scaleFactors = scaleFactors;
this.preferDeltaEncoding = preferDeltaEncoding;

if(!printedStatus && LOG.isDebugEnabled()) {
printedStatus = true;
LOG.debug(this.toString());
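A minimal usage sketch for the new target-loss setting. It assumes the repository's existing CompressionSettingsBuilder to obtain an instance (the constructor above is protected), and the 1e-4 budget is an arbitrary illustrative value:

// Hedged sketch: configure a loss budget for piecewise linear compression.
CompressionSettings cs = new CompressionSettingsBuilder().create();
cs.setPiecewiseTargetLoss(1e-4);             // tolerated reconstruction loss
double budget = cs.getPiecewiseTargetLoss(); // NaN until explicitly set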
Contributor
It seems like you reformatted the file to revert the tabs -> spaces conversion, which is good. However, there are still many unnecessary changes. I would recommend that you revert this file to its original state in the repository and then only add the new enum value CompressionType.PiecewiseLinear.

@@ -65,7 +65,7 @@ public abstract class AColGroup implements Serializable {

/** Public super types of compression ColGroups supported */
public static enum CompressionType {
UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCFOR, DDCFOR, DeltaDDC, LinearFunctional;
UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCFOR, DDCFOR, DeltaDDC, LinearFunctional, PiecewiseLinear;

public boolean isDense() {
return this == DDC || this == CONST || this == DDCFOR;
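For context, a hedged sketch of how a caller could request the new compression type; the assumption is that such an EnumSet is passed as the validCompressions setting seen in the CompressionSettings constructor above:

// Illustrative only: enable PiecewiseLinear next to a couple of existing types.
EnumSet<CompressionType> valid = EnumSet.of(
    CompressionType.DDC, CompressionType.SDC, CompressionType.PiecewiseLinear);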
@@ -43,6 +43,7 @@
import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory;
import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary;
import org.apache.sysds.runtime.compress.colgroup.functional.LinearRegression;
import org.apache.sysds.runtime.compress.colgroup.functional.PiecewiseLinearUtils;
import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory;
import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
import org.apache.sysds.runtime.compress.colgroup.insertionsort.AInsertionSorter;
@@ -106,7 +107,7 @@ private ColGroupFactory(MatrixBlock in, CompressedSizeInfo csi, CompressionSetti

/**
* The actual compression method that handles the logic of compressing multiple columns together.
*
* @param in The input matrix, which may have been transposed. If it is transposed, the compSettings should specify
* this.
* @param csi The compression information extracted from the estimation; this contains which groups of columns to
@@ -120,7 +121,7 @@ public static List<AColGroup> compressColGroups(MatrixBlock in, CompressedSizeIn

/**
* The actual compression method that handles the logic of compressing multiple columns together.
*
* @param in The input matrix, which may have been transposed. If it is transposed, the compSettings should specify
* this.
* @param csi The compression information extracted from the estimation; this contains which groups of columns to
@@ -135,7 +136,7 @@ public static List<AColGroup> compressColGroups(MatrixBlock in, CompressedSizeIn
}

/**
*
* @param in The input matrix, which may have been transposed. If it is transposed, the compSettings should specify
* this.
* @param csi The compression information extracted from the estimation; this contains which groups of columns to
@@ -232,8 +233,9 @@ private void logEstVsActual(double time, AColGroup act, CompressedSizeInfoColGro
time, retType, estC, actC, act.getNumValues(), cols, wanted, warning));
}
else {
LOG.debug(
String.format("time[ms]: %10.2f %25s est %10.0f -- act %10.0f distinct:%5d cols:%s wanted:%s", time,
retType, estC, actC, act.getNumValues(), cols, wanted));
}

}
@@ -303,6 +305,9 @@ else if(ct == CompressionType.LinearFunctional) {
return compressLinearFunctional(colIndexes, in, cs);
}
}
else if(ct == CompressionType.PiecewiseLinear) {
return compressPiecewiseLinearFunctional(colIndexes, in, cs);
}
else if(ct == CompressionType.DDCFOR) {
AColGroup g = directCompressDDC(colIndexes, cg);
if(g instanceof ColGroupDDC)
@@ -698,7 +703,7 @@ private AColGroup directCompressDeltaDDC(IColIndex colIndexes, CompressedSizeInf
if(cs.scaleFactors != null) {
throw new NotImplementedException("Delta encoding with quantization not yet implemented");
}

if(colIndexes.size() > 1) {
return directCompressDeltaDDCMultiCol(colIndexes, cg);
}
@@ -730,7 +735,7 @@ private AColGroup directCompressDeltaDDCSingleCol(IColIndex colIndexes, Compress

if(map.size() == 0)
return new ColGroupEmpty(colIndexes);

final double[] dictValues = map.getDictionary();
IDictionary dict = new DeltaDictionary(dictValues, 1);

@@ -739,7 +744,8 @@
return ColGroupDeltaDDC.create(colIndexes, dict, resData, null);
}

private AColGroup directCompressDeltaDDCMultiCol(IColIndex colIndexes, CompressedSizeInfoColGroup cg)
throws Exception {
final AMapToData d = MapToFactory.create(nRow, Math.max(Math.min(cg.getNumOffs() + 1, nRow), 126));
final int fill = d.getUpperBoundValue();
d.fill(fill);
@@ -818,8 +824,8 @@ private boolean readToMapDDC(IColIndex colIndexes, DblArrayCountHashMap map, AMa
int fill) {

ReaderColumnSelection reader = (cs.scaleFactors == null) ? ReaderColumnSelection.createReader(in, colIndexes,
cs.transposed, rl, ru) : ReaderColumnSelection.createQuantizedReader(in, colIndexes, cs.transposed, rl, ru,
cs.scaleFactors);

DblArray cellVals = reader.nextRow();
boolean extra = false;
@@ -1066,6 +1072,44 @@ private static AColGroup compressLinearFunctional(IColIndex colIndexes, MatrixBl
return ColGroupLinearFunctional.create(colIndexes, coefficients, numRows);
}

public static AColGroup compressPiecewiseLinearFunctional(
IColIndex colIndexes, MatrixBlock in, CompressionSettings cs) {

final int numRows = in.getNumRows();
AColGroup result = null;

// Compress each column separately
for (int col = 0; col < colIndexes.size(); col++) {
// Slice out the single-column index
IColIndex.SliceResult sliceResult = colIndexes.slice(col, col + 1);
IColIndex singleColIndex = sliceResult.ret; // .ret holds the sliced IColIndex

// Extract the column values from the matrix
final int colIdx = colIndexes.get(col);
double[] column = PiecewiseLinearUtils.getColumn(in, colIdx);

// Fit a segmented (piecewise) linear regression to the column
PiecewiseLinearUtils.SegmentedRegression fit =
PiecewiseLinearUtils.compressSegmentedLeastSquares(column, cs);

AColGroup singleGroup = ColGroupPiecewiseLinearCompressed.create(
singleColIndex,
fit.getBreakpoints(),
fit.getSlopes(),
fit.getIntercepts(),
numRows);

// Combine the per-column groups into a single result group
if (result == null) {
result = singleGroup;
} else {
result = result.combineWithSameIndex(numRows, col, singleGroup);
}
}

return result;
}

private AColGroup compressSDCFromSparseTransposedBlock(IColIndex cols, int nrUniqueEstimate, double tupleSparsity) {
if(cols.size() > 1)
return compressMultiColSDCFromSparseTransposedBlock(cols, nrUniqueEstimate, tupleSparsity);
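PiecewiseLinearUtils.compressSegmentedLeastSquares and its SegmentedRegression result are added elsewhere in this PR and are not visible in this diff. As a reading aid, here is a self-contained sketch of one way a greedy segmented least-squares fit can work; the class, method names, and the greedy growth strategy are illustrative assumptions, not the PR's actual implementation:

// Self-contained, illustrative sketch of a greedy segmented least-squares fit.
// All names here (Fit, fit, leastSquares, mse) are assumptions for illustration,
// not the API of PiecewiseLinearUtils.
import java.util.ArrayList;
import java.util.List;

public class SegmentedFitSketch {

    /** One fitted segment per index i: rows from breakpoints.get(i) up to the next breakpoint. */
    public static final class Fit {
        public final List<Integer> breakpoints = new ArrayList<>();
        public final List<Double> slopes = new ArrayList<>();
        public final List<Double> intercepts = new ArrayList<>();
    }

    /** Greedily grows each segment while the refit's mean squared error stays within targetLoss. */
    public static Fit fit(double[] col, double targetLoss) {
        Fit f = new Fit();
        int start = 0;
        while (start < col.length) {
            int end = start + 1;                  // current segment is [start, end)
            double slope = 0, intercept = col[start];
            while (end < col.length) {
                double[] cand = leastSquares(col, start, end + 1);
                if (mse(col, start, end + 1, cand[0], cand[1]) > targetLoss)
                    break;                        // extending would exceed the loss budget
                slope = cand[0];
                intercept = cand[1];
                end++;
            }
            f.breakpoints.add(start);
            f.slopes.add(slope);
            f.intercepts.add(intercept);
            start = end;                          // next segment starts where this one ended
        }
        return f;
    }

    /** Closed-form 1D least squares over col[from, to) with x = row index; returns {slope, intercept}. */
    private static double[] leastSquares(double[] col, int from, int to) {
        int n = to - from;
        double sx = 0, sy = 0, sxx = 0, sxy = 0;
        for (int i = from; i < to; i++) {
            sx += i;
            sy += col[i];
            sxx += (double) i * i;
            sxy += (double) i * col[i];
        }
        double denom = n * sxx - sx * sx;
        double slope = denom == 0 ? 0 : (n * sxy - sx * sy) / denom;
        double intercept = (sy - slope * sx) / n;
        return new double[] {slope, intercept};
    }

    /** Mean squared error of the line over col[from, to). */
    private static double mse(double[] col, int from, int to, double slope, double intercept) {
        double err = 0;
        for (int i = from; i < to; i++) {
            double d = col[i] - (slope * i + intercept);
            err += d * d;
        }
        return err / (to - from);
    }
}

Decompression would then reconstruct row r of a segment as slope * r + intercept, which is what makes the representation lossy up to the configured target loss.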