Support the latest Torch versions
#3
by
danieldk
HF Staff
- opened
This view is limited to 50 files because it contains too many changes.
See the raw diff here.
- build.toml +5 -1
- build/torch210-cxx11-cu126-x86_64-linux/__init__.py +48 -0
- build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/_ops.py +3 -3
- build/{torch28-cxx11-cu126-x86_64-linux/quantization/_quantization_eabe7c2.abi3.so → torch210-cxx11-cu126-x86_64-linux/_quantization_e384bb2.abi3.so} +2 -2
- build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/compressed_tensors.py +0 -0
- build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/cutlass.py +0 -0
- build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/marlin.py +0 -0
- build/torch210-cxx11-cu126-x86_64-linux/metadata.json +3 -0
- build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/platforms.py +0 -0
- build/torch210-cxx11-cu126-x86_64-linux/quantization/__init__.py +26 -0
- build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/scalar_type.py +0 -0
- build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/__init__.py +0 -0
- build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils.py +0 -0
- build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_fp4.py +0 -0
- build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_fp8.py +0 -0
- build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_test.py +0 -0
- build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_test_24.py +0 -0
- build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_test_qqq.py +0 -0
- build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/quant_utils.py +0 -0
- build/torch210-cxx11-cu128-x86_64-linux/__init__.py +48 -0
- build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/_ops.py +3 -3
- build/{torch28-cxx11-cu128-x86_64-linux/quantization/_quantization_eabe7c2.abi3.so → torch210-cxx11-cu128-x86_64-linux/_quantization_e384bb2.abi3.so} +2 -2
- build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/compressed_tensors.py +0 -0
- build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/cutlass.py +0 -0
- build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/marlin.py +0 -0
- build/torch210-cxx11-cu128-x86_64-linux/metadata.json +3 -0
- build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/platforms.py +0 -0
- build/torch210-cxx11-cu128-x86_64-linux/quantization/__init__.py +26 -0
- build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/scalar_type.py +0 -0
- build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/__init__.py +0 -0
- build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils.py +0 -0
- build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_fp4.py +0 -0
- build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_fp8.py +0 -0
- build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_test.py +0 -0
- build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_test_24.py +0 -0
- build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_test_qqq.py +0 -0
- build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/quant_utils.py +0 -0
- build/torch210-cxx11-cu130-x86_64-linux/__init__.py +48 -0
- build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/_ops.py +3 -3
- build/{torch28-cxx11-cu129-x86_64-linux/quantization/_quantization_eabe7c2.abi3.so → torch210-cxx11-cu130-x86_64-linux/_quantization_e384bb2.abi3.so} +2 -2
- build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/compressed_tensors.py +0 -0
- build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/cutlass.py +0 -0
- build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/marlin.py +0 -0
- build/torch210-cxx11-cu130-x86_64-linux/metadata.json +3 -0
- build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/platforms.py +0 -0
- build/torch210-cxx11-cu130-x86_64-linux/quantization/__init__.py +26 -0
- build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/scalar_type.py +0 -0
- build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/utils/__init__.py +0 -0
- build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/utils/marlin_utils.py +0 -0
- build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/utils/marlin_utils_fp4.py +0 -0
build.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
[general]
|
| 2 |
name = "quantization"
|
| 3 |
-
|
| 4 |
|
| 5 |
[torch]
|
| 6 |
include = ["."]
|
|
@@ -22,6 +22,7 @@ cuda-capabilities = [
|
|
| 22 |
"10.1",
|
| 23 |
"12.0",
|
| 24 |
]
|
|
|
|
| 25 |
depends = ["torch"]
|
| 26 |
include = ["."]
|
| 27 |
src = [
|
|
@@ -95,6 +96,7 @@ depends = ["torch"]
|
|
| 95 |
include = ["."]
|
| 96 |
src = [
|
| 97 |
"compressed_tensors/int8_quant_kernels.cu",
|
|
|
|
| 98 |
"dispatch_utils.h",
|
| 99 |
"vectorization_utils.cuh",
|
| 100 |
]
|
|
@@ -119,6 +121,7 @@ include = ["."]
|
|
| 119 |
src = [
|
| 120 |
"fp8/common.cu",
|
| 121 |
"fp8/common.cuh",
|
|
|
|
| 122 |
"dispatch_utils.h",
|
| 123 |
"utils.cuh",
|
| 124 |
"vectorization.cuh",
|
|
@@ -229,6 +232,7 @@ cuda-capabilities = [
|
|
| 229 |
"10.1",
|
| 230 |
"12.0",
|
| 231 |
]
|
|
|
|
| 232 |
depends = ["torch"]
|
| 233 |
include = ["."]
|
| 234 |
src = [
|
|
|
|
| 1 |
[general]
|
| 2 |
name = "quantization"
|
| 3 |
+
backends = ["cuda"]
|
| 4 |
|
| 5 |
[torch]
|
| 6 |
include = ["."]
|
|
|
|
| 22 |
"10.1",
|
| 23 |
"12.0",
|
| 24 |
]
|
| 25 |
+
cuda-flags = ["-static-global-template-stub=false"]
|
| 26 |
depends = ["torch"]
|
| 27 |
include = ["."]
|
| 28 |
src = [
|
|
|
|
| 96 |
include = ["."]
|
| 97 |
src = [
|
| 98 |
"compressed_tensors/int8_quant_kernels.cu",
|
| 99 |
+
"cub_helpers.h",
|
| 100 |
"dispatch_utils.h",
|
| 101 |
"vectorization_utils.cuh",
|
| 102 |
]
|
|
|
|
| 121 |
src = [
|
| 122 |
"fp8/common.cu",
|
| 123 |
"fp8/common.cuh",
|
| 124 |
+
"cub_helpers.h",
|
| 125 |
"dispatch_utils.h",
|
| 126 |
"utils.cuh",
|
| 127 |
"vectorization.cuh",
|
|
|
|
| 232 |
"10.1",
|
| 233 |
"12.0",
|
| 234 |
]
|
| 235 |
+
cuda-flags = ["-static-global-template-stub=false"]
|
| 236 |
depends = ["torch"]
|
| 237 |
include = ["."]
|
| 238 |
src = [
|
build/torch210-cxx11-cu126-x86_64-linux/__init__.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .compressed_tensors import scaled_fp8_quant, scaled_int8_quant
|
| 2 |
+
from .cutlass import (
|
| 3 |
+
cutlass_scaled_mm_supports_block_fp8,
|
| 4 |
+
cutlass_scaled_mm_supports_fp8,
|
| 5 |
+
cutlass_scaled_mm,
|
| 6 |
+
cutlass_scaled_mm_azp,
|
| 7 |
+
)
|
| 8 |
+
from .marlin import (
|
| 9 |
+
awq_marlin_repack,
|
| 10 |
+
gptq_marlin_gemm,
|
| 11 |
+
gptq_marlin_repack,
|
| 12 |
+
gptq_marlin_24_gemm,
|
| 13 |
+
marlin_qqq_gemm,
|
| 14 |
+
marlin_gemm,
|
| 15 |
+
)
|
| 16 |
+
from .scalar_type import (
|
| 17 |
+
ScalarType,
|
| 18 |
+
scalar_types,
|
| 19 |
+
)
|
| 20 |
+
from ._ops import ops
|
| 21 |
+
|
| 22 |
+
from .utils import marlin_utils
|
| 23 |
+
from .utils import marlin_utils_fp4
|
| 24 |
+
from .utils import marlin_utils_fp8
|
| 25 |
+
from .utils import quant_utils
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
__all__ = [
|
| 29 |
+
"ScalarType",
|
| 30 |
+
"awq_marlin_repack",
|
| 31 |
+
"cutlass_scaled_mm",
|
| 32 |
+
"cutlass_scaled_mm_azp",
|
| 33 |
+
"cutlass_scaled_mm_supports_block_fp8",
|
| 34 |
+
"cutlass_scaled_mm_supports_fp8",
|
| 35 |
+
"gptq_marlin_24_gemm",
|
| 36 |
+
"gptq_marlin_gemm",
|
| 37 |
+
"gptq_marlin_repack",
|
| 38 |
+
"marlin_gemm",
|
| 39 |
+
"marlin_qqq_gemm",
|
| 40 |
+
"marlin_utils",
|
| 41 |
+
"marlin_utils_fp4",
|
| 42 |
+
"marlin_utils_fp8",
|
| 43 |
+
"ops",
|
| 44 |
+
"quant_utils",
|
| 45 |
+
"scalar_types",
|
| 46 |
+
"scaled_fp8_quant",
|
| 47 |
+
"scaled_int8_quant",
|
| 48 |
+
]
|
build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/_ops.py
RENAMED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import
|
| 3 |
-
ops = torch.ops.
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _quantization_e384bb2
|
| 3 |
+
ops = torch.ops._quantization_e384bb2
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
    """
    Return ``op_name`` qualified with this build's op namespace.

    The result has the form ``_quantization_e384bb2::<op_name>``.
    """
    namespace = "_quantization_e384bb2"
    return "{}::{}".format(namespace, op_name)
|
build/{torch28-cxx11-cu126-x86_64-linux/quantization/_quantization_eabe7c2.abi3.so → torch210-cxx11-cu126-x86_64-linux/_quantization_e384bb2.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85e946af129d0f8ad6cd758d0270a5ff336a6cba021626d72b81e40529b55fc9
|
| 3 |
+
size 196115272
|
build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/compressed_tensors.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/cutlass.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/marlin.py
RENAMED
|
File without changes
|
build/torch210-cxx11-cu126-x86_64-linux/metadata.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"python-depends": []
|
| 3 |
+
}
|
build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/platforms.py
RENAMED
|
File without changes
|
build/torch210-cxx11-cu126-x86_64-linux/quantization/__init__.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ctypes
|
| 2 |
+
import sys
|
| 3 |
+
|
| 4 |
+
import importlib
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from types import ModuleType
|
| 7 |
+
|
| 8 |
+
def _import_from_path(file_path: Path) -> ModuleType:
    """Load the Python file at ``file_path`` as a module and return it.

    The module is registered in ``sys.modules`` under a name derived from the
    hash of the absolute path instead of the file name, so that the entry
    cannot be picked up by unrelated imports of a same-named module.

    Args:
        file_path: Path of the Python source file to execute as a module.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec (or loader) can be created for
            ``file_path``.
        Exception: Whatever the module raises while executing propagates
            unchanged; in that case the module is removed from
            ``sys.modules`` again.
    """
    # A plain `import importlib` does not guarantee that the `util` submodule
    # has been loaded, so import it explicitly before using it.
    import importlib.util

    # We cannot use the module name as-is: after adding it to `sys.modules`,
    # it would also be used for other imports. So, we make a module name that
    # depends on the path for it to be unique, using the hex-encoded hash of
    # the path. `c_size_t` maps a negative hash onto an unsigned value.
    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
    module_name = path_hash
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
    module = importlib.util.module_from_spec(spec)
    # Register the module before executing it so that imports performed by
    # the module's own code can find it in `sys.modules`.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module registered after a failure.
        sys.modules.pop(module_name, None)
        raise
    return module
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# Load the `__init__.py` located one directory above this package via
# `_import_from_path` and copy all of its module-level names into this
# module's namespace, so they can be accessed through this package too.
# NOTE(review): `vars()` returns the loaded module's whole dict, so its dunder
# attributes are copied as well — confirm this is intended.
globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
|
build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/scalar_type.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/__init__.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_fp4.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_fp8.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_test.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_test_24.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_test_qqq.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/quant_utils.py
RENAMED
|
File without changes
|
build/torch210-cxx11-cu128-x86_64-linux/__init__.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .compressed_tensors import scaled_fp8_quant, scaled_int8_quant
|
| 2 |
+
from .cutlass import (
|
| 3 |
+
cutlass_scaled_mm_supports_block_fp8,
|
| 4 |
+
cutlass_scaled_mm_supports_fp8,
|
| 5 |
+
cutlass_scaled_mm,
|
| 6 |
+
cutlass_scaled_mm_azp,
|
| 7 |
+
)
|
| 8 |
+
from .marlin import (
|
| 9 |
+
awq_marlin_repack,
|
| 10 |
+
gptq_marlin_gemm,
|
| 11 |
+
gptq_marlin_repack,
|
| 12 |
+
gptq_marlin_24_gemm,
|
| 13 |
+
marlin_qqq_gemm,
|
| 14 |
+
marlin_gemm,
|
| 15 |
+
)
|
| 16 |
+
from .scalar_type import (
|
| 17 |
+
ScalarType,
|
| 18 |
+
scalar_types,
|
| 19 |
+
)
|
| 20 |
+
from ._ops import ops
|
| 21 |
+
|
| 22 |
+
from .utils import marlin_utils
|
| 23 |
+
from .utils import marlin_utils_fp4
|
| 24 |
+
from .utils import marlin_utils_fp8
|
| 25 |
+
from .utils import quant_utils
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
__all__ = [
|
| 29 |
+
"ScalarType",
|
| 30 |
+
"awq_marlin_repack",
|
| 31 |
+
"cutlass_scaled_mm",
|
| 32 |
+
"cutlass_scaled_mm_azp",
|
| 33 |
+
"cutlass_scaled_mm_supports_block_fp8",
|
| 34 |
+
"cutlass_scaled_mm_supports_fp8",
|
| 35 |
+
"gptq_marlin_24_gemm",
|
| 36 |
+
"gptq_marlin_gemm",
|
| 37 |
+
"gptq_marlin_repack",
|
| 38 |
+
"marlin_gemm",
|
| 39 |
+
"marlin_qqq_gemm",
|
| 40 |
+
"marlin_utils",
|
| 41 |
+
"marlin_utils_fp4",
|
| 42 |
+
"marlin_utils_fp8",
|
| 43 |
+
"ops",
|
| 44 |
+
"quant_utils",
|
| 45 |
+
"scalar_types",
|
| 46 |
+
"scaled_fp8_quant",
|
| 47 |
+
"scaled_int8_quant",
|
| 48 |
+
]
|
build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/_ops.py
RENAMED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import
|
| 3 |
-
ops = torch.ops.
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _quantization_e384bb2
|
| 3 |
+
ops = torch.ops._quantization_e384bb2
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
    """
    Return ``op_name`` qualified with this build's op namespace.

    The result has the form ``_quantization_e384bb2::<op_name>``.
    """
    namespace = "_quantization_e384bb2"
    return "{}::{}".format(namespace, op_name)
|
build/{torch28-cxx11-cu128-x86_64-linux/quantization/_quantization_eabe7c2.abi3.so → torch210-cxx11-cu128-x86_64-linux/_quantization_e384bb2.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e996222b90650b5074935088ed5cf27816148197c6dd75023d56a0c7b7f67d1
|
| 3 |
+
size 332877416
|
build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/compressed_tensors.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/cutlass.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/marlin.py
RENAMED
|
File without changes
|
build/torch210-cxx11-cu128-x86_64-linux/metadata.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"python-depends": []
|
| 3 |
+
}
|
build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/platforms.py
RENAMED
|
File without changes
|
build/torch210-cxx11-cu128-x86_64-linux/quantization/__init__.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ctypes
|
| 2 |
+
import sys
|
| 3 |
+
|
| 4 |
+
import importlib
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from types import ModuleType
|
| 7 |
+
|
| 8 |
+
def _import_from_path(file_path: Path) -> ModuleType:
    """Load the Python file at ``file_path`` as a module and return it.

    The module is registered in ``sys.modules`` under a name derived from the
    hash of the absolute path instead of the file name, so that the entry
    cannot be picked up by unrelated imports of a same-named module.

    Args:
        file_path: Path of the Python source file to execute as a module.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec (or loader) can be created for
            ``file_path``.
        Exception: Whatever the module raises while executing propagates
            unchanged; in that case the module is removed from
            ``sys.modules`` again.
    """
    # A plain `import importlib` does not guarantee that the `util` submodule
    # has been loaded, so import it explicitly before using it.
    import importlib.util

    # We cannot use the module name as-is: after adding it to `sys.modules`,
    # it would also be used for other imports. So, we make a module name that
    # depends on the path for it to be unique, using the hex-encoded hash of
    # the path. `c_size_t` maps a negative hash onto an unsigned value.
    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
    module_name = path_hash
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
    module = importlib.util.module_from_spec(spec)
    # Register the module before executing it so that imports performed by
    # the module's own code can find it in `sys.modules`.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module registered after a failure.
        sys.modules.pop(module_name, None)
        raise
    return module
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# Load the `__init__.py` located one directory above this package via
# `_import_from_path` and copy all of its module-level names into this
# module's namespace, so they can be accessed through this package too.
# NOTE(review): `vars()` returns the loaded module's whole dict, so its dunder
# attributes are copied as well — confirm this is intended.
globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
|
build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/scalar_type.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/__init__.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_fp4.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_fp8.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_test.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_test_24.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_test_qqq.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/quant_utils.py
RENAMED
|
File without changes
|
build/torch210-cxx11-cu130-x86_64-linux/__init__.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .compressed_tensors import scaled_fp8_quant, scaled_int8_quant
|
| 2 |
+
from .cutlass import (
|
| 3 |
+
cutlass_scaled_mm_supports_block_fp8,
|
| 4 |
+
cutlass_scaled_mm_supports_fp8,
|
| 5 |
+
cutlass_scaled_mm,
|
| 6 |
+
cutlass_scaled_mm_azp,
|
| 7 |
+
)
|
| 8 |
+
from .marlin import (
|
| 9 |
+
awq_marlin_repack,
|
| 10 |
+
gptq_marlin_gemm,
|
| 11 |
+
gptq_marlin_repack,
|
| 12 |
+
gptq_marlin_24_gemm,
|
| 13 |
+
marlin_qqq_gemm,
|
| 14 |
+
marlin_gemm,
|
| 15 |
+
)
|
| 16 |
+
from .scalar_type import (
|
| 17 |
+
ScalarType,
|
| 18 |
+
scalar_types,
|
| 19 |
+
)
|
| 20 |
+
from ._ops import ops
|
| 21 |
+
|
| 22 |
+
from .utils import marlin_utils
|
| 23 |
+
from .utils import marlin_utils_fp4
|
| 24 |
+
from .utils import marlin_utils_fp8
|
| 25 |
+
from .utils import quant_utils
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
__all__ = [
|
| 29 |
+
"ScalarType",
|
| 30 |
+
"awq_marlin_repack",
|
| 31 |
+
"cutlass_scaled_mm",
|
| 32 |
+
"cutlass_scaled_mm_azp",
|
| 33 |
+
"cutlass_scaled_mm_supports_block_fp8",
|
| 34 |
+
"cutlass_scaled_mm_supports_fp8",
|
| 35 |
+
"gptq_marlin_24_gemm",
|
| 36 |
+
"gptq_marlin_gemm",
|
| 37 |
+
"gptq_marlin_repack",
|
| 38 |
+
"marlin_gemm",
|
| 39 |
+
"marlin_qqq_gemm",
|
| 40 |
+
"marlin_utils",
|
| 41 |
+
"marlin_utils_fp4",
|
| 42 |
+
"marlin_utils_fp8",
|
| 43 |
+
"ops",
|
| 44 |
+
"quant_utils",
|
| 45 |
+
"scalar_types",
|
| 46 |
+
"scaled_fp8_quant",
|
| 47 |
+
"scaled_int8_quant",
|
| 48 |
+
]
|
build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/_ops.py
RENAMED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import
|
| 3 |
-
ops = torch.ops.
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _quantization_e384bb2
|
| 3 |
+
ops = torch.ops._quantization_e384bb2
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
    """
    Return ``op_name`` qualified with this build's op namespace.

    The result has the form ``_quantization_e384bb2::<op_name>``.
    """
    namespace = "_quantization_e384bb2"
    return "{}::{}".format(namespace, op_name)
|
build/{torch28-cxx11-cu129-x86_64-linux/quantization/_quantization_eabe7c2.abi3.so → torch210-cxx11-cu130-x86_64-linux/_quantization_e384bb2.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f94359faa54a316eb410e331755a19c96c17e87d74ffae035ca8187d62f58891
|
| 3 |
+
size 289949016
|
build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/compressed_tensors.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/cutlass.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/marlin.py
RENAMED
|
File without changes
|
build/torch210-cxx11-cu130-x86_64-linux/metadata.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"python-depends": []
|
| 3 |
+
}
|
build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/platforms.py
RENAMED
|
File without changes
|
build/torch210-cxx11-cu130-x86_64-linux/quantization/__init__.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ctypes
|
| 2 |
+
import sys
|
| 3 |
+
|
| 4 |
+
import importlib
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from types import ModuleType
|
| 7 |
+
|
| 8 |
+
def _import_from_path(file_path: Path) -> ModuleType:
    """Load the Python file at ``file_path`` as a module and return it.

    The module is registered in ``sys.modules`` under a name derived from the
    hash of the absolute path instead of the file name, so that the entry
    cannot be picked up by unrelated imports of a same-named module.

    Args:
        file_path: Path of the Python source file to execute as a module.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec (or loader) can be created for
            ``file_path``.
        Exception: Whatever the module raises while executing propagates
            unchanged; in that case the module is removed from
            ``sys.modules`` again.
    """
    # A plain `import importlib` does not guarantee that the `util` submodule
    # has been loaded, so import it explicitly before using it.
    import importlib.util

    # We cannot use the module name as-is: after adding it to `sys.modules`,
    # it would also be used for other imports. So, we make a module name that
    # depends on the path for it to be unique, using the hex-encoded hash of
    # the path. `c_size_t` maps a negative hash onto an unsigned value.
    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
    module_name = path_hash
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
    module = importlib.util.module_from_spec(spec)
    # Register the module before executing it so that imports performed by
    # the module's own code can find it in `sys.modules`.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module registered after a failure.
        sys.modules.pop(module_name, None)
        raise
    return module
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# Load the `__init__.py` located one directory above this package via
# `_import_from_path` and copy all of its module-level names into this
# module's namespace, so they can be accessed through this package too.
# NOTE(review): `vars()` returns the loaded module's whole dict, so its dunder
# attributes are copied as well — confirm this is intended.
globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
|
build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/scalar_type.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/utils/__init__.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/utils/marlin_utils.py
RENAMED
|
File without changes
|
build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/utils/marlin_utils_fp4.py
RENAMED
|
File without changes
|