Upload 11 files

Browse files

Files changed (11) hide show

README.md +47 -0
added_tokens.json +3 -0
config.json +38 -0
generation_config.json +7 -0
job_new.json +0 -0
measurement.json +0 -0
output.safetensors +3 -0
special_tokens_map.json +30 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +51 -0

README.md ADDED Viewed

	@@ -0,0 +1,47 @@

+---
+language:
+- en
+license: apache-2.0
+tags:
+- text-generation-inference
+- transformers
+- unsloth
+- mistral
+- trl
+- sft
+base_model: augmxnt/shisa-base-7b-v1
+datasets:
+- NilanE/ParallelFiction-Ja_En-100k
+- mpasila/ParallelFiction-Ja_En-100k-alpaca
+---
+This is a 4bpw ExLlamaV2 quantization of [mpasila/JP-EN-Translator-1K-steps-7B-merged](https://huggingface.co/mpasila/JP-EN-Translator-1K-steps-7B-merged), made using the default calibration dataset.
+# Original Model card
+This is an experimental model and may not perform that well. The dataset used is [a modified](https://huggingface.co/datasets/mpasila/ParallelFiction-Ja_En-100k-alpaca) version of [NilanE/ParallelFiction-Ja_En-100k](https://huggingface.co/datasets/NilanE/ParallelFiction-Ja_En-100k).
+The next version should be better (I'll use a GPU with more memory, since the dataset happens to contain pretty long samples).
+### Prompt format: Alpaca
+```
+Below is a translation task, paired with an input that provides further context. Write a response that appropriately completes the request.
+### Instruction:
+{}
+### Input:
+{}
+### Response:
+{}
+```
+# Uploaded model
+- **Developed by:** mpasila
+- **License:** apache-2.0
+- **Finetuned from model:** augmxnt/shisa-base-7b-v1
+This Mistral model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.
+[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<|extra_idx|>": 120073
+}

config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+    "_name_or_path": "augmxnt/shisa-base-7b-v1",
+    "architectures": [
+        "MistralForCausalLM"
+    ],
+    "attention_dropout": 0.0,
+    "bos_token_id": 1,
+    "eos_token_id": 2,
+    "hidden_act": "silu",
+    "hidden_size": 4096,
+    "initializer_range": 0.02,
+    "intermediate_size": 14336,
+    "max_position_embeddings": 32768,
+    "model_type": "mistral",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 32,
+    "num_key_value_heads": 8,
+    "rms_norm_eps": 1e-05,
+    "rope_theta": 10000.0,
+    "sliding_window": 4096,
+    "tie_word_embeddings": false,
+    "torch_dtype": "bfloat16",
+    "transformers_version": "4.39.1",
+    "unsloth_version": "2024.3",
+    "use_cache": false,
+    "vocab_size": 120074,
+    "quantization_config": {
+        "quant_method": "exl2",
+        "version": "0.0.16",
+        "bits": 4.0,
+        "head_bits": 6,
+        "calibration": {
+            "rows": 100,
+            "length": 2048,
+            "dataset": "(default)"
+        }
+    }
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.39.1",
+  "use_cache": false
+}

job_new.json ADDED Viewed

The diff for this file is too large to render. See raw diff

measurement.json ADDED Viewed

The diff for this file is too large to render. See raw diff

output.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:69297ddca1f76aba083e8f3bf1c310886415507a98179e8431aa48b88f179d48
+size 4863127892

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "120073": {
+      "content": "<|extra_idx|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 32768,
+  "pad_token": "<unk>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": true
+}