Update app.py
app.py CHANGED
@@ -144,7 +144,7 @@ class HumanLikeVariations:
         ]

     def add_human_touch(self, text):
-        """Add subtle human-like imperfections - MORE
         sentences = text.split('. ')
         modified_sentences = []

@@ -152,80 +152,115 @@ class HumanLikeVariations:
             if not sent.strip():
                 continue

                 sent = transition + sent[0].lower() + sent[1:] if len(sent) > 1 else sent

-            # Add filler words occasionally (20% chance -
-            if random.random() < 0.2 and len(

                 sent = starter + " " + sent[0].lower() + sent[1:] if len(sent) > 1 else sent

-            # Occasionally use contractions (35% chance
             if random.random() < 0.35:
                 sent = self.apply_contractions(sent)

-            # Add occasional comma splices (10% chance) -
-            if random.random() < 0.1 and ',' in sent and len(
                 parts = sent.split(', ')
-            if len(parts)
-                    "Sound familiar?"
-                    "Following along?",
-                    "Crazy, right?",
-                    "Wild, isn't it?"
-                ]
-                sent = sent + " " + random.choice(rhetorical_questions)

             modified_sentences.append(sent)

@@ -261,27 +296,41 @@ class HumanLikeVariations:
         return text

     def add_minor_errors(self, text):
-        """Add very minor, human-like errors - MORE REALISTIC"""
         # Occasionally miss Oxford comma (15% chance)
         if random.random() < 0.15:

         # Sometimes use 'which' instead of 'that' (8% chance)
         if random.random() < 0.08:

-        # NEW: Add very occasional typos (
         sentences = text.split('. ')
         for i, sent in enumerate(sentences):
-            if random.random() < 0.
                 words = sent.split()
                 # Pick a random word to potentially typo
-                word_idx = random.randint(
                 word = words[word_idx].lower()

-                # Only typo common words
                     # Preserve original capitalization
                     if words[word_idx][0].isupper():
                         typo = typo[0].upper() + typo[1:]

@@ -290,28 +339,22 @@ class HumanLikeVariations:

         text = '. '.join(sentences)

-        if random.random() < 0.02:
-            words = text.split()
-            if len(words) > 20:
-                # Pick a small common word to double
-                small_words = ['the', 'a', 'an', 'is', 'was', 'are', 'were', 'to', 'of', 'in', 'on']
-                for idx, word in enumerate(words):
-                    if word.lower() in small_words and random.random() < 0.1:
-                        words[idx] = word + ' ' + word
-                        break
-                text = ' '.join(words)

-        if random.random() < 0.
             ]
-            for pair in

         return text

@@ -1161,7 +1204,7 @@ class EnhancedDipperHumanizer:
         return text

     def apply_sentence_variation(self, text):
-        """Apply natural sentence structure variations - MORE
         sentences = self.split_into_sentences_advanced(text)
         varied_sentences = []

@@ -1170,89 +1213,143 @@ class EnhancedDipperHumanizer:
             if not sentence.strip():
                 continue

             if (i < len(sentences) - 1 and
                 len(sentences[i+1].split()) < 15 and
                 random.random() < 0.5):

-                connectors = [', and', ', but', '; however,', '. Also,', '. Plus,', ', so', ', which means',
-                              ' - and', ' - but', '; meanwhile,', '. That said,', ', yet', ' - though']
-                connector = random.choice(connectors)
-                # Handle the next sentence properly
                 next_sent = sentences[i+1].strip()
                 if next_sent:

             elif sentence:  # Only process non-empty sentences
-                # Split very long sentences more
                 else:
                     varied_sentences.append(sentence)
             else:
-                # Add natural variations more often (35% chance)
                 if i > 0 and random.random() < 0.35:

-                # Add mid-sentence interruptions (10% chance)
-                if random.random() < 0.1 and len(

                 varied_sentences.append(sentence)

         # Post-process for additional human patterns
         result = ' '.join([s for s in varied_sentences if s])

-        # Add occasional fragments for human touch (5% chance)
-        if random.random() < 0.05:
-            fragments = [
-                "Crazy, I know.",
-                "Wild stuff.",
-                "Makes you think.",
-                "Pretty interesting.",
-                "Go figure.",
-                "Who knew?",
-                "There you have it.",
-                "Food for thought.",
-                "Just saying.",
-                "Worth considering."
-            ]
             sentences = result.split('. ')

         return result
@@ -144,7 +144,7 @@ class HumanLikeVariations:
         ]

     def add_human_touch(self, text):
+        """Add subtle human-like imperfections - MORE CONTEXT-AWARE"""
         sentences = text.split('. ')
         modified_sentences = []

@@ -152,80 +152,115 @@ class HumanLikeVariations:
             if not sent.strip():
                 continue

+            # Parse sentence structure for better filler placement
+            words = sent.split()
+            if not words:
+                continue
+
+            # Occasionally start with casual transition (25% chance)
+            if i > 0 and random.random() < 0.25 and len(words) > 5:
+                # Choose transitions based on sentence type
+                if any(q in sent.lower() for q in ['why', 'how', 'what', 'when', 'where']):
+                    # Question-appropriate transitions
+                    transition = random.choice(["So, ", "Well, ", "Now, ", "Okay, ", "Right, "])
+                elif any(w in sent.lower() for w in ['however', 'but', 'although', 'despite']):
+                    # Contrast-appropriate transitions
+                    transition = random.choice(["Still, ", "Yet, ", "Even so, ", "That said, ", "Nonetheless, "])
+                else:
+                    # General transitions
+                    transition = random.choice(self.casual_transitions[:20])  # Use more common ones
+
                 sent = transition + sent[0].lower() + sent[1:] if len(sent) > 1 else sent

+            # Add filler words occasionally (20% chance) - SMARTER PLACEMENT
+            if random.random() < 0.2 and len(words) > 8:
+                # Find good positions for fillers (after verbs, before adjectives, etc.)
+                good_positions = []
+
+                for idx, word in enumerate(words):
+                    if idx > 0 and idx < len(words) - 1:
+                        # After "is/are/was/were" (good for "really", "actually", etc.)
+                        if word.lower() in ['is', 'are', 'was', 'were', 'been', 'be']:
+                            good_positions.append(idx + 1)
+                        # Before adjectives (good for "quite", "rather", etc.)
+                        elif words[idx-1].lower() in ['a', 'an', 'the', 'very', 'so']:
+                            good_positions.append(idx)
+                        # After "can/could/will/would" (good for "probably", "definitely", etc.)
+                        elif word.lower() in ['can', 'could', 'will', 'would', 'should', 'might', 'may']:
+                            good_positions.append(idx + 1)
+
+                if good_positions:
+                    insert_pos = random.choice(good_positions)
+                    # Choose appropriate filler based on context
+                    if words[insert_pos-1].lower() in ['is', 'are', 'was', 'were']:
+                        filler = random.choice(['really', 'actually', 'definitely', 'certainly', 'quite'])
+                    elif words[insert_pos-1].lower() in ['can', 'could', 'will', 'would']:
+                        filler = random.choice(['probably', 'definitely', 'certainly', 'likely', 'possibly'])
+                    else:
+                        filler = random.choice(['quite', 'rather', 'pretty', 'fairly', 'somewhat'])
+
+                    words.insert(insert_pos, filler)
+                    sent = ' '.join(words)
+
+            # Add varied sentence starters (15% chance) - MORE LOGICAL
+            if i > 0 and random.random() < 0.15 and len(words) > 10:
+                # Choose starters based on sentence content
+                if any(w in sent.lower() for w in ['research', 'study', 'data', 'evidence']):
+                    starter = random.choice(["Research shows", "Studies indicate", "Evidence suggests", "Data reveals"])
+                elif any(w in sent.lower() for w in ['important', 'crucial', 'vital', 'essential']):
+                    starter = random.choice(["It's worth noting that", "Keep in mind", "Bear in mind that", "The key here is"])
+                else:
+                    starter = random.choice(["When it comes to", "As for", "Regarding", "In terms of"])
+
                 sent = starter + " " + sent[0].lower() + sent[1:] if len(sent) > 1 else sent

+            # Occasionally use contractions (35% chance)
             if random.random() < 0.35:
                 sent = self.apply_contractions(sent)

+            # Add occasional comma splices (10% chance) - ONLY WHERE IT MAKES SENSE
+            if random.random() < 0.1 and ',' in sent and len(words) > 10:
+                # Only do this with independent clauses
                 parts = sent.split(', ')
+                if len(parts) == 2:
+                    # Check if both parts could be sentences
+                    if (len(parts[0].split()) > 4 and len(parts[1].split()) > 4 and
+                        any(v in parts[1].lower().split()[:3] for v in ['it', 'this', 'that', 'they', 'we', 'i', 'you'])):
+                        sent = parts[0] + ', ' + parts[1]  # Keep the comma splice
+
+            # NEW: Add parenthetical thoughts (8% chance) - CONTEXT-AWARE
+            if random.random() < 0.08 and len(words) > 15:
+                # Find natural break points (after complete thoughts)
+                break_points = []
+                for idx, word in enumerate(words):
+                    if idx > len(words)//3 and idx < 2*len(words)//3:
+                        if word.endswith(',') or words[idx-1].lower() in ['is', 'are', 'was', 'were']:
+                            break_points.append(idx)
+
+                if break_points:
+                    insert_pos = random.choice(break_points)
+                    # Choose relevant parenthetical
+                    if any(w in sent.lower() for w in ['surprising', 'interesting', 'amazing']):
+                        parenthetical = random.choice(["(and that's saying something)", "(believe it or not)", "(surprisingly enough)"])
+                    elif any(w in sent.lower() for w in ['obvious', 'clear', 'evident']):
+                        parenthetical = random.choice(["(obviously)", "(clearly)", "(of course)"])
+                    else:
+                        parenthetical = random.choice(["(which makes sense)", "(for good reason)", "(as you'd expect)"])
+
+                    words.insert(insert_pos, parenthetical)
+                    sent = ' '.join(words)
+
+            # NEW: Occasionally add rhetorical questions (5% chance) - ONLY AT PARAGRAPH ENDS
+            if random.random() < 0.05 and i == len(sentences) - 1:
+                # Choose question based on sentence content
+                if any(w in sent.lower() for w in ['amazing', 'incredible', 'fantastic']):
+                    question = random.choice(["Pretty cool, right?", "Amazing, isn't it?", "Impressive, huh?"])
+                elif any(w in sent.lower() for w in ['important', 'crucial', 'essential']):
+                    question = random.choice(["Makes sense, right?", "See what I mean?", "Important to remember, yeah?"])
+                else:
+                    question = random.choice(["Interesting, right?", "Makes you think, doesn't it?", "Sound familiar?"])
+
+                sent = sent + " " + question

             modified_sentences.append(sent)

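To see what the filler-placement scan above actually selects, here is a minimal standalone sketch of just the position-finding loop; the sample sentence and the find_filler_positions name are illustrative, not part of app.py.

def find_filler_positions(words):
    # Mirrors the scan in add_human_touch: collect indices where a filler
    # word ('really', 'probably', 'quite', ...) could be slotted in.
    positions = []
    for idx, word in enumerate(words):
        if 0 < idx < len(words) - 1:
            if word.lower() in ['is', 'are', 'was', 'were', 'been', 'be']:
                positions.append(idx + 1)   # right after a linking verb
            elif words[idx - 1].lower() in ['a', 'an', 'the', 'very', 'so']:
                positions.append(idx)       # right after an article or intensifier
            elif word.lower() in ['can', 'could', 'will', 'would', 'should', 'might', 'may']:
                positions.append(idx + 1)   # right after a modal verb
    return positions

words = "The results are a strong signal that the model can generalize well".split()
print(find_filler_positions(words))  # -> [1, 3, 4, 8, 10]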
@@ -261,27 +296,41 @@ class HumanLikeVariations:
         return text

     def add_minor_errors(self, text):
+        """Add very minor, human-like errors - MORE REALISTIC BUT CONTROLLED"""
         # Occasionally miss Oxford comma (15% chance)
         if random.random() < 0.15:
+            # Only in lists, not random commas
+            text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)

         # Sometimes use 'which' instead of 'that' (8% chance)
         if random.random() < 0.08:
+            # Only for non-restrictive clauses
+            matches = re.finditer(r'\b(\w+) that (\w+)', text)
+            for match in list(matches)[:1]:  # Only first occurrence
+                if match.group(1).lower() not in ['believe', 'think', 'know', 'say']:
+                    text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)

+        # NEW: Add very occasional typos (2% chance per sentence) - REDUCED AND CONTROLLED
         sentences = text.split('. ')
         for i, sent in enumerate(sentences):
+            if random.random() < 0.02 and len(sent.split()) > 15:  # Only in longer sentences
                 words = sent.split()
                 # Pick a random word to potentially typo
+                word_idx = random.randint(len(words)//2, len(words)-2)  # Avoid start/end
                 word = words[word_idx].lower()

+                # Only typo common words where typo won't break meaning
+                safe_typos = {
+                    'the': 'teh',
+                    'and': 'adn',
+                    'that': 'taht',
+                    'with': 'wtih',
+                    'from': 'form',
+                    'because': 'becuase'
+                }
+
+                if word in safe_typos and random.random() < 0.5:
+                    typo = safe_typos[word]
                     # Preserve original capitalization
                     if words[word_idx][0].isupper():
                         typo = typo[0].upper() + typo[1:]

@@ -290,28 +339,22 @@ class HumanLikeVariations:

         text = '. '.join(sentences)

+        # Skip double words - too distracting

+        # Mix up common homophones occasionally (2% chance) - ONLY SAFE ONES
+        if random.random() < 0.02:
+            safe_homophones = [
+                ('its', "it's"),  # Very common mistake
+                ('your', "you're"),  # Another common one
             ]
+            for pair in safe_homophones:
+                # Check context to avoid breaking meaning
+                if f" {pair[0]} " in text and random.random() < 0.3:
+                    # Find one instance and check it's safe to replace
+                    pattern = rf'\b{pair[0]}\s+(\w+ing|\w+ed)\b'  # its + verb = likely should be it's
+                    if re.search(pattern, text):
+                        text = re.sub(pattern, f"{pair[1]} \\1", text, count=1)
+                        break

         return text

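As a quick sanity check on the two regular expressions introduced in add_minor_errors, the following standalone snippet applies them to sample strings; the sample text is illustrative and not from the app.

import re

# Oxford-comma drop: "a, b, and c" becomes "a, b and c"
text = "The tool handles drafts, emails, and reports."
print(re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text))
# -> The tool handles drafts, emails and reports.

# Homophone swap: "its" followed by an -ing/-ed word is rewritten as "it's"
text = "The model keeps improving because its learning from feedback."
pattern = r"\bits\s+(\w+ing|\w+ed)\b"
print(re.sub(pattern, "it's \\1", text, count=1))
# -> The model keeps improving because it's learning from feedback.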
@@ -1161,7 +1204,7 @@ class EnhancedDipperHumanizer:
         return text

     def apply_sentence_variation(self, text):
+        """Apply natural sentence structure variations - MORE INTELLIGENT"""
         sentences = self.split_into_sentences_advanced(text)
         varied_sentences = []

@@ -1170,89 +1213,143 @@ class EnhancedDipperHumanizer:
             if not sentence.strip():
                 continue

+            words = sentence.split()
+
+            # Combine short sentences more often (50% chance) - BUT SMARTLY
             if (i < len(sentences) - 1 and
+                len(words) < 15 and
                 len(sentences[i+1].split()) < 15 and
                 random.random() < 0.5):

                 next_sent = sentences[i+1].strip()
                 if next_sent:
+                    # Check if sentences are related (share common words or themes)
+                    current_words = set(w.lower() for w in words if len(w) > 3)
+                    next_words = set(w.lower() for w in next_sent.split() if len(w) > 3)
+
+                    # Only combine if they share context or one follows from the other
+                    if current_words & next_words or any(w in next_sent.lower() for w in ['this', 'that', 'these', 'those', 'it']):
+                        # Choose appropriate connector based on relationship
+                        if any(w in next_sent.lower().split()[:3] for w in ['however', 'but', 'yet', 'although']):
+                            connector = random.choice([', but', '; however,', ', yet', ' - though'])
+                        elif any(w in next_sent.lower().split()[:3] for w in ['therefore', 'thus', 'so', 'hence']):
+                            connector = random.choice([', so', '. Therefore,', ', which means', ' - thus'])
+                        elif any(w in next_sent.lower().split()[:3] for w in ['also', 'additionally', 'furthermore']):
+                            connector = random.choice([', and', '. Also,', '. Plus,', ' - additionally,'])
+                        else:
+                            connector = random.choice([', and', ', which', ' - '])
+
+                        combined = f"{sentence.rstrip('.')}{connector} {next_sent[0].lower()}{next_sent[1:]}"
+                        varied_sentences.append(combined)
+                        sentences[i+1] = ""  # Mark as processed
+                    else:
+                        varied_sentences.append(sentence)
+                else:
+                    varied_sentences.append(sentence)

             elif sentence:  # Only process non-empty sentences
+                # Split very long sentences more intelligently
+                if len(words) > 18:
+                    # Look for natural break points
+                    break_words = ['however', 'therefore', 'moreover', 'furthermore', 'additionally', 'consequently']
+                    conjunctions = [', and', ', but', ', so', ', yet', ', for', ', or', ', nor']
+
+                    # Find the best break point
+                    best_break = -1
+                    for idx, word in enumerate(words):
+                        if word.lower().rstrip(',') in break_words and idx > len(words)//3:
+                            best_break = idx
+                            break
+
+                    # If no break word found, look for conjunctions
+                    if best_break == -1:
+                        text_lower = sentence.lower()
+                        for conj in conjunctions:
+                            if conj in text_lower:
+                                # Find position in word list
+                                conj_pos = text_lower.find(conj)
+                                word_count = len(text_lower[:conj_pos].split())
+                                if len(words)//3 < word_count < 2*len(words)//3:
+                                    best_break = word_count
+                                    break
+
+                    # Split if good break point found
+                    if best_break > 0 and random.random() < 0.7:
+                        part1 = ' '.join(words[:best_break])
+                        part2 = ' '.join(words[best_break:])
+
+                        # Clean up punctuation
+                        part1 = part1.rstrip(',') + '.'
+                        # Capitalize second part appropriately
+                        if part2 and part2[0].islower() and not part2.startswith(('however', 'therefore', 'moreover')):
+                            part2 = part2[0].upper() + part2[1:]
+
+                        varied_sentences.append(part1)
+                        varied_sentences.append(part2)
                 else:
                     varied_sentences.append(sentence)
             else:
+                # Add natural variations more often (35% chance) - BUT CONTEXTUALLY
                 if i > 0 and random.random() < 0.35:
+                    # Check previous sentence ending to choose appropriate transition
+                    if varied_sentences and len(varied_sentences) > 0:
+                        prev_sent = varied_sentences[-1]
+
+                        # Choose transition based on relationship
+                        if any(w in sentence.lower() for w in ['however', 'but', 'although', 'despite']):
+                            transition = random.choice(['However, ', 'On the other hand, ', 'That said, ', 'Nevertheless, '])
+                        elif any(w in sentence.lower() for w in ['example', 'instance', 'such as', 'like']):
+                            transition = random.choice(['For instance, ', 'For example, ', 'To illustrate, ', 'Consider this: '])
+                        elif any(w in prev_sent.lower() for w in ['first', 'second', 'finally', 'lastly']):
+                            transition = random.choice(['Next, ', 'Additionally, ', 'Furthermore, ', 'Also, '])
+                        else:
+                            transition = random.choice(['Furthermore, ', 'Additionally, ', 'Moreover, ', 'Also, '])
+
+                        if sentence[0].isupper():
+                            sentence = transition + sentence[0].lower() + sentence[1:]

+                # Add mid-sentence interruptions (10% chance) - ONLY WHERE NATURAL
+                if random.random() < 0.1 and len(words) > 12:
+                    # Find natural pause points (after commas, before "which", etc.)
+                    pause_points = []
+                    for idx, word in enumerate(words):
+                        if word.endswith(',') and idx > len(words)//4 and idx < 3*len(words)//4:
+                            pause_points.append(idx + 1)
+                        elif word.lower() in ['which', 'that', 'who', 'where'] and idx > len(words)//3:
+                            pause_points.append(idx)
+
+                    if pause_points:
+                        pos = random.choice(pause_points)
+                        interruption = random.choice([
+                            " - and this is important - ",
+                            " - mind you - ",
+                            " - interestingly - ",
+                            " (worth noting) ",
+                            " - by the way - "
+                        ])
+                        words.insert(pos, interruption)
+                        sentence = ' '.join(words)

                 varied_sentences.append(sentence)

         # Post-process for additional human patterns
         result = ' '.join([s for s in varied_sentences if s])

+        # Add occasional fragments for human touch (5% chance) - ONLY AT APPROPRIATE PLACES
+        if random.random() < 0.05 and len(varied_sentences) > 3:
             sentences = result.split('. ')
+            # Add fragment after sentences that set up for it
+            for idx, sent in enumerate(sentences[:-1]):
+                if any(w in sent.lower() for w in ['amazing', 'incredible', 'surprising', 'interesting']):
+                    fragments = ["Truly remarkable.", "Quite something.", "Really makes you think."]
+                    sentences.insert(idx + 1, random.choice(fragments))
+                    break
+                elif any(w in sent.lower() for w in ['difficult', 'challenging', 'complex', 'complicated']):
+                    fragments = ["Not easy, for sure.", "Tough stuff.", "Challenging indeed."]
+                    sentences.insert(idx + 1, random.choice(fragments))
+                    break
+
+            result = '. '.join(sentences)

         return result

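The long-sentence split in apply_sentence_variation picks its break point from a transition word, or failing that a conjunction sitting in the middle third of the sentence. Below is a minimal standalone sketch of that selection; the sample sentence and the find_break_point name are illustrative, not part of app.py.

def find_break_point(words, sentence):
    # Mirrors the break-point search in apply_sentence_variation.
    break_words = ['however', 'therefore', 'moreover', 'furthermore', 'additionally', 'consequently']
    conjunctions = [', and', ', but', ', so', ', yet', ', for', ', or', ', nor']

    # Prefer an explicit transition word past the first third of the sentence.
    for idx, word in enumerate(words):
        if word.lower().rstrip(',') in break_words and idx > len(words) // 3:
            return idx

    # Otherwise fall back to a conjunction sitting in the middle third.
    text_lower = sentence.lower()
    for conj in conjunctions:
        if conj in text_lower:
            word_count = len(text_lower[:text_lower.find(conj)].split())
            if len(words) // 3 < word_count < 2 * len(words) // 3:
                return word_count
    return -1

sentence = ("The rollout went smoothly in the first region, but the second region "
            "needed extra capacity, so the team staged the remaining updates over two weeks.")
words = sentence.split()
bp = find_break_point(words, sentence)
print(bp, words[bp])  # -> 15 so  (part two would start at "so the team staged ...")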