@@ -92,6 +92,7 @@ def _format_duration(seconds):
9292
9393
# File name of the audio durations cache (presumably a JSON file, judging by
# the extension; its reader/writer is not shown here).
AUDIO_DURATIONS_CACHE_FILENAME = '.audio_durations.json'
# File name of the JSON cache that stores parsed per-chunk .meta.json data.
CHUNKS_META_CACHE_FILENAME = '.chunks_meta_cache.json'
9596
9697
9798def _audio_duration_sec (path ):
@@ -186,72 +187,152 @@ def _audio_files_with_durations(audio_extensions, audio_folder):
186187
187188
188189
def _chunks_meta_cache_path():
    """Return the full path of the chunks metadata cache file.

    The file is placed in STATS_DIR when that setting is truthy, otherwise
    in CHUNK_FOLDER.
    """
    base_dir = STATS_DIR if STATS_DIR else CHUNK_FOLDER
    return os.path.join(base_dir, CHUNKS_META_CACHE_FILENAME)
193+
194+
def _load_chunks_meta_cache():
    """Load the per-chunk metadata cache from disk.

    Returns:
        dict: the ``entries`` mapping stored in the cache file, restricted to
        entries that are themselves dicts. An empty dict is returned when the
        file is missing, unreadable, not valid JSON, or does not have the
        ``{"entries": {...}}`` shape written by ``_save_chunks_meta_cache``.
    """
    path = _chunks_meta_cache_path()
    try:
        with open(path, 'r') as f:
            data = json.load(f)
    except (ValueError, OSError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # OSError covers a missing or unreadable file. The cache is only an
        # optimisation, so any failure just means "start empty".
        return {}

    entries = data.get('entries') if isinstance(data, dict) else None
    if not isinstance(entries, dict):
        return {}
    # Callers use entry.get(...), so drop anything that is not a dict rather
    # than letting a hand-edited or corrupted cache crash them.
    return {name: entry for name, entry in entries.items() if isinstance(entry, dict)}
205+
206+
def _save_chunks_meta_cache(entries):
    """Persist the per-chunk metadata cache to disk (best effort).

    Args:
        entries: mapping of cache key -> parsed metadata dict; it is stored
            under the ``entries`` key together with an ``updated_at``
            timestamp (``time.time()``).

    Nothing is written when the target directory does not exist, and I/O
    errors are swallowed: a failed save only costs a re-parse later.
    """
    path = _chunks_meta_cache_path()
    cache_dir = os.path.dirname(path)
    if cache_dir and not os.path.isdir(cache_dir):
        return
    # Serialise before touching the disk so a serialisation error cannot
    # leave a half-written file behind.
    payload = json.dumps({'entries': entries, 'updated_at': time.time()})
    # Write to a sibling temp file and rename it over the target, so readers
    # never observe a truncated cache file.
    tmp_path = '%s.%d.tmp' % (path, os.getpid())
    try:
        with open(tmp_path, 'w') as f:
            f.write(payload)
        os.replace(tmp_path, path)
    except OSError:
        try:
            os.remove(tmp_path)
        except OSError:
            pass
217+
218+
def _normalize_source_video(item):
    """Normalize one ``source_videos`` entry into a uniform dict.

    Accepts either the old format (a bare path string) or a dict containing
    a ``path`` key. A ``channel`` value that is itself a dict is flattened
    to its ``channel_name`` (or, failing that, its ``channel``) value.

    Returns:
        dict with keys ``path``, ``model``, ``thumbnail_url``, ``title`` and
        ``channel``, or None when the entry has neither shape.
    """
    if isinstance(item, str):
        # Old format: only the path is known, everything else is None.
        legacy = dict.fromkeys(('path', 'model', 'thumbnail_url', 'title', 'channel'))
        legacy['path'] = item
        return legacy

    if not isinstance(item, dict) or 'path' not in item:
        return None

    channel = item.get('channel')
    if isinstance(channel, dict):
        channel = channel.get('channel_name') or channel.get('channel') or ''

    normalized = {'path': item['path']}
    for key in ('model', 'thumbnail_url', 'title'):
        normalized[key] = item.get(key)
    # Empty/falsy channel values are reported as None.
    normalized['channel'] = channel if channel else None
    return normalized
235+
236+
def _parse_chunk_meta(meta_path):
    """Read and normalize a chunk's ``.meta.json`` file.

    Args:
        meta_path: path of the metadata file to read.

    Returns:
        dict with keys ``source_videos`` (list of normalized entries),
        ``model_info``, ``video_codec``, ``width``, ``height`` and
        ``created_at``; or None when the file cannot be read, is not valid
        JSON, or its top-level value is not a JSON object.
    """
    import json as _json
    try:
        with open(meta_path, 'r') as f:
            meta = _json.load(f)
    except (ValueError, OSError):
        return None
    if not isinstance(meta, dict):
        # Valid JSON but not an object (e.g. a list): treat as unusable.
        return None

    raw_sources = meta.get('source_videos')
    if not isinstance(raw_sources, (list, tuple)):
        # Anything else (None, number, string, object) has no entries to
        # normalize; iterating it would crash or yield nonsense paths.
        raw_sources = []
    normalized = (_normalize_source_video(item) for item in raw_sources)
    return {
        'source_videos': [src for src in normalized if src],
        'model_info': meta.get('model_info') or [],
        'video_codec': meta.get('video_codec'),
        'width': meta.get('width'),
        'height': meta.get('height'),
        'created_at': meta.get('created_at'),
    }
255+
256+
189257def _build_chunks_list (settings = None ):
190- """Build chunks list (no ffprobe). settings used for days_to_expire ."""
258+ """Build chunks list. Uses per-chunk metadata cache — only re-reads .meta.json when mtime changes ."""
191259 from datetime import datetime
192- import json as _json
193260 import math
194261 settings = settings or {}
195262 chunks = []
196- if os .path .exists (CHUNK_FOLDER ):
197- for f in os .listdir (CHUNK_FOLDER ):
198- if f .endswith ('.mp4' ) and not f .startswith ('chunk_temp' ):
199- filepath = os .path .join (CHUNK_FOLDER , f )
200- stat = os .stat (filepath )
201- meta_path = os .path .join (CHUNK_FOLDER , f .replace ('.mp4' , '.meta.json' ))
202- source_videos = []
203- model_info = []
204- video_codec = None
205- width = None
206- height = None
207- created_at_str = None
208- if os .path .isfile (meta_path ):
209- try :
210- with open (meta_path , 'r' ) as _f :
211- meta = _json .load (_f )
212- raw_sources = meta .get ('source_videos' ) or []
213- # Normalize: support old [path, ...] and new [{path, model}, ...]
214- source_videos = []
215- for item in raw_sources :
216- if isinstance (item , str ):
217- source_videos .append ({'path' : item , 'model' : None , 'thumbnail_url' : None , 'title' : None , 'channel' : None })
218- elif isinstance (item , dict ) and 'path' in item :
219- source_videos .append ({
220- 'path' : item ['path' ],
221- 'model' : item .get ('model' ),
222- 'thumbnail_url' : item .get ('thumbnail_url' ),
223- 'title' : item .get ('title' ),
224- 'channel' : item .get ('channel' ),
225- })
226- model_info = meta .get ('model_info' ) or []
227- video_codec = meta .get ('video_codec' )
228- width = meta .get ('width' )
229- height = meta .get ('height' )
230- created_at_str = meta .get ('created_at' )
231- except (ValueError , OSError ):
232- pass
233- if created_at_str :
234- try :
235- dt = datetime .fromisoformat (created_at_str .replace ('Z' , '+00:00' ))
236- created_at_display = dt .strftime ('%Y-%m-%d %H:%M:%S' )
237- timestamp = dt .timestamp ()
238- except (ValueError , TypeError ):
239- created_at_display = datetime .fromtimestamp (stat .st_ctime ).strftime ('%Y-%m-%d %H:%M:%S' )
240- timestamp = stat .st_ctime
241- else :
242- created_at_display = datetime .fromtimestamp (stat .st_ctime ).strftime ('%Y-%m-%d %H:%M:%S' )
243- timestamp = stat .st_ctime
244- chunks .append ({
245- 'name' : f ,
246- 'created_at' : created_at_display ,
247- 'timestamp' : timestamp ,
248- 'size_mb' : round (stat .st_size / (1024 * 1024 ), 2 ),
249- 'source_videos' : source_videos ,
250- 'model_info' : model_info ,
251- 'video_codec' : video_codec ,
252- 'width' : width ,
253- 'height' : height ,
254- })
263+ if not os .path .exists (CHUNK_FOLDER ):
264+ return chunks
265+
266+ cache = _load_chunks_meta_cache ()
267+ cache_dirty = False
268+
269+ for f in os .listdir (CHUNK_FOLDER ):
270+ if not f .endswith ('.mp4' ) or f .startswith ('chunk_temp' ):
271+ continue
272+ filepath = os .path .join (CHUNK_FOLDER , f )
273+ try :
274+ stat = os .stat (filepath )
275+ except OSError :
276+ continue
277+
278+ meta_path = os .path .join (CHUNK_FOLDER , f .replace ('.mp4' , '.meta.json' ))
279+ cached_entry = cache .get (f )
280+ meta_mtime = 0
281+ if os .path .isfile (meta_path ):
282+ try :
283+ meta_mtime = os .path .getmtime (meta_path )
284+ except OSError :
285+ pass
286+
287+ if cached_entry and cached_entry .get ('meta_mtime' ) == meta_mtime and meta_mtime > 0 :
288+ parsed = cached_entry
289+ else :
290+ parsed = _parse_chunk_meta (meta_path ) if meta_mtime > 0 else None
291+ if parsed :
292+ parsed ['meta_mtime' ] = meta_mtime
293+ cache [f ] = parsed
294+ cache_dirty = True
295+ else :
296+ parsed = {'source_videos' : [], 'model_info' : [], 'video_codec' : None ,
297+ 'width' : None , 'height' : None , 'created_at' : None , 'meta_mtime' : 0 }
298+ if f in cache :
299+ del cache [f ]
300+ cache_dirty = True
301+
302+ created_at_str = parsed .get ('created_at' )
303+ if created_at_str :
304+ try :
305+ dt = datetime .fromisoformat (created_at_str .replace ('Z' , '+00:00' ))
306+ created_at_display = dt .strftime ('%Y-%m-%d %H:%M:%S' )
307+ timestamp = dt .timestamp ()
308+ except (ValueError , TypeError ):
309+ created_at_display = datetime .fromtimestamp (stat .st_ctime ).strftime ('%Y-%m-%d %H:%M:%S' )
310+ timestamp = stat .st_ctime
311+ else :
312+ created_at_display = datetime .fromtimestamp (stat .st_ctime ).strftime ('%Y-%m-%d %H:%M:%S' )
313+ timestamp = stat .st_ctime
314+
315+ chunks .append ({
316+ 'name' : f ,
317+ 'created_at' : created_at_display ,
318+ 'timestamp' : timestamp ,
319+ 'size_mb' : round (stat .st_size / (1024 * 1024 ), 2 ),
320+ 'source_videos' : parsed .get ('source_videos' , []),
321+ 'model_info' : parsed .get ('model_info' , []),
322+ 'video_codec' : parsed .get ('video_codec' ),
323+ 'width' : parsed .get ('width' ),
324+ 'height' : parsed .get ('height' ),
325+ })
326+
327+ stale_keys = set (cache .keys ()) - {c ['name' ] for c in chunks }
328+ if stale_keys :
329+ for k in stale_keys :
330+ del cache [k ]
331+ cache_dirty = True
332+
333+ if cache_dirty :
334+ _save_chunks_meta_cache (cache )
335+
255336 chunks .sort (key = lambda x : x ['timestamp' ], reverse = True )
256337 if chunks :
257338 max_chunks = int (settings .get ('MAX_CHUNKS' , '56' ))
@@ -540,46 +621,26 @@ def _extract_video_id(path):
540621 return None
541622
542623
def _normalize_model(url):
    """Normalize a model URL by delegating to ``ClipPusher._normalize_model_url``.

    Exists so this module can reuse ClipPusher's normalizer under a short
    local name.
    """
    normalizer = ClipPusher._normalize_model_url
    return normalizer(url)
627+
628+
def _find_video_id_for_model(model):
    """Find a YouTube video_id for a model using the chunks meta cache.

    Every cached source video whose normalized ``model`` equals the
    normalized ``model`` argument contributes the id extracted from its
    ``path``.

    Returns:
        One matching video id, chosen at random when several match, or None
        when nothing matches.
    """
    import random as _random
    norm = _normalize_model(model)
    candidates = []
    for entry in _load_chunks_meta_cache().values():
        if not isinstance(entry, dict):
            continue
        for item in entry.get('source_videos') or []:
            if not isinstance(item, dict):
                continue
            # Normalized entries always carry a 'model' key, frequently set
            # to None, so a .get() default would never apply. Sources with
            # no model cannot match and are skipped explicitly.
            item_model = item.get('model')
            if not item_model:
                continue
            if _normalize_model(item_model) != norm:
                continue
            vid = _extract_video_id(item.get('path') or '')
            if vid:
                candidates.append(vid)
    return _random.choice(candidates) if candidates else None
584645
585646
@@ -629,6 +690,31 @@ def _get_youtube_thumbnail_for_model(model, video_id_from_play_counts, stored_th
629690 return None
630691
631692
def _parse_model_platform(model_str):
    """Detect the platform and extract a display name from a model URL string.

    Args:
        model_str: model URL, with or without an ``http(s)://`` scheme. None
            and surrounding whitespace are tolerated.

    Returns:
        Tuple ``(platform, username, url)`` where ``platform`` is
        ``'instagram'``, ``'tiktok'`` or ``'other'``; ``username`` is the
        extracted handle (for ``'other'``, the first 36 characters of the
        scheme-less URL); and ``url`` is the stripped input when it already
        has a scheme, otherwise the input prefixed with ``https://``.
    """
    text = (model_str or '').strip()
    # Match the scheme explicitly (case-insensitively): a bare
    # startswith('http') would also accept hosts such as "httpbin.org".
    has_scheme = re.match(r'https?://', text, re.I) is not None
    clean = re.sub(r'^https?://(www\.)?', '', text, flags=re.I)
    url = text if has_scheme else 'https://' + clean

    if re.search(r'instagram\.com/', clean, re.I):
        tail = re.sub(r'.*instagram\.com/@?', '', clean, flags=re.I)
        # Keep only the first path segment, without query string or fragment.
        name = tail.split('/')[0].split('?')[0].split('#')[0]
        return 'instagram', name or clean, url

    if re.search(r'tiktok\.com', clean, re.I):
        match = re.search(r'@([a-zA-Z0-9_.]+)', clean)
        if match:
            name = '@' + match.group(1)
        else:
            # No @handle in the URL: fall back to the last path segment.
            name = clean.split('/')[-1].split('?')[0] or clean
        return 'tiktok', name, url

    return 'other', clean[:36], url
705+
706+
def _find_channel_for_model(model):
    """Find the channel name for a model from the chunks meta cache.

    Returns:
        The ``channel`` value of the first cached source video whose
        normalized ``model`` equals the normalized ``model`` argument and
        whose channel is non-empty, or None when there is no such entry.
    """
    norm = _normalize_model(model)
    for entry in _load_chunks_meta_cache().values():
        if not isinstance(entry, dict):
            continue
        for item in entry.get('source_videos') or []:
            if not isinstance(item, dict):
                continue
            channel = item.get('channel')
            # 'model' is present but often None in normalized entries, so it
            # is checked explicitly instead of relying on a .get() default.
            item_model = item.get('model')
            if not channel or not item_model:
                continue
            if _normalize_model(item_model) == norm:
                return channel
    return None
716+
717+
632718def _stats_context ():
633719 """Build stream_stats and play_counts for stats page."""
634720 current_status = clip_pusher .get_status ()
@@ -644,15 +730,18 @@ def _stats_context():
644730 model , count = item [0 ], item [1 ]
645731 video_id = item [2 ] if len (item ) > 2 else None
646732 stored_thumb = item [3 ] if len (item ) > 3 else None
647- url = model if model .startswith ('http' ) else 'https://' + model
648- meta = _fetch_og_meta (url )
649- title = html .unescape (meta .get ('title' ) or url )
733+ platform , username , url = _parse_model_platform (model )
734+ channel = _find_channel_for_model (model )
650735 thumbnail = _get_youtube_thumbnail_for_model (model , video_id , stored_thumb )
736+ yt_vid = video_id if (video_id and len (video_id ) == 11 ) else _find_video_id_for_model (model )
651737 models_enriched .append ({
652738 'url' : url ,
653739 'count' : count ,
654- 'title' : title ,
740+ 'username' : username ,
741+ 'platform' : platform ,
742+ 'channel' : channel ,
655743 'image' : thumbnail ,
744+ 'yt' : f"https://www.youtube.com/watch?v={ yt_vid } " if yt_vid else None ,
656745 })
657746 play_counts = dict (play_counts , models = models_enriched )
658747 return {'stream_stats' : stream_stats , 'play_counts' : play_counts }
0 commit comments