[mythtvnz] tv_grab_dvd

Andrew Bruce mythtvnz@lists.linuxnut.co.nz
Mon, 04 Sep 2006 13:44:13 +1200


This is a multi-part message in MIME format.
--------------020601060203090305070700
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit

This information was provided to me and I was asked to repost the 
following to this list, in the hope that it may be of some use to the 
community:

------------------------------------------------------------------

The experimentation of another PVR group in NZ a while ago uncovered that
the submitted patches on the darkskiez site solved the issue of unknown 
tags.

With a small bit of tweaking the xml became ...

<programme channel="{some channel info}" start="20060821230000" 
stop="20060821233000">
       <title lang="eng">The Coach Trip</title>
       <desc lang="eng">{some description info}</desc>
       <category>tourism/travel</category>
       <user>0xa1</user>
</programme>

There has also been much use of the attached python script which wraps 
the dvbsnoop package.

I place this code into the public domain here, with permission of the 
author, in case it is of any use. It seems dvbsnoop is coded to recognise 
at least the standard DVB interpretations and some rating info.

The xml output of this script is something like ...

<programme channel="{some random channel}" start="20060817103000" 
stop="20060817230000">
       <title lang="eng">The Racing Show</title>
       <desc length="197">{description info was here}</desc>
       <duration mjd="0xd2cb223000" from="2006-08-17 10:30:00" 
to="2006-08-17 23:00:00">00:30:00</duration>
       <rating country="NZL" parental="minimum age: 5 years">2</rating>
       <content genre="equestrian">0x4a</content>
</programme>

Code as attached.

--------------020601060203090305070700
Content-Type: text/plain;
 name="epgsnoop.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="epgsnoop.py"

#!/usr/bin/python

import sys
import shutil
import os, os.path
import popen2, signal
import datetime
import string
import cgi
import re


# Convert DVB dates, ETSI EN 300 468 (DVB SI), Annex C
def mjd_to_date(dvb):
	"""Decode a DVB time field given as a hexadecimal string.

	The last six characters are read as decimal hhmmss digit pairs and
	everything in front of them is parsed as a base-16 Modified Julian
	Date.  A zero MJD part means the value is a duration.

	Returns (hour, minute, second) for a duration, otherwise
	(year, month, day, hour, minute, second).
	"""
	# Trailing hhmmss, two characters per field
	clock = (int(dvb[-6:-4]), int(dvb[-4:-2]), int(dvb[-2:]))

	# Leading part is the day count; zero marks a duration
	days = int(dvb[:-6], 16)
	if not days:
		return clock

	# Year and month estimates
	yy = int((days - 15078.2) / 365.25)
	year_days = int(yy * 365.25)
	mm = int((days - 14956.1 - year_days) / 30.6001)

	# Month estimates 14 and 15 roll over into the following year
	carry = int(mm in (14, 15))

	year = yy + carry + 1900
	month = mm - 1 - carry * 12
	day = days - 14956 - year_days - int(mm * 30.6001)

	return (year, month, day) + clock


# Output the programme XML
def output_programme(f, p, hoffset=0):
	"""Write one <programme> element describing event p to file object f.

	p is an event dictionary.  The keys 'start', 'duration', 'channel_id',
	'channel' and 'title' are required (a missing one raises KeyError);
	'language', 'description', 'rating', 'country', 'ratinginfo',
	'content_1', 'content_2' and 'contentinfo' are optional.

	hoffset is a number of hours added to the decoded start time.  The
	stop time is the shifted start plus the duration.

	The element is assembled completely before anything is written, so an
	exception raised for a malformed event leaves f untouched.
	"""
	offset = datetime.timedelta(hours=hoffset)

	# Create the datetime representations
	s = mjd_to_date(p['start'])
	d = mjd_to_date(p['duration'])
	startdt = datetime.datetime(s[0], s[1], s[2], s[3], s[4], s[5]) + offset
	duration = datetime.timedelta(hours=d[0], minutes=d[1], seconds=d[2])

	# startdt already includes the offset; adding it a second time here
	# used to push every stop time hoffset hours too late
	stopdt = startdt + duration

	# Convert to xml string forms
	startstr = startdt.strftime("%Y%m%d%H%M%S")
	stopstr = stopdt.strftime("%Y%m%d%H%M%S")
	durationstr = "%02d:%02d:%02d" % d

	# Start programme record
	lines = ['<programme channel="%s.%s" start="%s" stop="%s">'
		% (p['channel_id'], p['channel'], startstr, stopstr)]

	# We will always have a title
	title = '\t<title'
	if 'language' in p:
		title = title + ' lang="%s"' % p['language']
	lines.append(title + '>%s</title>' % cgi.escape(p['title']))

	# Description; an empty element is written when there is none
	description = ""
	if 'description' in p:
		description = cgi.escape(p['description'])
	lines.append('\t<desc length="%d">%s</desc>' % (len(description), description))

	# Duration .. informational
	lines.append('\t<duration mjd="%s" from="%s" to="%s">%s</duration>'
		% (p['start'], startdt, stopdt, durationstr))

	# Rating
	if 'rating' in p:
		rating = '\t<rating'
		if 'country' in p:
			rating = rating + ' country="%s"' % p['country']
		if 'ratinginfo' in p:
			rating = rating + ' parental="%s"' % cgi.escape(p['ratinginfo'], quote=True)
		lines.append(rating + '>' + p['rating'] + '</rating>')

	# Content: only written when both nibbles are known
	if 'content_1' in p and 'content_2' in p:
		hexvalue = "%x%x" % (int(p["content_1"]), int(p["content_2"]))
		content = '\t<content'
		if 'contentinfo' in p:
			content = content + ' genre="%s"' % cgi.escape(p['contentinfo'], quote=True)
		lines.append(content + '>0x' + hexvalue + '</content>')

	# Done
	lines.append('</programme>')
	f.write('\n'.join(lines) + '\n')


# Process packet
# Compiled once at import time rather than on every packet
_DECODE_REGEX = re.compile(r"""\[= (.*?)\]$""")
# (?:char|name) is a true alternation.  The earlier "[char|name]" was a
# character class matching a single letter, which only worked because
# "event_name" and "text_char" happen to end in letters from that set.
_DETAIL_REGEX = re.compile(r"""(?:char|name): "(.*?)"  -- Charset""")


def _field_text(data):
	"""Return the text after the first ': ' of a line.

	Any further ': ' separators in the line are replaced by one space.
	"""
	return ' '.join(data.split(': ')[1:])


def _header_value(data):
	"""Return the first whitespace-delimited token after the first ': '."""
	return data.split(': ')[1].split()[0]


def _store_event(event, channel_id, version):
	"""Append event to programmes unless its channel/event key is known.

	Returns 1 when the event was stored and 0 when it was a duplicate.
	"""
	key = channel_id + "|" + event['eventid']
	if key in unique:
		return 0
	unique[key] = version
	programmes.append(event)
	print("= events %d programmes %d" % (events, len(programmes)))
	return 1


def process_packet(pkt):
	"""Extract the events contained in one packet.

	pkt is a list of text lines.  Every event found is appended to the
	module-level programmes list unless its "channel_id|event_id" key is
	already present in unique.  The module-level events and packets
	counters are incremented as a side effect.

	Returns the number of events newly added to programmes.
	"""
	global events, packets
	found = 0

	packets += 1
	event_id = None
	for data in pkt:
		# Log key details from top of packet
		if data.startswith("Table_ID"):
			print(". " + data.split('  [=')[0])
			continue
		if data.startswith("Version_number"):
			print(". " + data)
			version = _header_value(data)
			continue
		if data.startswith("Service_ID"):
			print(". " + data.split('  [=')[0])
			channel_id = _header_value(data)
			if channel_id in channels:
				channel_name = channels[channel_id]
			else:
				channel_name = "UNKNOWN"
			continue

		# A new event starts: store the previous one and open a fresh record
		if data.startswith("Event_ID"):
			print("* Found " + data)
			if event_id is not None:
				found += _store_event(event, channel_id, version)
			event_id = _header_value(data)
			event = {
				'eventid': event_id,
				'version': version,
				'channel_id': channel_id,
				'channel': channel_name,
			}
			events += 1
			continue

		# End of packet: store the last event
		if data.startswith("CRC") and event_id is not None:
			found += _store_event(event, channel_id, version)

		# Everything below fills in the currently open event
		if event_id is None:
			continue

		if data.startswith("Start_time:"):
			event['start'] = _field_text(data).split()[0]
			event['startinfo'] = _DECODE_REGEX.findall(data)[0].strip()
		elif data.startswith("Duration:"):
			event['duration'] = _field_text(data).split()[0]
			event['durationinfo'] = _DECODE_REGEX.findall(data)[0].strip()
		elif data.startswith("event_name:"):
			event['title'] = _DETAIL_REGEX.findall(data)[0]
			print("+ " + event['title'] + " " + channel_name)
		elif data.startswith("text_char:"):
			event['description'] = _DETAIL_REGEX.findall(data)[0]
		elif data.startswith("Rating:"):
			event['rating'] = _field_text(data).split()[0]
			event['ratinginfo'] = _DECODE_REGEX.findall(data)[0].strip()
		elif data.startswith("Country_code:"):
			event['country'] = _field_text(data).strip()
		elif "language_code:" in data:
			# Matched anywhere in the line, not only at its start
			event['language'] = _field_text(data).strip()
		elif data.startswith("Content_nibble_level_1:"):
			event['content_1'] = _field_text(data).split()[0]
		elif data.startswith("Content_nibble_level_2:"):
			event['content_2'] = _field_text(data).split()[0]
		elif data.startswith("[= "):
			event['contentinfo'] = _DECODE_REGEX.findall(data)[0].strip()
		elif data.startswith("User_nibble_1:"):
			event['user_1'] = _field_text(data).split()[0]
		elif data.startswith("User_nibble_2:"):
			event['user_2'] = _field_text(data).split()[0]

	# Found how many shows?
	return found


# ensure we don't leave process
def snoop_finished(signum, frame):
	"""Signal handler: terminate the child, remove the pid file, exit(1).

	signum and frame are the standard signal handler arguments; neither
	is used.
	"""
	# The handler is registered before the child is started, so the
	# module-level snoop object may not exist yet
	child = globals().get('snoop')
	if child is not None:
		try:
			os.kill(child.pid, signal.SIGTERM)
		except OSError:
			# Child is already gone; still clean up the pid file below
			pass
	if os.path.exists(pidfile):
		os.remove(pidfile)
	sys.exit(1)


# Shared state filled in while packets are processed
programmes, channels, unique = [], {}, {}
#   programmes: event dicts accepted for output
#   channels:   service id -> channel name, loaded from chanidents
#   unique:     "channel_id|event_id" -> version, used to drop repeats

# Running totals
events = packets = 0

# Stop after this many consecutive packets without a new programme
nilpkts = 2500

# Output file and lock file locations
pidfile = "/tmp/epgsnoop.pid"
epgfile = "epgsnoop.xml"

# Claim the pid file atomically.  O_EXCL makes the create fail when the
# file already exists, so two instances cannot both pass an "exists?"
# test and then both write the file.
try:
	pidfd = os.open(pidfile, os.O_WRONLY | os.O_CREAT | os.O_EXCL, int("666", 8))
except OSError:
	if os.path.exists(pidfile):
		sys.stderr.write("Aborted: already snooping...\n")
		sys.exit(2)
	# Some other failure (permissions, missing directory, ...)
	raise
pidf = os.fdopen(pidfd, "w")
try:
	pidf.write("%d\n" % os.getpid())
finally:
	pidf.close()

# Setup sigint handler, kill subprocess on ^C
signal.signal(signal.SIGINT, snoop_finished)

# open stream
# Alternative kept for debugging: also dump the raw output to data.dmp
#snoop = popen2.Popen3("dvbsnoop -nph 0x12 | /bin/tee data.dmp")
snoop = popen2.Popen3("dvbsnoop -softcrc -nph 0x12")

# Read in the channel idents: lines of exactly two whitespace-separated
# fields map a service id (first field) to a channel name (second field)
chanfile = open('chanidents', 'r')
try:
	chanidents = chanfile.readlines()
finally:
	chanfile.close()
for chan in chanidents:
	channel = chan.split()
	# The length is tested first so that a blank line (empty list) cannot
	# raise IndexError; a first field starting with '#' marks a comment
	if len(channel) == 2 and not channel[0].startswith('#'):
		channels[channel[0]] = channel[1]

# Loop packets until nilpkts consecutive packets produced no new
# programme, or until the child's output ends
check = i = 0
eof = 0
while check < nilpkts:
	i = i + 1
	print("")
	print("Packet %010d" % i)

	# Skip forward to the start of the next packet
	out = snoop.fromchild.readline()
	while out and out[:11] != "SECT-Packet":
		out = snoop.fromchild.readline()

	# readline() returns an empty string only at end of stream.  Stop
	# here instead of pushing nilpkts empty packets through
	# process_packet, which also inflated the packet count.
	if not out:
		eof = 1
		break

	# Packetize: collect lines up to and including the CRC line
	pkt = [out.strip()]
	while out and out[:3] != "CRC":
		out = snoop.fromchild.readline()
		pkt.append(out.strip())

	# Process the packet; any new programme resets the idle counter
	found = process_packet(pkt)
	if found > 0:
		check = 0
	else:
		check += 1

# Had enough
print("")
if eof:
	print("dvbsnoop output ended, consumed %d total" % packets)
else:
	print("Nil shows in last %d packets, consumed %d total" % (nilpkts, packets))

# Natural completion ... kill snoop.  This happens before stderr is read
# so that reading to end-of-stream cannot wait on a child that is still
# running.  An OSError means the child has already gone, which must not
# stop the collected programmes from being written out.
try:
	os.kill(snoop.pid, signal.SIGTERM)
except OSError:
	pass

# Report anything the child wrote to stderr.  popen2.Popen3 leaves
# childerr as None unless it was created with capturestderr set.
errlines = None
if snoop.childerr is not None:
	try:
		errlines = snoop.childerr.readlines()
	except IOError:
		errlines = None
if errlines is None:
	print("No errors reported")
else:
	print(errlines)

# Statistics
print("")
print("Packets %d" % packets)
print("Events %d" % events)
print("Programmes %d" % len(programmes))

# Unleash the output
bad = []
f = open(epgfile, 'w')
try:
	# start
	f.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
	f.write('<tv generator-info-name="%s">\n' % os.path.basename(sys.argv[0]))
	f.write('\n')

	# Output the programme records; events that cannot be rendered are
	# collected in bad instead of aborting the run.
	# NOTE(review): hoffset=12 is a fixed shift, presumably for the local
	# timezone -- confirm, it does not follow daylight saving.
	for p in programmes:
		try:
			output_programme(f, p, hoffset=12)
		except (KeyboardInterrupt, SystemExit):
			# Let ^C (whose handler calls sys.exit) through instead of
			# recording it as a bad event
			raise
		except Exception:
			bad.append(p)

	# Finish
	f.write('\n')
	f.write('</tv>\n')

	# Bad events are appended as an XML comment for later inspection
	print("%d Bad events" % len(bad))
	if bad:
		f.write('\n')
		f.write("<!--\n")
		for b in bad:
			# "--" is not allowed inside an XML comment, so break it up
			f.write("\t %s\n" % str(b).replace("--", "- -"))
		f.write("-->\n")
finally:
	# Done
	f.close()
print("")
print

# Is there a request to copy the file somewhere?
try:
	if len(sys.argv) > 1:
		print("Copying output to " + sys.argv[1])
		shutil.copy2(epgfile, sys.argv[1])
finally:
	# clean up the pid file even when the copy fails; a pid file left
	# behind makes every later run abort with "already snooping"
	if os.path.exists(pidfile):
		os.remove(pidfile)

sys.exit(0)


--------------020601060203090305070700--