/*
 * Apple Trailer Download
 * Copyright © 2008-2010 by Jacob Laursen <jacob at
 *                                         vindvejr
 * Version 1.2                              dot dk>
 *
 * Requirements:
 * - libcurl, http://curl.haxx.se/libcurl/
 * - ffmpeg, http://ffmpeg.mplayerhq.hu/
 *
 * Build with: g++ atd.cpp -lcurl -o dl_apple_trailers
 *             -I/usr/local/include -L/usr/local/lib
 *
 * TODO:
 * - Correct JSON parser with charset support etc. Could be
 *   TinyJSON.
 * - Save all metadata in the MPEG4 file or MySQL. Useful for
 *   UPnP server indexing.
 * - Use boost date class for parsing release date.
 * - CTRL-C signal: improve robustness (ffmpeg running...)
 */

#include <iostream>
#include <sstream>
#include <fstream>
#include <cstdio>
#include <map>
#include <list>

#include <sys/stat.h>
#include <sys/types.h>
#include <signal.h>
#include <utime.h>
#include <dirent.h>

#include <curl/curl.h>

using namespace std;

// Program name shown in the usage text and in error messages.
#define AT_CMD_TITLE   "dl_apple_trailers"
// Scheme and host that the relative movie locations from the index are appended to.
#define AT_APPLE_ROOT  "http://www.apple.com"
// URL of the JSON index that main() downloads and parses first.
#define AT_APPLE_INDEX AT_APPLE_ROOT "/trailers/home/feeds/studios.json"
// User agent set on the curl handle before a trailer file is downloaded.
#define AT_USER_AGENT  "QuickTime"

// Globals
// Text collected by receive_data(); main() reads it line by line and then resets it.
stringstream curl_buffer;
// Output file for the download in progress; written by write_data_to_file().
ofstream     curl_file_buffer;
// Path of the file being downloaded, so that sighandler() can delete it on CTRL-C.
string       cleanup_filename;

// Three-letter month abbreviation -> tm_mon value (0-11). Filled in main(), read by str2time().
map<string, int> month_index;

// One movie as read from a line of the studios index.
struct Movie {
	string title;        // "title" value after sanitising; used as the base of the file names
	string location;     // "location" value; appended to AT_APPLE_ROOT to get the trailer page
	string poster;       // "poster" value; only filled in when posters were requested
	bool highdef;        // value of the "hd" flag
	time_t releasedate;  // result of str2time() on "releasedate"; -1 if it could not be parsed
};

// One downloadable trailer found on a movie's trailer page.
struct Trailer {
	string title;     // text of the "<h3>...:</h3>" heading seen before the link
	string location;  // absolute URL of the .mov file to download
	int resolution;   // 1080, 720 or 480
};

// libcurl write callback: appends the received chunk to the global
// `curl_buffer' and reports the whole chunk as consumed. `stream' is unused.
size_t receive_data(void *ptr, size_t size, size_t nmemb, void *stream)
{
	const size_t total = size * nmemb;
	const char *chunk = static_cast<const char *>(ptr);

	curl_buffer.write(chunk, total);

	return total;
}

// libcurl write callback: appends the received chunk to the global
// `curl_file_buffer'. `stream' is unused.
//
// Returns the number of bytes consumed. If the file stream is in a failed
// state after the write (disk full, I/O error, ...), 0 is returned instead;
// libcurl treats a return value different from the chunk size as a write
// error and aborts the transfer, so the caller sees the failure rather than
// ending up with a silently truncated file.
size_t write_data_to_file(void *ptr, size_t size, size_t nmemb, void *stream)
{
	const size_t total = size * nmemb;

	curl_file_buffer.write(static_cast<const char *>(ptr), total);
	if (!curl_file_buffer)
		return 0;

	return total;
}

// Report whether `filename' names something that stat() can look up.
bool file_exists(string filename)
{
	struct stat attributes;
	const int rc = stat(filename.c_str(), &attributes);

	// stat() only succeeds for paths that exist (and are reachable).
	return rc == 0;
}

// Check if file exists in `path' or any subdir hereof.
//
// `path' must end with a slash; `filename' is appended to it directly.
// Only real directories are descended into (symbolic links are not
// followed). Returns false if `path' cannot be opened.
bool file_exists_recur(string path, string filename)
{
	if (file_exists(path + filename))
		return true;

	DIR *dir = opendir(path.c_str());
	if (!dir)
		return false;

	bool found = false;
	struct dirent *dp;

	while (!found && (dp = readdir(dir)))
	{
		string name = dp->d_name;
		if (name == "." || name == "..")
			continue;

		bool is_dir = (dp->d_type == DT_DIR);

		// Not every filesystem fills in d_type; ask lstat() in that case.
		// lstat() is used so that symbolic links are still not followed.
		if (dp->d_type == DT_UNKNOWN)
		{
			struct stat info;
			is_dir = (lstat((path + name).c_str(), &info) == 0 && S_ISDIR(info.st_mode));
		}

		if (is_dir)
			found = file_exists_recur(path + name + "/", filename);
	}

	// Single exit point so the directory handle is closed on a match as well.
	closedir(dir);
	return found;
}

// Look for `filename' below `path': in `path' itself only, or in all of its
// subdirectories as well when `recur' is set.
bool file_exists_cond(string path, string filename, bool recur)
{
	if (!recur)
		return file_exists(path + filename);

	return file_exists_recur(path, filename);
}

// SIGINT handler: discard the download in progress, then terminate.
//
// If an output file is open it is closed and the file named by
// `cleanup_filename' is deleted, so no partial download is left behind.
// The check is on is_open() rather than on the stream state, so the file is
// also cleaned up when an earlier write already put the stream in a failed
// state.
//
// NOTE(review): iostream calls and exit() are not async-signal-safe; this is
// kept as a best-effort cleanup.
void sighandler(int sig)
{
	if (curl_file_buffer.is_open())
	{
		curl_file_buffer.close();
		if (!cleanup_filename.empty())
			remove(cleanup_filename.c_str());
	}
	exit(0);
}

// Read `len' characters of `str' starting at `pos' as a decimal integer.
// Returns false, leaving `out' untouched, if no number could be extracted.
static bool str2time_field(const string &str, string::size_type pos, string::size_type len, int &out)
{
	istringstream ss(str.substr(pos, len));
	int value;

	if (!(ss >> value))
		return false;

	out = value;
	return true;
}

// Parse datetime string in format "Fri, 14 Dec 2007 00:00:00 -0800"
//
// Returns the result of mktime() on the parsed fields, or -1 if the string
// does not have the expected length, the month abbreviation is not present
// in `month_index', or one of the numeric fields cannot be read.
//
// The weekday and the trailing UTC offset are not read; the date and time
// are handed to mktime() as they stand, which interprets them as local time.
time_t str2time(string str)
{
	if (str.size() != 31)
		return -1;

	// Look the month up with find(): operator[] would insert unknown
	// abbreviations into the map and silently treat them as January.
	map<string, int>::const_iterator month = month_index.find(str.substr(8, 3));
	if (month == month_index.end())
		return -1;

	int mday, year, hour, minute, second;
	if (!str2time_field(str,  5, 2, mday)   ||
		!str2time_field(str, 12, 4, year)   ||
		!str2time_field(str, 17, 2, hour)   ||
		!str2time_field(str, 20, 2, minute) ||
		!str2time_field(str, 23, 2, second))
	{
		return -1;
	}

	// Value-initialise so that no field is passed to mktime() uninitialised.
	struct tm parts = tm();

	parts.tm_mday  = mday;
	parts.tm_mon   = month->second;
	parts.tm_year  = year - 1900;
	parts.tm_hour  = hour;
	parts.tm_min   = minute;
	parts.tm_sec   = second;
	parts.tm_isdst = -1; // Let mktime() work out whether DST applies

	return mktime(&parts);
}

// Extract the string value stored under `key' in `line'.
//
// Looks for the first occurrence of "key":" and returns everything up to
// the next double quote. Returns an empty string if the key is not present
// or the value is not terminated.
//
// This is plain text matching, not JSON parsing: escape sequences are
// returned as they appear, and a value containing an escaped quote (\") is
// cut off at that quote.
string get_value(string line, string key)
{
	const string marker = "\"" + key + "\":\"";

	// Positions are kept as size_type so that the comparison with npos does
	// not depend on a narrowing conversion to int.
	const string::size_type marker_pos = line.find(marker);
	if (marker_pos == string::npos)
		return "";

	// Skip key + start quote of value
	const string::size_type value_begin = marker_pos + marker.size();

	// Find end quote of value
	const string::size_type value_end = line.find('"', value_begin);
	if (value_end == string::npos)
		return "";

	return line.substr(value_begin, value_end - value_begin);
}

// Escape `name' so it can be placed between double quotes in a shell
// command line.
//
// Inside double quotes the shell still gives special meaning to the double
// quote itself, the backslash, the dollar sign and the backquote. Each of
// them is prefixed with a backslash; all other characters are copied as they
// are. The file names are derived from downloaded data, so leaving any of
// these unescaped would allow command or variable substitution.
string escape_filename(string name)
{
	string esc;

	for (string::size_type i = 0; i < name.size(); i++)
	{
		const char c = name[i];

		if (c == '"' || c == '\\' || c == '$' || c == '`')
			esc += '\\';

		esc += c;
	}

	return esc;
}

// Replace all occurrences of `find' with `rep' in string `str'.
//
// After a replacement the search normally resumes at the position of that
// replacement, so a match formed by the replacement together with the text
// following it is replaced too. Callers rely on this: replacing ".." with
// "." collapses a run of dots of any length into a single dot.
//
// Two cases would never terminate with that rule and are handled apart:
// - an empty `find' matches everywhere; `str' is left unchanged.
// - if `rep' itself contains `find', the search resumes after the inserted
//   text, so each original occurrence is replaced exactly once.
void str_replace(string &str, string find, string rep)
{
	if (find.empty())
		return;

	const bool rep_contains_find = (rep.find(find) != string::npos);
	const string::size_type len = find.size();
	string::size_type pos = 0;

	while ((pos = str.find(find, pos)) != string::npos)
	{
		str.replace(pos, len, rep);

		if (rep_contains_find)
			pos += rep.size();
	}
}

// Return string found between `pre' and `post'.
//
// The search runs from the end of `str': the last occurrence of `post' is
// located first, then the closest occurrence of `pre' that ends at or before
// the start of that `post'. Returns an empty string if either marker is
// missing (or if nothing lies between them).
string find_between_r(string str, string pre, string post)
{
	const string::size_type right = str.rfind(post);
	if (right == string::npos)
		return "";

	// `pre' has to fit completely in front of `post'. Searching from
	// `right' itself could pick an occurrence that overlaps `post', which
	// would give a negative length.
	const string::size_type len = pre.size();
	if (right < len)
		return "";

	const string::size_type left = str.rfind(pre, right - len);
	if (left == string::npos)
		return "";

	return str.substr(left + len, right - left - len);
}

// Print the program banner, the usage line and the list of options to
// standard output.
void display_help()
{
	static const char *const option_lines[] = {
		"  -r, --recursive      Check existing files in all subdirs",
		"  -o, --overwrite      Overwrite existing files (complete download)",
		"      --posters[=DIR]  Fetch posters as well",
		"      --tstamp         Copy release date to file timestamp",
		"      --prefres=RES    Prefered resolution: {480p|720p|1080p}",
		"      --minres=RES     Minimum resolution: {480p|720p|1080p}",
		"  -v, --verbose        Explain what is being done",
		"  -d, --debug          Display extended debug information",
		"  -h, --help           Display this help and exit"
	};
	const size_t option_count = sizeof(option_lines) / sizeof(option_lines[0]);

	cout << "Apple Trailer Download, © 2008-2009 Jacob Laursen" << endl;
	cout << "Usage: " << AT_CMD_TITLE << " [options] DIRECTORY" << endl;
	cout << endl;
	cout << "Options:" << endl;

	for (size_t n = 0; n < option_count; ++n)
		cout << option_lines[n] << endl;
}

int main(int argc, char *argv[])
{
	string dest_dir, poster_dir;
	char curlerr[CURL_ERROR_SIZE];

	bool arg_recur     = false;
	bool arg_overwrite = false;
	bool arg_posters   = false;
	bool arg_tstamp    = false;
	bool arg_verbose   = false;
	bool arg_debug     = false;

	int arg_prefres = 1080;
	int arg_minres  =  720;

	if (argc <= 1)
	{
		display_help();
		return 1;
	}

	// Parse arguments
	for (int i = 1; i < argc; i++)
	{
		string arg(argv[i]);

		if (arg.substr(0, 1) == "-")
		{
			if (arg == "--help" || arg == "-h")
			{
				display_help();
				return 1;
			}
			else if (arg == "--recursive" || arg == "-r")
			{
				arg_recur = true;
			}
			else if (arg == "--overwrite" || arg == "-o")
			{
				arg_overwrite = true;
			}
			else if (arg == "--posters")
			{
				arg_posters = true;
			}
			else if (arg == "--tstamp")
			{
				arg_tstamp = true;
			}
			else if (arg == "--verbose" || arg == "-v")
			{
				arg_verbose = true;
			}
			else if (arg == "--debug" || arg == "-d")
			{
				arg_debug = true;
			}
			else if (arg.size() > 10 && arg.substr(0, 10) == "--posters=")
			{
				arg_posters = true;

				poster_dir = arg.substr(10);
				if (poster_dir.substr(poster_dir.size() - 1) != "/")
					poster_dir += "/";
			}
			else if (arg.size() > 10 && arg.substr(0, 10) == "--prefres=")
			{
				string res = arg.substr(10);
				if (res == "480p")
					arg_prefres = 480;
				else if (res == "720p")
					arg_prefres = 720;
				else if (res == "1080p")
					arg_prefres = 1080;
				else
				{
					display_help();
					cout << AT_CMD_TITLE << ": invalid prefered resolution `" << res << "'" << endl;
					return 1;
				}
			}
			else if (arg.size() > 9 && arg.substr(0, 9) == "--minres=")
			{
				string res = arg.substr(9);
				if (res == "480p")
					arg_minres = 480;
				else if (res == "720p")
					arg_minres = 720;
				else if (res == "1080p")
					arg_minres = 1080;
				else
				{
					display_help();
					cout << AT_CMD_TITLE << ": invalid minimum resolution `" << res << "'" << endl;
					return 1;
				}
			}
			else
			{
				display_help();
				cout << AT_CMD_TITLE << ": invalid option `" << arg << "'" << endl;
				return 1;
			}
		}
		else
		{
			if (!dest_dir.empty())
			{
				display_help();
				cout << AT_CMD_TITLE << ": wrong number of arguments" << endl;
				return 1;
			}

			dest_dir = arg;
			if (dest_dir.substr(dest_dir.size() - 1) != "/")
				dest_dir += "/";
		}
	}

	if (dest_dir.empty())
	{
		display_help();
		cout << AT_CMD_TITLE << ": missing directory" << endl;
		return 1;
	}

	if (arg_posters && poster_dir.empty())
	{
		poster_dir = dest_dir;
	}

	// Initialize month index.
	month_index["Jan"] = 0; month_index["Feb"] = 1;  month_index["Mar"] = 2;
	month_index["Apr"] = 3; month_index["May"] = 4;  month_index["Jun"] = 5;
	month_index["Jul"] = 6; month_index["Aug"] = 7;  month_index["Sep"] = 8;
	month_index["Oct"] = 9; month_index["Nov"] = 10; month_index["Dec"] = 11;

	signal(SIGINT, &sighandler); // CTRL-C

	CURL *ch = curl_easy_init();

	curl_easy_setopt(ch, CURLOPT_VERBOSE,    arg_debug);
	curl_easy_setopt(ch, CURLOPT_HEADER,     0);
	curl_easy_setopt(ch, CURLOPT_NOPROGRESS, 1);
	curl_easy_setopt(ch, CURLOPT_NOSIGNAL,   1);

	curl_easy_setopt(ch, CURLOPT_ERRORBUFFER, curlerr);
	curl_easy_setopt(ch, CURLOPT_URL, AT_APPLE_INDEX);
	curl_easy_setopt(ch, CURLOPT_WRITEFUNCTION, receive_data);

	CURLcode code = curl_easy_perform(ch);
	if (code != 0)
	{
		cerr << "Error retrieving " << AT_APPLE_INDEX << ": " << curlerr << endl;
		return 1;
	}

	string line;
	int linenum = 0;
	list<Movie> movies;

	if (arg_verbose)
		cout << "Parsing index " << AT_APPLE_INDEX << endl;

	while (getline(curl_buffer, line))
	{
		Movie entry;

		linenum++;

		// Title
		entry.title = get_value(line, "title");
		if (entry.title.empty())
		{
			continue;
		}

		str_replace(entry.title, "\\u2019", "'"); // Left apostrophe
		str_replace(entry.title, "\\u2022", "-"); // Bullet
		str_replace(entry.title, "&amp;",   "&"); // Ampersand
		str_replace(entry.title, ":",      " -"); // Filesystem security
		str_replace(entry.title, "/",       "-"); // Filesystem security
		str_replace(entry.title, "?",        ""); // Filesystem security
		str_replace(entry.title, "..",      "."); // Filesystem security

		// Location
		entry.location = get_value(line, "location");
		if (entry.location.empty())
		{
			cerr << "Error parsing line " << linenum << ": no location" << endl;
			continue;
		}

		// Release date
		entry.releasedate = str2time(get_value(line, "releasedate"));

		// Poster
		if (arg_posters)
			entry.poster = get_value(line, "poster");

		// High definition
		int pos = line.find("\"hd\":");
		if (pos != string::npos)
		{
			line = line.substr(pos + 5);
			pos = line.find_first_of("}");
			if (line.substr(0, pos) == "true")
				entry.highdef = true;
			else
				entry.highdef = false;

			movies.push_back(entry);
		}
		else
		{
			cerr << "Error parsing line " << linenum << ": no HD flag" << endl;
		}
	}

	curl_buffer.clear();
	curl_buffer.str("");

	for (list<Movie>::iterator iter = movies.begin(); iter != movies.end(); iter++)
	{
		if (arg_minres >= 720 && iter->highdef == false)
			continue;

		// Get poster
		if (!iter->poster.empty())
		{
			string poster_filename = poster_dir + iter->title + ".jpg";

			if (!arg_overwrite && file_exists_cond(poster_dir, iter->title + ".jpg", arg_recur))
			{
				if (arg_verbose)
					cout << "Skipping existing file " << iter->title << ".jpg" << endl;
			}
			else
			{
				if (arg_verbose)
					cout << "Downloading file " << iter->poster << endl;

				cleanup_filename = poster_filename;
				curl_file_buffer.open(poster_filename.c_str());
				if (curl_file_buffer)
				{
					curl_easy_setopt(ch, CURLOPT_WRITEFUNCTION, write_data_to_file);
					curl_easy_setopt(ch, CURLOPT_URL, iter->poster.c_str());
					code = curl_easy_perform(ch);
					curl_file_buffer.close();
					cleanup_filename.clear();
	
					if (code != 0)
					{
						cerr << "Error downloading " << iter->poster << ": " << curlerr << endl;
						curl_buffer.clear();
						curl_buffer.str("");
					}
				}
			}
		}

		if (!arg_overwrite && file_exists_cond(dest_dir, iter->title + ".mp4", arg_recur))
		{
			if (arg_verbose)
				cout << "Skipping existing file " << iter->title << ".mp4" << endl;
			continue;
		}

		if (arg_verbose)
			cout << "Parsing " << AT_APPLE_ROOT << iter->location << endl;

		curl_easy_setopt(ch, CURLOPT_WRITEFUNCTION, receive_data);
		curl_easy_setopt(ch, CURLOPT_URL, string(AT_APPLE_ROOT + iter->location).c_str());
		code = curl_easy_perform(ch);

		if (code != 0)
		{
			cerr << "Error retrieving " << AT_APPLE_ROOT << iter->location << ": " << curlerr << endl;
			curl_buffer.clear();
			curl_buffer.str("");
			continue;
		}

		list<Trailer> trailers;
		Trailer entry;
		bool found_1080 = false, found_720 = false, found_480 = false;

		// Parse trailer webpage
		while (getline(curl_buffer, line))
		{
			string title = find_between_r(line, "<h3>", ":</h3>");
			if (!title.empty())
			{
				entry.title = title;
				continue;
			}

			// Look for 1080p trailers only if this is the prefered resolution
			if (arg_prefres == 1080)
			{
				string location = find_between_r(line, "http://", "_1080p.mov");
				if (!location.empty())
				{
					entry.location = "http://" + location + "_h1080p.mov";
					entry.resolution = 1080;
					trailers.push_back(entry);
					found_1080 = true;
				}
			}

			// Look for 720p trailers if we prefer 720p or 1080p and the
			// minimum accepted resolution isn't above 720p.
			if (arg_prefres >= 720 && arg_minres <= 720)
			{
				string location = find_between_r(line, "http://", "_720p.mov");
				if (!location.empty())
				{
					entry.location = "http://" + location + "_h720p.mov";
					entry.resolution = 720;
					trailers.push_back(entry);
					found_720 = true;
				}
			}

			// Look for 480p trailers only if we accept it - it doesn't
			// matter if we prefer this resolution.
			if (arg_minres == 480)
			{
				string location = find_between_r(line, "http://", "_480p.mov");
				if (!location.empty())
				{
					entry.location = "http://" + location + "_h480p.mov";
					entry.resolution = 480;
					trailers.push_back(entry);
					found_480 = true;
				}
			}
		}

		int keep_res = 0;

		// If we found trailers in our prefered resolution, keep this.
		// We already skipped trailers not fulfilling our minimum resolution.
		if ((found_1080 && arg_prefres == 1080) ||
			(found_720 && arg_prefres == 720) ||
			(found_480 && arg_prefres == 480))
		{
			keep_res = arg_prefres;
		}
		// We didn't get the best, fallback to 720p from 1080p
		else if (found_720 && arg_prefres == 1080)
		{
			keep_res = 720;
		}
		// Ultimate fallback.
		else if (found_480)
		{
			keep_res = 480;
		}

		// Get rid of the trailers in irrelevant resolutions.
		for (list<Trailer>::iterator i = trailers.begin(); i != trailers.end(); i++)
		{
			if (i->resolution != keep_res)
			{
				i = trailers.erase(i);
				i--;
			}
		}

		curl_buffer.clear();
		curl_buffer.str("");

		for (list<Trailer>::iterator trailer = trailers.begin(); trailer != trailers.end(); trailer++)
		{
			int pos = trailer->location.rfind("/");
			string mov_filename = dest_dir + trailer->location.substr(pos + 1);
			string mp4_filename;

			if (trailers.size() == 1)
			{
				// Only one trailer. We already checked if the file exists, so just generate
				// the filename.
				mp4_filename = iter->title + ".mp4";
			}
			else
			{
				// More than one trailer. We have to check for existing files now, since
				// we didn't know the filenames before parsing the index webpage.
				mp4_filename = iter->title + " (" + trailer->title + ").mp4";

				if (!arg_overwrite && file_exists_cond(dest_dir, mp4_filename, arg_recur))
				{
					if (arg_verbose)
						cout << "Skipping existing file " << mp4_filename << endl;
					continue;
				}
			}

			if (arg_verbose)
				cout << "Downloading file " << trailer->location << endl;

			// Download trailer
			cleanup_filename = mov_filename;
			curl_file_buffer.open(mov_filename.c_str());
			if (curl_file_buffer)
			{
				curl_easy_setopt(ch, CURLOPT_WRITEFUNCTION, write_data_to_file);
				curl_easy_setopt(ch, CURLOPT_URL, trailer->location.c_str());
				curl_easy_setopt(ch, CURLOPT_USERAGENT, AT_USER_AGENT);
				code = curl_easy_perform(ch);
				curl_file_buffer.close();
				cleanup_filename.clear();

				if (code != 0)
				{
					cerr << "Error downloading " << trailer->location << ": " << curlerr << endl;
					remove(mov_filename.c_str());
					continue;
				}

				if (arg_verbose)
					cout << "Extracting MPEG4 file to " << escape_filename(mp4_filename) << endl;

				string cmd = "ffmpeg ";
				if (!arg_debug)
					cmd += ">/dev/null 2>&1 ";
				if (arg_overwrite)
					cmd += "-y ";
				cmd += "-i \"" + mov_filename + "\"" +
					" -acodec copy -vcodec copy \"" + escape_filename(dest_dir + mp4_filename) + "\"";

				if (system(cmd.c_str()) != 0)
				{
					cerr << "Error in system call: " << cmd << endl;
				}
				else if (arg_tstamp && iter->releasedate != -1)
				{
					struct utimbuf times;
					times.modtime = times.actime = iter->releasedate;
					utime(mp4_filename.c_str(), &times);
				}

				remove(mov_filename.c_str());
			}
		}
	}

	curl_easy_cleanup(ch);

	return 0;
}
