nesvideos-piece.hh
This is the interface to nesvideos-piece. It includes functions
and global variables that are to be accessed by the emulator.
#ifndef NESVPIECEhh
#define NESVPIECEhh
#define NESVIDEOS_LOGGING 1
#ifdef __cplusplus
extern "C" {
#endif
/* Is video logging enabled? 0=no, 1=yes, 2=active. Default value: 0 */
/* The logging functions below only act when the value is 2. */
extern int LoggingEnabled;
/* Get and set the video recording command (shell command). */
/* The text NESVSETTINGS in the command is replaced with the stream parameters */
/* and every VIDEONUMBER with the running number of the recording. */
extern const char* NESVideoGetVideoCmd(void);
extern void NESVideoSetVideoCmd(const char *cmd);
/* Save 1 frame of video. */
/* bpp selects the pixel format of *data: 15, 16, 24 or 32 = BGR at that depth */
/* (32 is converted to 24 internally), 12 = I420, 17 = YUY2. */
/* bpp = 0 keeps the format that was selected by the previous call. */
/* FPS is scaled by 24 bits (*0x1000000) */
/* Does not do anything if LoggingEnabled<2. */
extern void NESVideoLoggingVideo
(const void*data, unsigned width, unsigned height,
unsigned fps_scaled,
unsigned bpp);
/* Save nsamples samples of audio. */
/* rate = samples per second, bits = bits per sample, chans = channel count. */
/* The values given in the first call of a recording stay in effect for it. */
/* Does not do anything if LoggingEnabled<2. */
/* The interval of calling this function is not important, as long as all the audio
* data is eventually written without too big delay (5 seconds is too big)
* This function may be called multiple times per video frame, or once per a few video
* frames, or anything in between. Just that all audio data must be written exactly once,
* and in order. */
extern void NESVideoLoggingAudio
(const void*data,
unsigned rate, unsigned bits, unsigned chans,
unsigned nsamples);
/* nsamples*chans*(bits/8) = bytes in *data. */
/* Requests the current AVI to be closed and a new one to be started */
/* on the next logging call. */
/* Use when encoding parameters have changed */
extern void NESVideoNextAVI();
#ifdef __cplusplus
}
#endif
#endif
nesvideos-piece.cc
This is the source code of the functionality behind nesvideos-piece.
#define THREAD_SAFETY
#include <cmath>
#include <string>
#include <vector>
#include <deque>
#include <list>
#include <map>
#include <unistd.h> // mknod, unlink, write
#include <stdio.h>
#include <sys/stat.h> // S_IFIFO
#include <fcntl.h> // fcntl
#include <sys/poll.h> // poll
#include <stdlib.h> // setenv
#include <string.h> // strrchr
#include <errno.h>
#include <glob.h>
#include <gd.h>
#ifdef THREAD_SAFETY
# include <pthread.h>
#endif
/* Note: This module assumes everyone uses BGR16 as display depth */
//#define LOGO_LENGTH_HEADER (1.2)
//#define LOGO_LENGTH_OVERLAP (10.0-LOGO_LENGTH_HEADER)
//#define LOGO_LENGTH_HEADER (1.1)
//#define LOGO_LENGTH_OVERLAP (6.3-LOGO_LENGTH_HEADER)
//#define LOGO_LENGTH_HEADER (1.4)
/* Length of the logo animation, in seconds (multiplied by the frame rate to
 * get a frame count). OVERLAP = the part that is blended over the source video,
 * HEADER = the part that is shown before the source video begins.
 * Both are 0 here, which disables the logo animation. */
#define LOGO_LENGTH_OVERLAP (0)
#define LOGO_LENGTH_HEADER (0)
/* The shell command used for launching the encoder; see NESVideoSetVideoCmd(). */
static std::string VIDEO_CMD = "";
/*
-rawvideo on:fps=60:format=0x42475220:w=256:h=224:size=$[1024*224]
-audiofile "+AUDIO_FN+"
*/
/* Name of the named pipe through which audio is passed to the encoder.
 * The Construct object below prefixes it with the current directory. */
static std::string AUDIO_FN = "s.log";
/* Set to true when writing to the encoder fails; the AVI class stops
 * processing data once it is set. Nothing in this file resets it. */
static bool Terminate=false;
/* Running number of the recording; replaces VIDEONUMBER in the video command
 * and is incremented by NESVideoNextAVI(). */
static unsigned videonumber = 0;
#ifdef THREAD_SAFETY
/* Held by the public API functions for the duration of each call. */
static pthread_mutex_t APIlock = PTHREAD_MUTEX_INITIALIZER;
#endif
/* Makes one write() attempt of `length` bytes from `buf` into the descriptor
 * behind `fp`, and returns the number of bytes that were accepted.
 *
 * Returns 0 when the write would block (EAGAIN). Any other failure is
 * reported with perror(), raises the global Terminate flag, and also
 * returns 0. A write interrupted by a signal (EINTR) is simply repeated. */
static unsigned NonblockWrite(FILE* fp, const unsigned char*buf, unsigned length)
{
    for(;;)
    {
        const int accepted = write(fileno(fp), buf, length);
        if(accepted != -1)
            return accepted;

        switch(errno)
        {
            case EAGAIN:
                /* The receiving end is not ready for more data right now. */
                return 0;
            case EINTR:
                /* Interrupted before anything was written: try again. */
                continue;
            default:
                perror("write");
                Terminate = true;
                return 0;
        }
    }
}
/* Blocks in poll() until at least one of the two streams reports an event,
 * then tells which of them can be written to.
 *
 * Returns a bit mask: bit 0 (value 1) is set when f1 is writable and
 * bit 1 (value 2) when f2 is writable. The result is 0 when neither stream
 * reported POLLOUT, which includes the case of poll() itself failing. */
static int WaitUntilOneIsWritable(FILE*f1, FILE*f2)
{
    struct pollfd watched[2];
    watched[0].fd      = fileno(f1);
    watched[0].events  = POLLOUT;
    watched[0].revents = 0;
    watched[1].fd      = fileno(f2);
    watched[1].events  = POLLOUT;
    watched[1].revents = 0;

    poll(watched, 2, -1); /* -1 = no timeout */

    int writable = 0;
    if(watched[0].revents & POLLOUT) writable |= 1;
    if(watched[1].revents & POLLOUT) writable |= 2;
    return writable;
}
#define BGR32 0x42475220 // BGR32 fourcc
#define BGR24 0x42475218 // BGR24 fourcc
#define BGR16 0x42475210 // BGR16 fourcc
#define BGR15 0x4247520F // BGR15 fourcc
#define I420 0x30323449 // I420 fourcc
#define YUY2 0x32595559 // YUY2 fourcc
static unsigned USE_FOURCC = BGR16;
static unsigned INPUT_BPP = 16;
#define u32(n) (n)&255,((n)>>8)&255,((n)>>16)&255,((n)>>24)&255
#define u16(n) (n)&255,((n)>>8)&255
#define s4(s) s[0],s[1],s[2],s[3]
static const unsigned FPS_SCALE = 0x1000000;
/* Startup hook: the constructor of this object runs during static
 * initialization and turns AUDIO_FN into an absolute path by prefixing it
 * with the current working directory. (Presumably so that the encoder finds
 * the pipe regardless of its own working directory -- not verifiable here.)
 *
 * If the working directory cannot be determined, the failure is reported
 * and AUDIO_FN is left as a relative name. */
static struct Construct
{
    Construct()
    {
        char Buf[4096];
        if(!getcwd(Buf, sizeof(Buf)))
        {
            /* The contents of Buf are undefined after a failed getcwd(). */
            perror("getcwd");
            return;
        }
        Buf[sizeof(Buf)-1] = 0;
        AUDIO_FN = Buf + std::string("/") + AUDIO_FN;
    }
} Construct;
/* AVI: streams one recording into an external encoder process.
 *
 * Video frames are written into the standard input of the command built by
 * GetMEncoderCommand() (launched with popen), and audio is written into the
 * named pipe AUDIO_FN, which that command is told to read (-audiofile).
 * Both streams are written in non-blocking mode: whatever the encoder does
 * not accept immediately is queued in VideoBuffer / AudioBuffer and retried
 * by CheckFlushing() during later calls.
 *
 * The encoder is launched lazily, when data is about to be written and the
 * parameters of both streams are known (after the first Video() and the
 * first Audio() call). The parameters given in those first calls stay in
 * effect for the lifetime of the object.
 *
 * When launching the encoder or writing to it fails, the global Terminate
 * flag is raised, after which every operation of this class is a no-op.
 */
class AVI
{
    FILE* vidfp;    /* popen() stream: standard input of the encoder */
    FILE* audfp;    /* the named pipe AUDIO_FN, opened for writing   */

    bool     KnowVideo;         /* are the vid_* fields valid? */
    unsigned vid_width;
    unsigned vid_height;
    unsigned vid_fps_scaled;    /* frames per second * FPS_SCALE */
    std::list<std::vector<unsigned char> > VideoBuffer; /* unsent data */
    unsigned VidBufSize;        /* number of bytes in VideoBuffer */

    bool     KnowAudio;         /* are the aud_* fields valid? */
    unsigned aud_rate;
    unsigned aud_chans;
    unsigned aud_bits;
    std::list<std::vector<unsigned char> > AudioBuffer; /* unsent data */
    unsigned AudBufSize;        /* number of bytes in AudioBuffer */

    /* The object owns the two streams, so it must not be copied.
     * These are declared but intentionally not defined. */
    AVI(const AVI&);
    AVI& operator=(const AVI&);

public:
    AVI() :
        vidfp(NULL),
        audfp(NULL),
        KnowVideo(false),
        vid_width(0), vid_height(0), vid_fps_scaled(0),
        VidBufSize(0),
        KnowAudio(false),
        aud_rate(0), aud_chans(0), aud_bits(0),
        AudBufSize(0)
    {
    }

    /* Keeps flushing for as long as both streams have unsent data, then
     * closes the streams (pclose waits for the encoder to exit) and
     * removes the named pipe. */
    ~AVI()
    {
        /* CheckFlushing() does nothing once Terminate is set, so the
         * flag must end this loop or it would never finish. */
        while(VidBufSize && AudBufSize && !Terminate)
        {
            CheckFlushing();
        }
        if(audfp) fclose(audfp);
        if(vidfp) pclose(vidfp);
        unlink(AUDIO_FN.c_str());
    }

    /* Submits nsamples samples of audio from d.
     * r = sample rate, b = bits per sample, c = channel count; these are
     * only recorded in the first call and ignored afterwards. */
    void Audio(unsigned r,unsigned b,unsigned c,
               const unsigned char*d, unsigned nsamples)
    {
        if(Terminate) return;
        if(!KnowAudio)
        {
            aud_rate  = r;
            aud_chans = c;
            aud_bits  = b;
            KnowAudio = true;
        }
        CheckFlushing();

        unsigned bytes = nsamples * aud_chans * (aud_bits / 8);
        unsigned wrote = 0;
        /* Write directly only when nothing is queued ahead of this data. */
        if(KnowVideo && AudioBuffer.empty())
        {
            CheckBegin(); /* ensures that audfp is open, as Video() does */
            if(!Terminate) wrote = NonblockWrite(audfp, d, bytes);
        }
        if(wrote < bytes)
        {
            unsigned remain = bytes-wrote;
            AudioBuffer.push_back(std::vector<unsigned char>(d+wrote, d+bytes));
            AudBufSize += remain;
        }
        CheckFlushing();
    }

    /* Submits one frame of video from d, in the format of INPUT_BPP.
     * w, h = frame size, f = frame rate * FPS_SCALE; these are only
     * recorded in the first call and ignored afterwards. */
    void Video(unsigned w,unsigned h,unsigned f, const unsigned char*d)
    {
        if(Terminate) return;
        if(!KnowVideo)
        {
            vid_width      = w;
            vid_height     = h;
            vid_fps_scaled = f;
            KnowVideo      = true;
        }
        CheckFlushing();

        /* 15 and 17 denote formats that occupy 16 bits per pixel. */
        unsigned bpp = INPUT_BPP; if(bpp == 15 || bpp == 17) bpp = 16;
        unsigned bytes = vid_width * vid_height * bpp / 8;
        unsigned wrote = 0;
        /* Write directly only when nothing is queued ahead of this frame. */
        if(KnowAudio && VideoBuffer.empty())
        {
            CheckBegin();
            if(!Terminate) wrote = NonblockWrite(vidfp, d, bytes);
        }
        if(wrote < bytes)
        {
            unsigned remain = bytes-wrote;
            VideoBuffer.push_back(std::vector<unsigned char>(d+wrote, d+bytes));
            VidBufSize += remain;
        }
        CheckFlushing();
    }

private:
    /* Tries to write the oldest chunk of List into fp, removing whatever
     * was accepted from the chunk and subtracting it from Size.
     *
     * fp is passed as a reference because it may be NULL
     * prior to calling, and this function changes it
     * (through CheckBegin(), which opens the streams). */
    template<typename BufType>
    void FlushBufferSome(BufType& List, unsigned& Size, FILE*& fp, const char* what)
    {
        (void)what; /* only useful for debugging output */

        /* Find the first chunk that has data in it. */
        for(;;)
        {
            if(List.empty() || Terminate) return;
            if(!List.front().empty()) break;
            List.erase(List.begin());
        }

        CheckBegin();
        if(Terminate) return; /* the streams could not be opened */

        std::vector<unsigned char>& buf = List.front();
        unsigned bytes = buf.size();
        unsigned ate = NonblockWrite(fp, &buf[0], bytes);
        buf.erase(buf.begin(), buf.begin()+ate);
        Size -= ate;
        if(buf.empty())
        {
            List.erase(List.begin());
        }
    }

    /* Writes out queued data. When both streams have data queued, this
     * waits and writes until one of the queues runs empty; otherwise it
     * makes a single attempt for each stream. */
    void CheckFlushing()
    {
        if(KnowAudio && KnowVideo && !Terminate)
        {
            if(!AudioBuffer.empty() && !VideoBuffer.empty())
            {
                do {
                    /* vidfp = &1, audfp = &2 */
                    int attempt = WaitUntilOneIsWritable(vidfp, audfp);
                    if(attempt <= 0) break; /* Some kind of error can cause this */
                    // Flush Video
                    if(attempt&1) FlushBufferSome(VideoBuffer, VidBufSize, vidfp, "vid");
                    // Flush Audio
                    if(attempt&2) FlushBufferSome(AudioBuffer, AudBufSize, audfp, "aud");
                    /* A failed write sets Terminate, after which the queues
                     * never shrink; the loop must stop then. */
                } while (!AudioBuffer.empty() && !VideoBuffer.empty() && !Terminate);
            }
            else
            {
                FlushBufferSome(VideoBuffer, VidBufSize, vidfp, "vid");
                FlushBufferSome(AudioBuffer, AudBufSize, audfp, "aud");
            }
        }
    }

    /* Builds the value of the encoder's -rawvideo option. */
    std::string GetMEncoderRawvideoParam() const
    {
        char Buf[512];
        unsigned bpp = INPUT_BPP; if(bpp == 15 || bpp == 17) bpp = 16;
        snprintf(Buf, sizeof(Buf), "fps=%g:format=0x%04X:w=%u:h=%u:size=%u",
            vid_fps_scaled / (double)FPS_SCALE,
            USE_FOURCC,
            vid_width,
            vid_height,
            vid_width*vid_height * bpp/8);
        return Buf;
    }

    /* Builds the value of the encoder's -rawaudio option. */
    std::string GetMEncoderRawaudioParam() const
    {
        char Buf[512];
        snprintf(Buf, sizeof(Buf), "channels=%u:rate=%u:samplesize=%u:bitrate=%u",
            aud_chans,
            aud_rate,
            aud_bits/8,
            aud_rate*aud_chans*(aud_bits/8) );
        return Buf;
    }

    /* Produces the shell command to launch: VIDEO_CMD with the first
     * NESVSETTINGS replaced by the stream settings and every VIDEONUMBER
     * replaced by the number of the recording. The command is also
     * printed to stderr. */
    std::string GetMEncoderCommand() const
    {
        std::string mandatory = "-audiofile " + AUDIO_FN
                              + " -audio-demuxer rawaudio"
                              + " -demuxer rawvideo"
                              + " -rawvideo " + GetMEncoderRawvideoParam()
                              + " -rawaudio " + GetMEncoderRawaudioParam()
                              ;
        std::string cmd = VIDEO_CMD;

        /* The keywords are written in two pieces, presumably so that the
         * complete keyword does not occur in the binary -- TODO confirm. */
        std::string::size_type p = cmd.find("NESV""SETTINGS");
        if(p != cmd.npos)
            cmd.replace(p, 4+8, mandatory);
        else
            fprintf(stderr, "Warning: NESVSETTINGS not found in videocmd\n");

        char videonumstr[64];
        snprintf(videonumstr, sizeof(videonumstr), "%u", videonumber);
        for(;;)
        {
            p = cmd.find("VIDEO""NUMBER");
            if(p == cmd.npos) break;
            cmd.replace(p, 5+6, videonumstr);
        }

        fprintf(stderr, "Launch: %s\n", cmd.c_str()); fflush(stderr);
        return cmd;
    }

    /* Ensures that the encoder is running and both streams are open.
     * The order matters: the named pipe is created first, then the
     * encoder is launched, and only then the pipe is opened for writing.
     * On failure, Terminate is set and a stream may be left NULL. */
    void CheckBegin()
    {
        if(Terminate) return; /* do not retry after a failure */

        if(!audfp)
        {
            unlink(AUDIO_FN.c_str());
            if(mknod(AUDIO_FN.c_str(), S_IFIFO|0666, 0) != 0)
                perror(AUDIO_FN.c_str());
        }
        if(!vidfp)
        {
            /* Note: popen does not accept b/t in mode param */
            setenv("LD_PRELOAD", "", 1);
            vidfp = popen(GetMEncoderCommand().c_str(), "w");
            if(!vidfp)
            {
                perror("Launch failed");
                /* Without the encoder there is nobody to read the
                 * streams; writing to them must not be attempted. */
                Terminate = true;
                return;
            }
            fcntl(fileno(vidfp), F_SETFL, O_WRONLY | O_NONBLOCK);
        }
        if(!audfp)
        {
            for(;;)
            {
                audfp = fopen(AUDIO_FN.c_str(), "wb");
                if(audfp) break;

                const int reason = errno; /* perror() may change errno */
                perror(AUDIO_FN.c_str());
                if(reason != ESTALE)
                {
                    Terminate = true;
                    return;
                }
            }
            fcntl(fileno(audfp), F_SETFL, O_WRONLY | O_NONBLOCK);
        }
    }
};
/* The recording that is currently in progress, or 0 when there is none. */
/* Note: from here on the name AVI denotes this variable; the type has to */
/* be referred to as "class AVI". */
static AVI* AVI = 0;
/* Dimensions of the video frame, in pixels. NESVideoLoggingVideo() sets */
/* these on every call; the logo and conversion helpers read them. */
namespace LogoInfo
{
unsigned width;
unsigned height;
}
#include "quantize.hh"
#include "rgbtorgb.hh"
extern "C"
{
int LoggingEnabled = 0; /* 0=no, 1=yes, 2=recording! */
/* Returns the video recording command that is currently in effect.
 * The returned pointer refers to the internal storage of VIDEO_CMD;
 * no lock is taken here. */
const char* NESVideoGetVideoCmd()
{
    const std::string& current_command = VIDEO_CMD;
    return current_command.c_str();
}
/* Replaces the video recording command with a copy of cmd.
 * A NULL cmd is treated as an empty command.
 * The API lock is held for the duration of the call. */
void NESVideoSetVideoCmd(const char *cmd)
{
#ifdef THREAD_SAFETY
    struct ScopedLock
    {
        ScopedLock()  { pthread_mutex_lock(&APIlock); }
        ~ScopedLock() { pthread_mutex_unlock(&APIlock); }
    } ScopedLock;
#endif
    /* Constructing a std::string from a null pointer is undefined. */
    VIDEO_CMD = cmd ? cmd : "";
}
static class AVI& GetAVIptr()
{
if(!AVI)
{
fprintf(stderr, "Starting new AVI (num %u)\n", videonumber);
AVI = new class AVI;
}
return *AVI;
}
/* Ends the recording in progress, if any, and advances the recording
 * number. The next recording is started by the next logging call.
 * The API lock is held for the duration of the call. */
void NESVideoNextAVI()
{
#ifdef THREAD_SAFETY
    struct ApiGuard
    {
        ApiGuard()  { pthread_mutex_lock(&APIlock); }
        ~ApiGuard() { pthread_mutex_unlock(&APIlock); }
    } guard;
#endif
    if(!AVI) return; /* nothing is being recorded */

    fprintf(stderr, "Closing AVI (next will be started)\n");
    delete AVI;
    AVI = 0;
    ++videonumber;
}
/* Blends the first three bytes of source over the first three bytes of
 * target, in place. alpha = 0 leaves target as it is, alpha = 255 replaces
 * it with source, and the values in between mix the two linearly
 * (the result of the division is truncated towards zero). */
static void Overlay32With32(unsigned char* target, const unsigned char* source, int alpha)
{
    for(int channel = 0; channel < 3; ++channel)
    {
        const int difference = (int)source[channel] - (int)target[channel];
        target[channel] = (unsigned char)(target[channel] + difference * alpha / 255);
    }
}
/* Loads the PNG image fn and blends it over the frame in data, using the
 * alpha channel of the image.
 *
 * data is a frame of LogoInfo::width x LogoInfo::height pixels with three
 * bytes per pixel, in the order blue, green, red.
 * The image is aligned to the top left corner of the frame. Where the image
 * is smaller than the frame, the frame is left as it is.
 *
 * A file that cannot be opened or decoded, or that is not a true color
 * image, is reported on stderr and leaves data unchanged. */
static void OverlayLogoFrom(const char* fn, std::vector<unsigned char>& data)
{
    FILE*fp = fopen(fn, "rb");
    if(!fp)
    {
        perror(fn);
        return; /* Missing frames are skipped */
    }

    gdImagePtr im = gdImageCreateFromPng(fp);
    fclose(fp); /* the image has been read completely at this point */

    if(!im)
    {
        fprintf(stderr, "'%s': Could not be loaded as a PNG image\n", fn);
        return;
    }
    if(!gdImageTrueColor(im))
    {
        fprintf(stderr, "'%s': Only true color images are supported\n", fn);
        gdImageDestroy(im);
        return;
    }

    const unsigned new_width = gdImageSX(im);
    const unsigned new_height= gdImageSY(im);

    if(new_width  != LogoInfo::width
    || new_height != LogoInfo::height)
    {
        /* An image that is up to 20 pixels taller is silently accepted. */
        if(new_height < LogoInfo::height || new_height > LogoInfo::height+20)
            fprintf(stderr, "'%s': ERROR, expected %ux%u, got %ux%u\n", fn,
                LogoInfo::width, LogoInfo::height,
                new_width, new_height);
    }

    /* gdImageTrueColorPixel() does no bounds checking, so the loops
     * must not go past the edges of the image. */
    const unsigned columns = new_width  < LogoInfo::width  ? new_width  : LogoInfo::width;
    const unsigned rows    = new_height < LogoInfo::height ? new_height : LogoInfo::height;

    for(unsigned y=0; y<rows; ++y)
    {
        unsigned char pixbuf[4] = {0,0,0,0};
        for(unsigned x=0; x<columns; ++x)
        {
            const int color = gdImageTrueColorPixel(im, x,y);
            /* gd: 0 = opaque .. 127 = transparent; here: higher = more opaque */
            const int alpha = 255-gdTrueColorGetAlpha(color)*256/128;
            pixbuf[2] = gdTrueColorGetRed(color);
            pixbuf[1] = gdTrueColorGetGreen(color);
            pixbuf[0] = gdTrueColorGetBlue(color);
            Overlay32With32(&data[(y*LogoInfo::width+x)*3], pixbuf, alpha);
        }
    }
    gdImageDestroy(im);
}
/* Returns the path of the logo animation image for frame number frameno.
 *
 * The preferred file is logo_<width>_<height>_f<frame>.png for the current
 * LogoInfo dimensions. If that file is not readable, the animation directory
 * is searched for images of the same frame number that are at least as large
 * as the video frame in both directions, and the one with the smallest
 * excess is chosen. If there is none, the name of the preferred file is
 * returned even though it is not readable.
 *
 * The directory listing is read once, on the first call that needs it. */
static const std::string GetLogoFileName(unsigned frameno)
{
    std::string avdir = "/home/bisqwit/povray/nesvlogov5/";
    //std::string avdir = "/home/bisqwit/povray/nesvlogov6/cv2/";
    //std::string avdir = "/home/bisqwit/povray/nesvlogov6/kuros/";

    char AvName[512];
    snprintf(AvName, sizeof(AvName), "logo_%u_%u_f%03u.png",
        LogoInfo::width,
        LogoInfo::height,
        frameno);

    std::string want = avdir + AvName;
    if(access(want.c_str(), R_OK) == 0)
        return want;

    /* No correct avatar file? Check if there's an approximate match. */
    static std::map<int, std::vector<std::string> > files; /* frame number -> file names */
    static bool files_listed = false;
    if(!files_listed) /* Cache the list of logo files. */
    {
        /* A separate flag is used so that an empty listing is cached, too,
         * rather than repeating the directory scan on every call. */
        files_listed = true;

        static const char GlobPat[] = "logo_*_*_f*.png";
        glob_t globdata;
        memset(&globdata, 0, sizeof(globdata));
        globdata.gl_offs = 0;
        fprintf(stderr, "Loading list of usable logo animation files in %s...\n", avdir.c_str());
        int globres = glob( (avdir + GlobPat).c_str(), GLOB_NOSORT, NULL, &globdata);
        if(globres == 0)
        {
            for(size_t n=0; n<globdata.gl_pathc; ++n)
            {
                const char* fn = globdata.gl_pathv[n];
                const char* slash = strrchr(fn, '/');
                if(slash) fn = slash+1;

                int gotw=0, goth=0, gotf=0;
                /* Names that cannot be parsed are left out. */
                if(sscanf(fn, "logo_%d_%d_f%d", &gotw,&goth,&gotf) == 3)
                    files[gotf].push_back(fn);
            }
        }
        globfree(&globdata);
    }

    std::map<int, std::vector<std::string> >::const_iterator
        i = files.find(frameno);
    if(i == files.end())
        return want;

    std::string best;
    int bestdist = -1;
    const std::vector<std::string>& fnames = i->second;
    for(size_t b=fnames.size(), a=0; a<b; ++a)
    {
        unsigned gotw=0, goth=0;
        if(sscanf(fnames[a].c_str(), "logo_%u_%u", &gotw,&goth) != 2) continue;
        /* Images that are smaller than the video frame are unusable. */
        if(gotw < LogoInfo::width || goth < LogoInfo::height) continue;

        const unsigned excess_w = gotw - LogoInfo::width;
        const unsigned excess_h = goth - LogoInfo::height;
        const int dist = (int)(excess_w > excess_h ? excess_w : excess_h);

        if(bestdist == -1 || dist < bestdist)
            { bestdist = dist; best = fnames[a]; }
    }
    if(bestdist >= 0) want = avdir + best;
    return want;
}
/* Passes the 24-bit frame in logodata through Convert24To16Frame() and
 * returns the output in a new buffer of 2 bytes per pixel.
 * The frame dimensions are taken from LogoInfo. */
static const std::vector<unsigned char> NVConvert24To16Frame
    (const std::vector<unsigned char>& logodata)
{
    const unsigned npixels = LogoInfo::width * LogoInfo::height;
    std::vector<unsigned char> converted(npixels * 2);
    Convert24To16Frame(&logodata[0], &converted[0], npixels, LogoInfo::width);
    return converted;
}
/* Passes the 24-bit frame in logodata through Convert24To15Frame() and
 * returns the output in a new buffer of 2 bytes per pixel.
 * The frame dimensions are taken from LogoInfo. */
static const std::vector<unsigned char> NVConvert24To15Frame
    (const std::vector<unsigned char>& logodata)
{
    const unsigned npixels = LogoInfo::width * LogoInfo::height;
    std::vector<unsigned char> converted(npixels * 2);
    Convert24To15Frame(&logodata[0], &converted[0], npixels, LogoInfo::width);
    return converted;
}
/* Passes the 24-bit frame in logodata through Convert24To_I420Frame() and
 * returns the output in a new buffer of 1.5 bytes per pixel.
 * The frame dimensions are taken from LogoInfo. */
static const std::vector<unsigned char> NVConvert24To_I420Frame
    (const std::vector<unsigned char>& logodata)
{
    const unsigned npixels = LogoInfo::width * LogoInfo::height;
    std::vector<unsigned char> converted(npixels * 3 / 2);
    Convert24To_I420Frame(&logodata[0], &converted[0], npixels, LogoInfo::width);
    return converted;
}
/* Passes the 24-bit frame in logodata through Convert24To_YUY2Frame() and
 * returns the output in a new buffer of 2 bytes per pixel.
 * The frame dimensions are taken from LogoInfo.
 *
 * The buffer must hold 2 bytes per pixel: the YUY2 format (bpp code 17) is
 * handled as 16 bits per pixel by AVI::Video(), which reads
 * width*height*2 bytes from the returned frame. */
static const std::vector<unsigned char> NVConvert24To_YUY2Frame
    (const std::vector<unsigned char>& logodata)
{
    const unsigned npixels = LogoInfo::width * LogoInfo::height;
    std::vector<unsigned char> result(npixels * 2);
    Convert24To_YUY2Frame(&logodata[0], &result[0], npixels, LogoInfo::width);
    return result;
}
/* Passes npixels pixels of 16-bit data through Convert16To24Frame() and
 * returns the output in a new buffer of 3 bytes per pixel. */
static const std::vector<unsigned char> NVConvert16To24Frame
    (const void* data, unsigned npixels)
{
    std::vector<unsigned char> frame24;
    frame24.resize(npixels*3); /* zero-filled, i.e. black */
    Convert16To24Frame(data, &frame24[0], npixels);
    return frame24;
}
/* Passes npixels pixels of 15-bit data through Convert15To24Frame() and
 * returns the output in a new buffer of 3 bytes per pixel. */
static const std::vector<unsigned char> NVConvert15To24Frame
    (const void* data, unsigned npixels)
{
    std::vector<unsigned char> frame24;
    frame24.resize(npixels*3); /* zero-filled, i.e. black */
    Convert15To24Frame(data, &frame24[0], npixels);
    return frame24;
}
/* Passes npixels pixels of I420 data through Convert_I420To24Frame() and
 * returns the output in a new buffer of 3 bytes per pixel.
 * The frame width is taken from LogoInfo. */
static const std::vector<unsigned char> NVConvert_I420To24Frame
    (const void* data, unsigned npixels)
{
    std::vector<unsigned char> frame24;
    frame24.resize(npixels*3); /* zero-filled, i.e. black */
    Convert_I420To24Frame(data, &frame24[0], npixels, LogoInfo::width);
    return frame24;
}
/* Passes npixels pixels of YUY2 data through Convert_YUY2To24Frame() and
 * returns the output in a new buffer of 3 bytes per pixel.
 * The frame width is taken from LogoInfo. */
static const std::vector<unsigned char> NVConvert_YUY2To24Frame
    (const void* data, unsigned npixels)
{
    std::vector<unsigned char> frame24;
    frame24.resize(npixels*3); /* zero-filled, i.e. black */
    Convert_YUY2To24Frame(data, &frame24[0], npixels, LogoInfo::width);
    return frame24;
}
/* While the frames at the beginning of the video consist of one single
 * color (such as the gray of NES), points data at an all-black frame
 * instead, to avoid ugly backgrounds on logo animations.
 *
 * data is read as LogoInfo::width x LogoInfo::height 16-bit pixels.
 * Once a frame with more than one color has been seen, the function never
 * substitutes again; it then only releases the black frame, on the
 * following call. */
static void SubstituteWithBlackIfNeeded(const void*& data)
{
    static bool Deviate = false;       /* has a non-uniform frame been seen? */
    static short* Replacement = 0;     /* the black frame */
    static unsigned wid=0, hei=0;      /* dimensions of the black frame */

    if(Deviate)
    {
        delete[] Replacement;
        Replacement = 0;
        return;
    }

    const unsigned npixels = LogoInfo::width * LogoInfo::height;
    const short* pixels = (const short*)data;

    /* Is every pixel the same as the first one? */
    for(unsigned n=1; n<npixels; ++n)
    {
        if(pixels[n] == pixels[0]) continue;
        Deviate = true;
        return;
    }

    /* The black frame must be redone if the dimensions have changed. */
    const bool resized = (wid != LogoInfo::width || hei != LogoInfo::height);
    if(Replacement && resized)
    {
        delete[] Replacement;
        Replacement = 0;
    }
    wid = LogoInfo::width;
    hei = LogoInfo::height;

    if(!Replacement)
        Replacement = new short[npixels](); /* () = zero-initialized */

    data = (void*)Replacement;
}
/* Helper of NESVideoLoggingVideo: takes a 24-bit frame, converts it into
 * the format denoted by INPUT_BPP (24 = no conversion), and submits
 * the result to the recording in progress. */
static void NESVideoSubmit24bitFrame
    (const std::vector<unsigned char>& frame24,
     unsigned width, unsigned height, unsigned fps_scaled)
{
    switch(INPUT_BPP)
    {
        case 16:
        {
            const std::vector<unsigned char> result = NVConvert24To16Frame(frame24);
            GetAVIptr().Video(width,height,fps_scaled, &result[0]);
            break;
        }
        case 15:
        {
            const std::vector<unsigned char> result = NVConvert24To15Frame(frame24);
            GetAVIptr().Video(width,height,fps_scaled, &result[0]);
            break;
        }
        case 12:
        {
            const std::vector<unsigned char> result = NVConvert24To_I420Frame(frame24);
            GetAVIptr().Video(width,height,fps_scaled, &result[0]);
            break;
        }
        case 17:
        {
            const std::vector<unsigned char> result = NVConvert24To_YUY2Frame(frame24);
            GetAVIptr().Video(width,height,fps_scaled, &result[0]);
            break;
        }
        default:
            GetAVIptr().Video(width,height,fps_scaled, &frame24[0]);
    }
}

/* Submits one frame of video to the recording. Does nothing unless
 * LoggingEnabled is at least 2.
 *
 * data       = the pixels of the frame
 * width,
 * height     = dimensions of the frame in pixels
 * fps_scaled = frame rate, multiplied by 0x1000000
 * bpp        = format of data: 15, 16, 24 or 32 = BGR (32 is converted to 24
 *              first), 12 = I420, 17 = YUY2; 0 = same as in the previous call
 *
 * If the logo animation is enabled (LOGO_LENGTH_* above), the first call
 * also sends the header frames of the animation, and the frames at the
 * beginning of the video are sent with the animation blended over them. */
void NESVideoLoggingVideo
    (const void*data, unsigned width,unsigned height,
     unsigned fps_scaled,
     unsigned bpp
    )
{
    if(LoggingEnabled < 2) return;

#ifdef THREAD_SAFETY
    struct ApiGuard
    {
        ApiGuard()  { pthread_mutex_lock(&APIlock); }
        ~ApiGuard() { pthread_mutex_unlock(&APIlock); }
    } guard;
#endif

    if(bpp == 32) /* Convert 32 to 24 */
    {
        static std::vector<unsigned char> VideoBuf;
        bpp = 24;
        VideoBuf.resize(width*height * 3);
        Convert32To24Frame(data, &VideoBuf[0], width*height);
        data = (void*)&VideoBuf[0];
    }

    if(bpp) INPUT_BPP = bpp;

    /* Unrecognized depths leave the previous fourcc in effect. */
    if     (INPUT_BPP == 32) USE_FOURCC = BGR32;
    else if(INPUT_BPP == 24) USE_FOURCC = BGR24;
    else if(INPUT_BPP == 16) USE_FOURCC = BGR16;
    else if(INPUT_BPP == 15) USE_FOURCC = BGR15;
    else if(INPUT_BPP == 12) USE_FOURCC = I420;
    else if(INPUT_BPP == 17) USE_FOURCC = YUY2;

    /* Lengths of the two parts of the logo animation, in frames */
    const int LogoFramesHeader  = (int)( (LOGO_LENGTH_HEADER  * fps_scaled) / (1 << 24) );
    const int LogoFramesOverlap = (int)( (LOGO_LENGTH_OVERLAP * fps_scaled) / (1 << 24) );

    LogoInfo::width  = width;
    LogoInfo::height = height;

    const bool is_rgb16       = (INPUT_BPP == 16 || INPUT_BPP == 15);
    const bool is_other_known = (INPUT_BPP == 24 || INPUT_BPP == 12 || INPUT_BPP == 17);
    if(is_rgb16)
    {
        SubstituteWithBlackIfNeeded(data);
    }
    else if(!is_other_known)
    {
        fprintf(stderr, "NESVIDEOS_PIECE only supports 16 and 24 bpp, you gave %u bpp\n",
            bpp);
        return;
    }

    /* Part 1, on the first frame only: animation frames that do not
     * involve the source video. */
    static bool LogoHeaderPartSent = false;
    if(!LogoHeaderPartSent)
    {
        LogoHeaderPartSent = true;
        for(int frame = 0; frame < LogoFramesHeader; ++frame)
        {
            std::vector<unsigned char> canvas(width*height*3); /* filled with black. */
            const std::string fn = GetLogoFileName(frame);
            OverlayLogoFrom(fn.c_str(), canvas);
            NESVideoSubmit24bitFrame(canvas, width, height, fps_scaled);
        }
    }

    /* Once the overlapping part is over, frames are passed on as they are. */
    static int LogoOverlapSent = 0;
    if(LogoOverlapSent >= LogoFramesOverlap)
    {
        GetAVIptr().Video(width,height,fps_scaled, (const unsigned char*) data);
        return;
    }

    /* Part 2: animation frames that are mixed with the source video.
     * The source frame is brought into 24-bit form, the animation frame
     * is blended over it, and the result is converted back. */
    const std::string fn = GetLogoFileName(LogoOverlapSent + LogoFramesHeader);

    std::vector<unsigned char> canvas;
    switch(INPUT_BPP)
    {
        case 16: canvas = NVConvert16To24Frame(data, width*height); break;
        case 15: canvas = NVConvert15To24Frame(data, width*height); break;
        case 17: canvas = NVConvert_YUY2To24Frame(data, width*height); break;
        case 12: canvas = NVConvert_I420To24Frame(data, width*height); break;
        default:
            canvas.resize(width*height*3);
            memcpy(&canvas[0], data, width*height*3);
    }

    OverlayLogoFrom(fn.c_str(), canvas);
    NESVideoSubmit24bitFrame(canvas, width, height, fps_scaled);
    ++LogoOverlapSent;
}
/* Submits nsamples samples of audio to the recording. Does nothing unless
 * LoggingEnabled is at least 2.
 *
 * data  = the samples; nsamples*chans*(bits/8) bytes
 * rate  = samples per second
 * bits  = bits per sample
 * chans = number of channels
 *
 * If the logo animation has a header part (LOGO_LENGTH_HEADER > 0), the
 * first call begins the audio with an equal length of silence. */
void NESVideoLoggingAudio
    (const void*data,
     unsigned rate, unsigned bits, unsigned chans,
     unsigned nsamples)
{
    if(LoggingEnabled < 2) return;

#ifdef THREAD_SAFETY
    struct ApiGuard
    {
        ApiGuard()  { pthread_mutex_lock(&APIlock); }
        ~ApiGuard() { pthread_mutex_unlock(&APIlock); }
    } guard;
#endif

    static bool LogoHeaderPartSent = false;
    if(!LogoHeaderPartSent && LOGO_LENGTH_HEADER > 0)
    {
        LogoHeaderPartSent = true;

        double HdrLength = LOGO_LENGTH_HEADER; // N64 workaround
        const long silent_samples = (long)(rate * HdrLength);
        if(silent_samples > 0)
        {
            const unsigned silent_bytes = silent_samples*chans*(bits/8);
            unsigned char* silence = (unsigned char*)malloc(silent_bytes);
            if(silence) /* the silence is left out if memory runs out */
            {
                memset(silence, 0, silent_bytes);
                GetAVIptr().Audio(rate,bits,chans, silence, silent_samples);
                free(silence);
            }
        }
    }

    GetAVIptr().Audio(rate,bits,chans, (const unsigned char*) data, nsamples);
}
} /* extern "C" */
quantize.hh
This include file provides functions for quantizing a single R/G/B value from some bit depth to a lesser bit depth, such as RGB32 to RGB15, with
ordered dithering.
/*
Ordered dithering methods provided for:
8x8 (Quantize8x8)
4x4 (Quantize4x4)
3x3 (Quantize3x3)
4x2 (Quantize4x2)
3x2 (Quantize3x2)
2x2 (Quantize2x2)
The functions are:
template<int m, int in_max>
int QuantizeFunc(size_t quant_pos, double value)
- Quantizes value, assumed to be in range 0..in_max, to range 0..m
- quant_pos tells the coordinate into the dithering matrix
template<int m, int in_max>
int QuantizeFunc(size_t quant_pos, unsigned value)
- Quantizes value, assumed to be in range 0..in_max, to range 0..m
- quant_pos tells the coordinate into the dithering matrix
Copyright (C) 1992,2008 Joel Yliluoma (http://iki.fi/bisqwit/)
*/
#define OrderedDitherDecl(n) \
static const double flts[n]; \
static const int ints[n]; \
enum { mul = n+1, \
maxin = in_max, \
even = !(maxin % mul), \
intmul = even ? 1 : mul };
/* macroes for initializing dither tables */
#define d(n) (n)/double(mul) - 0.5
#define i(n) even ? (n*in_max/mul - (int)in_max/2) \
: (n*in_max - (int)mul*in_max/2)
/* Rescales a value from the range 0..in_max to the range 0..m without
 * dithering. The result is available in res, or by conversion to int. */
template<int m, int in_max = 255>
struct QuantizeNoDither
{
    int res;

    template<typename IntType>
    QuantizeNoDither(IntType v) : res(0)
    {
        res = v * m / in_max;
    }

    operator int() const
    {
        return res;
    }
};
/* Quantizes a value from the range 0..Base::maxin to the range 0..m.
 * Base supplies the dithering matrix (flts, ints, maxin, intmul), and
 * quant_pos selects the cell of the matrix to use.
 * The result is stored in res. */
template<int m, typename Base>
struct QuantizeFuncBase: private Base
{
    int res;

    /* Floating point input. Values that are not above zero produce 0;
     * the result is capped at m. */
    QuantizeFuncBase(size_t quant_pos, double v) : res(0)
    {
        if(!(v > 0.0)) return;

        const double scale = m / double(Base::maxin);
        const int quantized = (int)(v * scale + Base::flts[quant_pos]);
        res = quantized > m ? m : quantized;
    }

    /* Integer input. Dithering is only applied when the range shrinks. */
    QuantizeFuncBase(size_t quant_pos, unsigned char v) : res(v)
    {
        if(m == Base::maxin) return; /* same range: nothing to do */

        if(m > Base::maxin)
        {
            // Range grows: without dithering
            res = QuantizeNoDither<m, Base::maxin> (res);
            return;
        }

        // Range shrinks: with dithering
        const int scale = Base::intmul;
        res = (res * (m * scale) + Base::ints[quant_pos]) / (Base::maxin * scale);
    }
};
/* Defines the template struct "name" as QuantizeFuncBase combined with the
 * dithering matrix "base". An object is constructed from two arguments, which
 * are forwarded to QuantizeFuncBase (position in matrix, value), and
 * converts to int, giving the quantized value. */
#define QuantizeFuncDecl(name, base) \
template<int m, int in_max=255> \
struct name: private QuantizeFuncBase<m, base<in_max> > \
{ \
typedef QuantizeFuncBase<m, base<in_max> > Base; \
template<typename A, typename B> name(A a, B b) : Base(a, b) { } \
operator int() const { return Base::res; } \
}
/******* Quantizing with 8x8 ordered dithering ********/
/* The matrix has 64 cells holding the levels 1..64, each exactly once; */
/* the divisor (mul) is 65. Both tables list the levels in the same order. */
/* The cell is chosen by the quant_pos parameter of the quantizer. */
template<int in_max> struct OrderedDither_8x8 { OrderedDitherDecl(8*8) };
template<int in_max>
const double OrderedDither_8x8<in_max>::flts[] /* A table for 8x8 ordered dithering */
= { d(1 ), d(49), d(13), d(61), d( 4), d(52), d(16), d(64),
d(33), d(17), d(45), d(29), d(36), d(20), d(48), d(32),
d(9 ), d(57), d( 5), d(53), d(12), d(60), d( 8), d(56),
d(41), d(25), d(37), d(21), d(44), d(28), d(40), d(24),
d(3 ), d(51), d(15), d(63), d( 2), d(50), d(14), d(62),
d(35), d(19), d(47), d(31), d(34), d(18), d(46), d(30),
d(11), d(59), d( 7), d(55), d(10), d(58), d( 6), d(54),
d(43), d(27), d(39), d(23), d(42), d(26), d(38), d(22) };
/* The same matrix as integer thresholds */
template<int in_max>
const int OrderedDither_8x8<in_max>::ints[]
= { i(1 ), i(49), i(13), i(61), i( 4), i(52), i(16), i(64),
i(33), i(17), i(45), i(29), i(36), i(20), i(48), i(32),
i(9 ), i(57), i( 5), i(53), i(12), i(60), i( 8), i(56),
i(41), i(25), i(37), i(21), i(44), i(28), i(40), i(24),
i(3 ), i(51), i(15), i(63), i( 2), i(50), i(14), i(62),
i(35), i(19), i(47), i(31), i(34), i(18), i(46), i(30),
i(11), i(59), i( 7), i(55), i(10), i(58), i( 6), i(54),
i(43), i(27), i(39), i(23), i(42), i(26), i(38), i(22) };
/* Quantize8x8<m, in_max>(quant_pos, value) */
QuantizeFuncDecl(Quantize8x8, OrderedDither_8x8);
/******* Quantizing with 4x4 ordered dithering ********/
/* The matrix has 16 cells holding the levels 1..16, each exactly once; */
/* the divisor (mul) is 17. Both tables list the levels in the same order. */
template<int in_max> struct OrderedDither_4x4 { OrderedDitherDecl(4*4) };
template<int in_max>
const double OrderedDither_4x4<in_max>::flts[] /* A table for 4x4 ordered dithering */
= { d( 1), d( 9), d( 3), d(11),
d(13), d( 5), d(15), d( 7),
d( 4), d(12), d( 2), d(10),
d(16), d( 8), d(14), d( 6) };
/* The same matrix as integer thresholds */
template<int in_max>
const int OrderedDither_4x4<in_max>::ints[]
= { i( 1), i( 9), i( 3), i(11),
i(13), i( 5), i(15), i( 7),
i( 4), i(12), i( 2), i(10),
i(16), i( 8), i(14), i( 6) };
/* Quantize4x4<m, in_max>(quant_pos, value) */
QuantizeFuncDecl(Quantize4x4, OrderedDither_4x4);
/******* Quantizing with 3x3 ordered dithering ********/
/* The matrix has 9 cells holding the levels 1..9, each exactly once; */
/* the divisor (mul) is 10. Both tables list the levels in the same order. */
template<int in_max> struct OrderedDither_3x3 { OrderedDitherDecl(3*3) };
template<int in_max>
const double OrderedDither_3x3<in_max>::flts[] /* A table for 3x3 ordered dithering */
= { d(1), d(7), d(3),
d(6), d(4), d(9),
d(8), d(2), d(5) };
/* The same matrix as integer thresholds */
template<int in_max>
const int OrderedDither_3x3<in_max>::ints[]
= { i(1), i(7), i(3),
i(6), i(4), i(9),
i(8), i(2), i(5) };
/* Quantize3x3<m, in_max>(quant_pos, value) */
QuantizeFuncDecl(Quantize3x3, OrderedDither_3x3);
/******* Quantizing with 4x2 ordered dithering ********/
/* The matrix has 8 cells holding the levels 1..8, each exactly once; */
/* the divisor (mul) is 9. Both tables list the levels in the same order. */
template<int in_max> struct OrderedDither_4x2 { OrderedDitherDecl(4*2) };
template<int in_max>
const double OrderedDither_4x2<in_max>::flts[] /* A table for 4x2 ordered dithering */
= { d(1), d(5), d(2), d(6),
d(7), d(3), d(8), d(4) };
/* The same matrix as integer thresholds */
template<int in_max>
const int OrderedDither_4x2<in_max>::ints[]
= { i(1), i(5), i(2), i(6),
i(7), i(3), i(8), i(4) };
/* Quantize4x2<m, in_max>(quant_pos, value) */
QuantizeFuncDecl(Quantize4x2, OrderedDither_4x2);
/******* Quantizing with 3x2 ordered dithering ********/
/* The matrix has 6 cells holding the levels 1..6, each exactly once; */
/* the divisor (mul) is 7. Both tables list the levels in the same order. */
template<int in_max> struct OrderedDither_3x2 { OrderedDitherDecl(3*2) };
template<int in_max>
const double OrderedDither_3x2<in_max>::flts[] /* A table for 3x2 ordered dithering */
= { d(1), d(5), d(3),
d(4), d(2), d(6) };
/* The same matrix as integer thresholds */
template<int in_max>
const int OrderedDither_3x2<in_max>::ints[]
= { i(1), i(5), i(3),
i(4), i(2), i(6) };
/* Quantize3x2<m, in_max>(quant_pos, value) */
QuantizeFuncDecl(Quantize3x2, OrderedDither_3x2);
/******* Quantizing with 2x2 ordered dithering ********/
/* The matrix has 4 cells holding the levels 1..4, each exactly once; */
/* the divisor (mul) is 5. Both tables list the levels in the same order. */
template<int in_max> struct OrderedDither_2x2 { OrderedDitherDecl(2*2) };
template<int in_max>
const double OrderedDither_2x2<in_max>::flts[] /* A table for 2x2 ordered dithering */
= { d(1), d(4),
d(3), d(2) };
/* The same matrix as integer thresholds */
template<int in_max>
const int OrderedDither_2x2<in_max>::ints[]
= { i(1), i(4),
i(3), i(2) };
/* Quantize2x2<m, in_max>(quant_pos, value) */
QuantizeFuncDecl(Quantize2x2, OrderedDither_2x2);
#undef OrderedDitherDecl
#undef QuantizeFuncDecl
#undef i
#undef d
simd.hh
#if defined(__MMX__) && !defined(__x86_64)
#define USE_MMX
#endif
#if defined(__SSE__)
#define USE_SSE
#endif
/* SIMD interface (MMX) written by Bisqwit
* Copyright (C) 1992,2008 Joel Yliluoma (http://iki.fi/bisqwit/)
*/
#ifdef __3dNOW__
# include <mm3dnow.h> /* Note: not available on ICC */
#elif defined(__MMX__)
# include <mmintrin.h>
#endif
#ifdef __SSE__
#include <xmmintrin.h>
#ifdef __ICC
typedef __m128 __v4sf;
#endif
#endif
/* Helper functions shared by the 64-bit integer classes:
 * range clamping and spreading the parts of a 32-bit value into 64 bits. */
struct c64_common
{
    /* Limits v into the range of a signed 8-bit value, -128..127 */
    static signed char clamp_s8(int_fast64_t v)
    {
        if(v < -128) return -128;
        if(v >  127) return  127;
        return v;
    }
    /* Limits v into the range of an unsigned 8-bit value, 0..255 */
    static unsigned char clamp_u8(int_fast64_t v)
    {
        if(v <   0) return 0;
        if(v > 255) return 255;
        return v;
    }
    /* Limits v into the range of a signed 16-bit value, -32768..32767 */
    static short clamp_s16(int_fast64_t v)
    {
        if(v < -32768) return -32768;
        if(v >  32767) return  32767;
        return v;
    }

    /* Places each of the four bytes of a into the low byte of
     * a 16-bit unit of its own: */
    static inline uint_fast64_t expand32_8(uint_fast32_t a)
    {
        // 0000abcd -> 0a0b0c0d
        const uint_fast64_t wide = a;
        const uint_fast64_t byte0 = (wide & 0x000000FFU);
        const uint_fast64_t byte1 = (wide & 0x0000FF00U) <<  8; // bit  8 -> bit 16
        const uint_fast64_t byte2 = (wide & 0x00FF0000U) << 16; // bit 16 -> bit 32
        const uint_fast64_t byte3 = (wide & 0xFF000000UL) << 24; // bit 24 -> bit 48
        return byte0 | byte1 | byte2 | byte3;
    }

    /* Places each of the two 16-bit halves of a into the low half of
     * a 32-bit unit of its own: */
    static inline uint_fast64_t expand32_16(uint_fast32_t a)
    {
        // 0000abcd -> 00ab00cd
        const uint_fast64_t wide = a;
        const uint_fast64_t lower = (wide & 0x0000FFFFU);
        const uint_fast64_t upper = (wide & 0xFFFF0000UL) << 16; // bit 16 -> bit 32
        return lower | upper;
    }
};
#ifdef __MMX__
/* 64-bit integers that use MMX / 3Dnow operations where relevant */
/* The value is held in a __m64. The methods whose names end in 8, 16 or 32
 * treat it as packed units of that many bits; the others treat it as
 * one 64-bit unit. */
struct c64_MMX: public c64_common
{
typedef c64_MMX c64;
__m64 value;
/* Note: the default constructor leaves the value uninitialized. */
inline c64_MMX() { }
inline c64_MMX(__m64 v) : value(v) { }
/* Reinterprets the bits of v as they are. */
inline c64_MMX(const uint64_t& v) : value( *(const __m64*)& v) { }
/* 64-bit shifts. A negative amount shifts into the opposite direction. */
inline c64 operator<< (int b) const { if(b < 0) return *this >> -b; return shl64(b); }
inline c64 operator>> (int b) const { if(b < 0) return *this << -b; return shr64(b); }
/* Unlike the above, these two do not check for negative amounts. */
c64& operator<<= (int n) { return *this = shl64(n); }
c64& operator>>= (int n) { return *this = shr64(n); }
/* Single-operand forms of the conversions below: the value is packed */
/* together with itself. */
c64 conv_s16_u8() const { return conv_s16_u8(*this); }
c64 conv_s16_s8() const { return conv_s16_s8(*this); }
/* Load / store the 8 bytes at p (accessed through a __m64 pointer). */
void Get(const unsigned char* p) { value = *(const __m64*)p; }
void Put( unsigned char* p)const { *(__m64*)p = value; }
/* Set the four 16-bit units: a goes to the lowest, d to the highest. */
void Init16(short a,short b,short c, short d)
{ value = _mm_setr_pi16(a,b,c,d); }
/* Set all four 16-bit units to a. */
void Init16(short a)
{ value = _mm_set1_pi16(a); }
/* Note: this is identical to Get(), i.e. it loads all 8 bytes. */
void GetD(const unsigned char* p) { value = *(const __m64*)p; }
/* Read the nth 16-bit / 32-bit unit, counting in memory order. */
template<int n>
short Extract16() const { return ((const short*)&value)[n]; }
template<int n>
int Extract32() const { return ((const int*)&value)[n]; }
/* Combines bytes of the two lowest 16-bit units into one 16-bit value.
 * NOTE(review): the expression ORs byte 0 with a 16-bit value read at
 * byte offset 1 (an unaligned read of bytes 1 and 2), so byte 1 ends up
 * OR'ed into the low byte of the result. That gives "byte 0 | byte 2<<8"
 * only if byte 1 is zero -- presumably the callers ensure that; confirm. */
short Extract88_from_1616lo() const
{
const unsigned char* data = (const unsigned char*)&value;
// bytes: 76543210
// shorts: 33221100
// take: H L
return data[0] | *(short*)(data+1);
//return data[0] | ((*(const unsigned int*)data) >> 8);
}
/* The same for the two highest 16-bit units (bytes 4, 5 and 6). */
short Extract88_from_1616hi() const
{
const unsigned char* data = 4+(const unsigned char*)&value;
// bytes: 76543210
// shorts: 33221100
// take: H L
return data[0] | *(short*)(data+1);
//return data[0] | ((*(const unsigned int*)data) >> 8);
}
c64& operator&= (const c64& b) { value=_mm_and_si64(value,b.value); return *this; }
c64& operator|= (const c64& b) { value=_mm_or_si64(value,b.value); return *this; }
c64& operator^= (const c64& b) { value=_mm_xor_si64(value,b.value); return *this; }
c64& operator+= (const c64& b) { return *this = *this + b; }
c64& operator-= (const c64& b) { return *this = *this - b; }
/* psllqi: p = packed
s = shift
r = right, l = left
l = shift in zero, a = shift in sign bit
q = 64-bit, d = 32-bit, w = 16-bit
[i = immed amount]
*/
c64 operator& (const c64& b) const { return c64(_mm_and_si64(value,b.value)); }
c64 operator| (const c64& b) const { return c64(_mm_or_si64(value,b.value)); }
c64 operator^ (const c64& b) const { return c64(_mm_xor_si64(value,b.value)); }
/* 64-bit subtraction and addition. The intrinsic is used if SSE2 is */
/* available; otherwise the values are handled as uint64_t. */
c64 operator- (const c64& b) const
{
#ifdef __SSE2__
return _mm_sub_si64(value, b.value);
#else
return (const uint64_t&)value - (const uint64_t&)b.value;
#endif
}
c64 operator+ (const c64& b) const
{
#ifdef __SSE2__
return _mm_add_si64(value, b.value);
#else
return (const uint64_t&)value + (const uint64_t&)b.value;
#endif
}
/* Shifts: shl = left, shr = right, sar = right with the "srai" intrinsics */
c64 shl64(int b) const { return _mm_slli_si64(value, b); }
c64 shr64(int b) const { return _mm_srli_si64(value, b); }
c64 shl16(int b) const { return _mm_slli_pi16(value, b); }
c64 shr16(int b) const { return _mm_srli_pi16(value, b); }
c64 sar32(int b) const { return _mm_srai_pi32(value, b); }
c64 sar16(int b) const { return _mm_srai_pi16(value, b); }
/* Packed arithmetic on 32-bit, 16-bit and 8-bit units */
c64 add32(const c64& b) const { return _mm_add_pi32(value, b.value); }
c64 add16(const c64& b) const { return _mm_add_pi16(value, b.value); }
c64 sub32(const c64& b) const { return _mm_sub_pi32(value, b.value); }
c64 sub16(const c64& b) const { return _mm_sub_pi16(value, b.value); }
c64 mul16(const c64& b) const { return _mm_mullo_pi16(value, b.value); }
c64 mul16hi(const c64& b) const { return _mm_mulhi_pi16(value, b.value); }
//c64 mul32(const c64& b) const { return _mm_mullo_pi32(value, b.value); }
c64 add8(const c64& b) const { return _mm_add_pi8(value, b.value); }
c64 sub8(const c64& b) const { return _mm_sub_pi8(value, b.value); }
/* Interleaving. Note that the two-operand forms pass b as the first */
/* operand of the intrinsic and this value as the second. */
c64 unpacklbw(const c64& b) const { return _mm_unpacklo_pi8(b.value,value); }
c64 unpacklwd(const c64& b) const { return _mm_unpacklo_pi16(b.value,value); }
c64 unpackhbw(const c64& b) const { return _mm_unpackhi_pi8(b.value,value); }
c64 unpackhwd(const c64& b) const { return _mm_unpackhi_pi16(b.value,value); }
c64 unpackldq(const c64& b) const { return _mm_unpacklo_pi32(b.value,value); }
c64 unpackldq() const { return _mm_unpacklo_pi32(value,value); }
/* AND with the bits of a plain 64-bit integer. */
/* (Unlike the other operators, this one is not declared const.) */
c64 operator& (const uint64_t& v) { return c64(_mm_and_si64(value, *(const __m64*)& v)); }
/* Narrowing conversions, done with the "packs" intrinsics. This value */
/* is the first operand and b the second. */
c64 conv_s32_s16(const c64& b) const { return _mm_packs_pi32(value, b.value); }
c64 conv_s16_u8(const c64& b) const { return _mm_packs_pu16(value, b.value); }
c64 conv_s16_s8(const c64& b) const { return _mm_packs_pi16(value, b.value); }
};
#endif
/* c64_nonMMX: plain-integer implementation of the 64-bit "packed register"
 * wrapper. It offers the same member names as the MMX-backed variant but
 * keeps the register in an ordinary 64-bit integer.
 *
 * Lane-wise operations (add16, shl16, ...) view `value` as an array of
 * 8/16/32-bit elements through a pointer cast.
 * The clamp_* and expand32_* helpers are inherited from c64_common
 * (not shown in this part of the file).
 */
struct c64_nonMMX: public c64_common
{
    typedef c64_nonMMX c64;

    uint_least64_t value;

    /* Default construction leaves `value` uninitialized. */
    inline c64_nonMMX() { }
    inline c64_nonMMX(uint64_t v) : value(v) { }

    /* Whole-register shifts. A negative count shifts the other way. */
    c64 operator<< (int b) const { if(b < 0) return *this >> -b; return shl64(b); }
    c64 operator>> (int b) const { if(b < 0) return *this << -b; return shr64(b); }
    c64& operator<<= (int n) { return *this = shl64(n); }
    c64& operator>>= (int n) { return *this = shr64(n); }

    /* Single-operand packs: use this register for both halves of the result. */
    c64 conv_s16_u8() const { return conv_s16_u8(*this); }
    c64 conv_s16_s8() const { return conv_s16_s8(*this); }

    /* Load four 16-bit lanes; `a` becomes the least significant lane. */
    void Init16(short a,short b,short c, short d)
    {
        uint_fast64_t aa = (unsigned short)a,
                      bb = (unsigned short)b,
                      cc = (unsigned short)c,
                      dd = (unsigned short)d;
        value = aa | (bb << 16) | (cc << 32) | (dd << 48);
    }
    /* Broadcast one 16-bit value to all four lanes. */
    void Init16(short a)
    { Init16(a,a,a,a); }

    /* Load eight bytes; `a` becomes the least significant byte. */
    void Init8(unsigned char a,unsigned char b,unsigned char c,unsigned char d,
               unsigned char e,unsigned char f,unsigned char g,unsigned char h)
    {
        /* Every byte is widened BEFORE it is shifted. Shifting the promoted
         * int instead (d << 24) produces a negative int for d >= 128, and
         * converting that to 64 bits would sign-extend ones over e..h. */
        value =  ((uint_fast64_t)a)
              | (((uint_fast64_t)b) <<  8)
              | (((uint_fast64_t)c) << 16)
              | (((uint_fast64_t)d) << 24)
              | (((uint_fast64_t)e) << 32)
              | (((uint_fast64_t)f) << 40)
              | (((uint_fast64_t)g) << 48)
              | (((uint_fast64_t)h) << 56);
    }

    /* Raw 8-byte load / store through a pointer cast. */
    void Get(const unsigned char* p)      { value = *(const uint_least64_t*)p; }
    void Put(      unsigned char* p)const { *(uint_least64_t*)p =  value; }

    /* Whole-register (64-bit) bitwise and arithmetic operators. */
    c64& operator&= (const c64& b) { value&=b.value; return *this; }
    c64& operator|= (const c64& b) { value|=b.value; return *this; }
    c64& operator^= (const c64& b) { value^=b.value; return *this; }
    c64& operator+= (const c64& b) { value+=b.value; return *this; }
    c64& operator-= (const c64& b) { value-=b.value; return *this; }
    c64 operator& (const c64& b) const { return value & b.value; }
    c64 operator| (const c64& b) const { return value | b.value; }
    c64 operator^ (const c64& b) const { return value ^ b.value; }
    c64 operator- (const c64& b) const { return value - b.value; }
    c64 operator+ (const c64& b) const { return value + b.value; }
    c64 operator& (uint_fast64_t b) const { return value & b; }

    /* usimdsim: apply "lane op scalar" to `count` lanes of `type` in res.
     * simdsim:  apply "lane op lane"   to `count` lanes of `type` in res and b. */
    #define usimdsim(type, count, op) \
        type* p = (type*)&res.value; \
        for(int n=0; n<count; ++n) p[n] = (p[n] op b)
    #define simdsim(type, count, op) \
        type* p = (type*)&res.value; \
        const type* o = (const type*)&b.value; \
        for(int n=0; n<count; ++n) p[n] = (p[n] op o[n])

    c64 shl64(int b) const { return value << b; }
    c64 shr64(int b) const { return value >> b; }
    /* A 64-bit register holds FOUR 16-bit lanes; all of them are shifted.
     * shl16 works on unsigned lanes so no negative value is left-shifted. */
    c64 shl16(int b) const { c64 res = *this; usimdsim(unsigned short, 4, <<); return res; }
    c64 shr16(int b) const { c64 res = *this; usimdsim(unsigned short, 4, >>); return res; }
    c64 sar32(int b) const { c64 res = *this; usimdsim(int, 2, >>); return res; }
    c64 sar16(int b) const { c64 res = *this; usimdsim(short, 4, >>); return res; }

    c64 add16(const c64& b) const { c64 res = *this; simdsim(short, 4, +); return res; }
    c64 sub16(const c64& b) const { c64 res = *this; simdsim(short, 4, -); return res; }
    c64 add32(const c64& b) const { c64 res = *this; simdsim(int,   2, +); return res; }
    c64 sub32(const c64& b) const { c64 res = *this; simdsim(int,   2, -); return res; }
    c64 mul16(const c64& b) const { c64 res = *this; simdsim(short, 4, *); return res; }
    /* The trailing ">> 16" attaches to the macro's last statement, giving
     * p[n] = (p[n] * o[n]) >> 16, i.e. the high half of each product. */
    c64 mul16hi(const c64& b) const { c64 res = *this; simdsim(short, 4, *) >> 16; return res; }
    c64 add8(const c64& b) const { c64 res = *this; simdsim(unsigned char, 8, +); return res; }
    c64 sub8(const c64& b) const { c64 res = *this; simdsim(unsigned char, 8, -); return res; }
    #undef simdsim
    #undef usimdsim

    /* Pack two 32-bit lanes of *this followed by two of b into four 16-bit
     * lanes, passing each through clamp_s16. */
    c64 conv_s32_s16(const c64& b) const
    {
        c64 res; res.
        Init16(clamp_s16(value & 0xFFFFFFFFU),
               clamp_s16(value >> 32),
               clamp_s16(b.value & 0xFFFFFFFFU),
               clamp_s16(b.value >> 32));
        return res;
    }
    /* Pack four 16-bit lanes of *this followed by four of b into eight
     * bytes, passing each through clamp_u8.
     * NOTE(review): the lanes are passed zero-extended (& 0xFFFF); whether
     * negative inputs clamp to 0 depends on clamp_u8's parameter type - confirm. */
    c64 conv_s16_u8(const c64& b) const
    {
        c64 res; res.
        Init8(clamp_u8(value & 0xFFFF),
              clamp_u8((value >> 16) & 0xFFFF),
              clamp_u8((value >> 32) & 0xFFFF),
              clamp_u8((value >> 48) & 0xFFFF),
              clamp_u8(b.value & 0xFFFF),
              clamp_u8((b.value >> 16) & 0xFFFF),
              clamp_u8((b.value >> 32) & 0xFFFF),
              clamp_u8((b.value >> 48) & 0xFFFF));
        return res;
    }
    /* Same as conv_s16_u8, but each lane goes through clamp_s8. */
    c64 conv_s16_s8(const c64& b) const
    {
        c64 res; res.
        Init8(clamp_s8(value & 0xFFFF),
              clamp_s8((value >> 16) & 0xFFFF),
              clamp_s8((value >> 32) & 0xFFFF),
              clamp_s8((value >> 48) & 0xFFFF),
              clamp_s8(b.value & 0xFFFF),
              clamp_s8((b.value >> 16) & 0xFFFF),
              clamp_s8((b.value >> 32) & 0xFFFF),
              clamp_s8((b.value >> 48) & 0xFFFF));
        return res;
    }

    /* TODO: Verify that these are correct (though they should never be used anyway) */
    /* NOTE(review): in the intrinsic branches below the argument p is the
     * FIRST operand (its elements land in the lower position of each pair).
     * The expand32_* fallbacks place *this there instead, assuming
     * expand32_N spreads its input over the even N-bit slots - confirm
     * against c64_common before relying on the non-MMX branch. */
    c64 unpacklbw(const c64& p) const
    {
    #if defined(__MMX__) && !defined(__ICC)
        /* ICC says [error: type of cast must be integral or enum]
         * on the return value cast,
         * so we cannot use this code on ICC. Fine for GCC. */
        return (uint_least64_t)_m_punpcklbw(*(__m64*)&p.value, *(__m64*)&value);
    #else
        uint_fast64_t a=value, b=p.value;
        return expand32_8(a) | (expand32_8(b) << 8);
    #endif
    }
    c64 unpackhbw(const c64& p) const
    {
    #if defined(__MMX__) && !defined(__ICC)
        return (uint_least64_t)_m_punpckhbw(*(__m64*)&p.value, *(__m64*)&value);
    #else
        uint_fast64_t a=value, b=p.value;
        return expand32_8(a>>32) | (expand32_8(b>>32) << 8);
    #endif
    }
    c64 unpacklwd(const c64& p) const
    {
    #if defined(__MMX__) && !defined(__ICC)
        return (uint_least64_t)_m_punpcklwd(*(__m64*)&p.value, *(__m64*)&value);
    #else
        uint_fast64_t a=value, b=p.value;
        return expand32_16(a) | (expand32_16(b) << 16);
    #endif
    }
    c64 unpackhwd(const c64& p) const
    {
    #if defined(__MMX__) && !defined(__ICC)
        return (uint_least64_t)_m_punpckhwd(*(__m64*)&p.value, *(__m64*)&value);
    #else
        uint_fast64_t a=value, b=p.value;
        return expand32_16(a>>32) | (expand32_16(b>>32) << 16);
    #endif
    }
    /* Duplicate the low dword of *this into both halves. */
    c64 unpackldq() const { return unpackldq(*this); }
    /* Result: low dword = low dword of p, high dword = low dword of *this. */
    c64 unpackldq(const c64& p) const
    {
    #if defined(__MMX__) && !defined(__ICC)
        return (uint_least64_t)_m_punpckldq(*(__m64*)&p.value, *(__m64*)&value);
    #else
        /* Same operand order as the intrinsic above. The high dword of p is
         * masked off so that it cannot leak into the result. */
        return (p.value & 0xFFFFFFFFU) | (value << 32);
    #endif
    }
};
/* Pick the implementation behind the generic name c64:
 * the MMX-backed wrapper when USE_MMX is defined,
 * the plain-integer wrapper otherwise. */
#ifdef USE_MMX
typedef c64_MMX c64;
#else
typedef c64_nonMMX c64;
#endif
/* Issues the "end of MMX code" instruction after a run of MMX operations:
 * femms when compiling with 3DNow! support, emms when only MMX is available.
 * Compiles to nothing when neither is enabled. */
static inline void MMX_clear()
{
#if !defined(__3dNOW__) && defined(__MMX__)
    _mm_empty();
#endif
#ifdef __3dNOW__
    _m_femms(); /* Note: not available on ICC */
#endif
}
rgbtorgb.hh
This include file provides functions for converting images of a certain bit depth to another bit depth. The functions use MMX optimizations where possible.
Note: If your compiler does not support
__attribute__((noinline)), you can remove that expression with no ill effects. It is there only to help with debugging.
/* The header is usable from both C and C++.
 * In C++ the functions get C linkage and the swap_red_blue parameters
 * default to true. In C there are no default arguments, and bool is
 * defined as int for the duration of this header only. */
#ifdef __cplusplus
extern "C" {
#define defaulttrue =true
#else
#define defaulttrue
#define bool int
#endif
/* RGB to RGB and RGB from/to YCbCr (YUV) conversions written by Bisqwit
 * Copyright (C) 1992,2008 Joel Yliluoma (http://iki.fi/bisqwit/)
 *
 * Concepts:
 * 15 = RGB15 or BGR15
 * 16 = RGB16 or BGR16
 * 24 = RGB24 or BGR24
 * 32 = RGB32 or BGR32
 * I420 = YCbCr where Y is issued for each pixel,
 * followed by Cr for 2x2 pixels,
 * followed by Cb for 2x2 pixels
 * YUY2 = YCbCr where for each pixel, Y is issued,
 * followed by Cr for 2x1 pixels (if even pixel)
 * or Cb for 2x1 pixels (if odd pixel)
 *
 * Common parameters:
 * data    = source pixels
 * dest    = output buffer, provided by the caller
 * npixels = total number of pixels to convert
 * width   = pixels per scanline (only for functions that need positions)
 *
 * Note: Not all functions honor the swap_red_blue setting.
 */
/* 32-bit to 24-bit: copies the first three bytes of every 4-byte pixel. */
void Convert32To24Frame(const void* data, unsigned char* dest, unsigned npixels)
__attribute__((noinline));
/* 15/16-bit to 24/32-bit expansion. */
void Convert15To24Frame(const void* data, unsigned char* dest, unsigned npixels, bool swap_red_blue defaulttrue)
__attribute__((noinline));
void Convert16To24Frame(const void* data, unsigned char* dest, unsigned npixels, bool swap_red_blue defaulttrue)
__attribute__((noinline));
void Convert15To32Frame(const void* data, unsigned char* dest, unsigned npixels, bool swap_red_blue defaulttrue)
__attribute__((noinline));
void Convert16To32Frame(const void* data, unsigned char* dest, unsigned npixels, bool swap_red_blue defaulttrue)
__attribute__((noinline));
/* 24-bit to 16/15-bit reduction; width is used to derive each pixel's
 * x,y position for the 4x4 quantization pattern. */
void Convert24To16Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width);
void Convert24To15Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width);
/* Conversions from and to I420. */
void Convert_I420To24Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width, bool swap_red_blue defaulttrue)
__attribute__((noinline));
void Convert15To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width);
void Convert16To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width);
void Convert24To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width);
void Convert32To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width);
/* Conversions from and to YUY2. */
void Convert_YUY2To24Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width, bool swap_red_blue defaulttrue)
__attribute__((noinline));
void Convert15To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width);
void Convert16To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width);
void Convert24To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width);
void Convert32To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width);
/* Remove the helper macros again so they do not leak into the includer. */
#ifdef __cplusplus
}
#undef defaulttrue
#else
#undef defaulttrue
#undef bool
#endif
rgbtorgb.cc
This is the source code of the functionality behind rgbtorgb.hh.
#include <stdint.h>
#include <stdlib.h> // for size_t
#include <vector>
#include <cmath>
/* RGB to RGB and RGB from/to I420 conversions written by Bisqwit
* Copyright (C) 1992,2008 Joel Yliluoma (http://iki.fi/bisqwit/)
*/
typedef uint_least64_t uint64_t;
#include "quantize.hh"
#include "rgbtorgb.hh"
#include "simd.hh"
/* For BPP conversions */
/* 64-bit masks; all are 8-byte aligned so they can be read as whole quadwords. */
static const uint64_t zero64 __attribute__((aligned(8))) = 0ULL;
/* Pieces of a 64-bit word in units of 24-bit pixels:
 * mask24l    = bits  0..23 (first packed pixel)
 * mask24h    = bits 24..47 (second packed pixel)
 * mask24hh   = top 16 bits
 * mask24hhh  = top 32 bits
 * mask24hhhh = top 48 bits */
static const uint64_t mask24l __attribute__((aligned(8))) = 0x0000000000FFFFFFULL;
static const uint64_t mask24h __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL;
static const uint64_t mask24hh __attribute__((aligned(8))) = 0xffff000000000000ULL;
static const uint64_t mask24hhh __attribute__((aligned(8))) = 0xffffffff00000000ULL;
static const uint64_t mask24hhhh __attribute__((aligned(8))) = 0xffffffffffff0000ULL;
/* Alternating lane selectors: h = upper, l = lower element of each pair;
 * no suffix = bytes, w = 16-bit words, d = 32-bit dwords. */
static const uint64_t mask64h __attribute__((aligned(8))) = 0xFF00FF00FF00FF00ULL;
static const uint64_t mask64l __attribute__((aligned(8))) = 0x00FF00FF00FF00FFULL;
static const uint64_t mask64hw __attribute__((aligned(8))) = 0xFFFF0000FFFF0000ULL;
static const uint64_t mask64lw __attribute__((aligned(8))) = 0x0000FFFF0000FFFFULL;
static const uint64_t mask64hd __attribute__((aligned(8))) = 0xFFFFFFFF00000000ULL;
static const uint64_t mask64ld __attribute__((aligned(8))) = 0x00000000FFFFFFFFULL;
/* For RGB2YUV: */
/* Fixed-point coefficients, scaled by 1<<RGB2YUV_SHIFT. The first letter
 * names the input component (R,G,B), the second the output (Y,U,V).
 * The trailing comment on each line shows the formula the literal came from. */
static const int RGB2YUV_SHIFT = 15; /* highest value where [RGB][YUV] fit in signed short */
static const int RY = 8414; // ((int)(( 65.738/256.0)*(1<<RGB2YUV_SHIFT)+0.5));
static const int RV = 14392; // ((int)((112.439/256.0)*(1<<RGB2YUV_SHIFT)+0.5));
static const int RU = -4856; // ((int)((-37.945/256.0)*(1<<RGB2YUV_SHIFT)+0.5));
static const int GY = 16519; // ((int)((129.057/256.0)*(1<<RGB2YUV_SHIFT)+0.5));
static const int GV = -12051;// ((int)((-94.154/256.0)*(1<<RGB2YUV_SHIFT)+0.5));
static const int GU = -9534; // ((int)((-74.494/256.0)*(1<<RGB2YUV_SHIFT)+0.5));
static const int BY = 3208; // ((int)(( 25.064/256.0)*(1<<RGB2YUV_SHIFT)+0.5));
static const int BV = -2339; // ((int)((-18.285/256.0)*(1<<RGB2YUV_SHIFT)+0.5));
static const int BU = 14392; // ((int)((112.439/256.0)*(1<<RGB2YUV_SHIFT)+0.5));
/* Offsets added to the scaled Y, U and V results. */
static const int Y_ADD = 16;
static const int U_ADD = 128;
static const int V_ADD = 128;
/* For YUV2RGB: */
/* Fixed-point coefficients for the reverse direction, scaled by 1<<YUV2RGB_SHIFT. */
static const int YUV2RGB_SHIFT = 13; /* highest value where UB still fits in signed short */
static const int Y_REV = 9539; // ((int)( ( 255 / 219.0 ) * (1<<YUV2RGB_SHIFT)+0.5));
static const int VR = 14688; // ((int)( ( 117504 / 65536.0 ) * (1<<YUV2RGB_SHIFT)+0.5));
static const int VG = -6659; // ((int)( ( -53279 / 65536.0 ) * (1<<YUV2RGB_SHIFT)+0.5));
static const int UG = -3208; // ((int)( ( -25675 / 65536.0 ) * (1<<YUV2RGB_SHIFT)+0.5));
static const int UB = 16525; // ((int)( ( 132201 / 65536.0 ) * (1<<YUV2RGB_SHIFT)+0.5));
/****************/
/* Packs eight 32-bit pixels (two per input register) into 24 output bytes.
 *
 * Each input register holds two 4-byte pixels; the 4th byte of every pixel
 * is dropped. dest receives three consecutive 8-byte stores (24 bytes).
 */
template<typename c64>
static inline void Convert32To24_32bytes(c64 w0, c64 w1, c64 w2, c64 w3, unsigned char* dest)
{
    /* Close the one-byte gap inside each register: 6 payload bytes, 2 empty. */
    c64 pair_ab = (w0 & mask24l) | ((w0 >> 8) & mask24h); /* bbbaaa */
    c64 pair_cd = (w1 & mask24l) | ((w1 >> 8) & mask24h); /* dddccc */
    c64 pair_ef = (w2 & mask24l) | ((w2 >> 8) & mask24h); /* fffeee */
    c64 pair_gh = (w3 & mask24l) | ((w3 >> 8) & mask24h); /* hhhggg */

    /* Stitch the four 6-byte pieces into three full quadwords. */
    c64 out0 =  pair_ab        | ((pair_cd << 48) & mask24hh);   /* ccbbbaaa */
    c64 out1 = (pair_cd >> 16) | ((pair_ef << 32) & mask24hhh);  /* feeedddc */
    c64 out2 = (pair_ef >> 32) | ((pair_gh << 16) & mask24hhhh); /* hhhgggff */

    out0.Put(dest +  0);
    out1.Put(dest +  8);
    out2.Put(dest + 16);
}
#if defined(__x86_64) || defined(USE_MMX)
static void Convert32To24_32bytes(const unsigned char* src,
unsigned char* dest)
{
c64 w0; w0.Get(src+0);
c64 w1; w1.Get(src+8);
c64 w2; w2.Get(src+16);
c64 w3; w3.Get(src+24);
Convert32To24_32bytes(w0,w1,w2,w3, dest);
}
#endif
/* Converts npixels 4-byte pixels at data into 3-byte pixels at dest by
 * copying the first three bytes of each pixel and skipping the fourth.
 *
 * On x86-64 or with USE_MMX, groups of eight pixels go through the
 * 64-bit packing routine; the remainder is copied byte by byte.
 */
void Convert32To24Frame(const void* data, unsigned char* dest, unsigned npixels)
{
    const unsigned char* src = (const unsigned char*)data;
#if defined(__x86_64) || defined(USE_MMX)
    for(; npixels >= 8; npixels -= 8, src += 4*8, dest += 3*8)
        Convert32To24_32bytes(src, dest);
  #ifdef USE_MMX
    MMX_clear();
  #endif
#endif
    /* Leftover pixels (or all of them when no 64-bit path is compiled in). */
    for(unsigned left = npixels; left > 0; --left, src += 4, dest += 3)
    {
        dest[0] = src[0];
        dest[1] = src[1];
        dest[2] = src[2];
    }
}
/* Splits a 5-6-5 packed pixel into three bytes.
 * target[0] <- bits 11..15, target[1] <- bits 5..10, target[2] <- bits 0..4,
 * each moved to the top of its byte (low bits are left zero). */
static void Unbuild16(unsigned char* target, unsigned rgb16)
{
    target[0] = (unsigned char)(((rgb16 >> 11) & 0x1Fu) << 3);
    target[1] = (unsigned char)(((rgb16 >>  5) & 0x3Fu) << 2);
    target[2] = (unsigned char)(( rgb16        & 0x1Fu) << 3);
}
/* Splits a 5-5-5 packed pixel into three bytes.
 * target[0] <- bits 10..14, target[1] <- bits 5..9, target[2] <- bits 0..4,
 * each moved to the top of its byte (low bits are left zero).
 * Bit 15 and above are ignored. */
static void Unbuild15(unsigned char* target, unsigned rgb16)
{
    target[0] = (unsigned char)(((rgb16 >> 10) & 0x1Fu) << 3);
    target[1] = (unsigned char)(((rgb16 >>  5) & 0x1Fu) << 3);
    target[2] = (unsigned char)(( rgb16        & 0x1Fu) << 3);
}
/* 64-bit constant made of four 16-bit lanes in the pattern lo,hi,lo,hi
 * (lo in the least significant lane). Each argument is truncated to 16 bits. */
template<int basevalue_lo, int basevalue_hi>
struct Bits16const
{
    static const uint64_t value;
};
/* Build the 32-bit lo|hi pair once, then copy it into both dwords;
 * multiplying by 0x0000000100000001 duplicates a 32-bit quantity. */
template<int basevalue_lo, int basevalue_hi>
const uint64_t Bits16const<basevalue_lo, basevalue_hi>::value =
    ( static_cast<uint64_t>(static_cast<unsigned short>(basevalue_lo))
    | (static_cast<uint64_t>(static_cast<unsigned short>(basevalue_hi)) << 16) )
    * 0x0000000100000001ULL;
/* 64-bit constant made of two 32-bit lanes: lo in the lower dword,
 * hi in the upper dword. Negative arguments are taken as their
 * unsigned 32-bit bit patterns. */
template<int basevalue_lo, int basevalue_hi>
struct Bits32const
{
    static const uint64_t value;
};
template<int basevalue_lo, int basevalue_hi>
const uint64_t Bits32const<basevalue_lo, basevalue_hi>::value =
    (static_cast<uint64_t>(static_cast<unsigned int>(basevalue_hi)) << 32)
  |  static_cast<uint64_t>(static_cast<unsigned int>(basevalue_lo));
/* 64-bit constant made of eight byte positions in the pattern
 * lo,hi,lo,hi,... (lo at the least significant byte).
 * The arguments are NOT masked to 8 bits; wider values overlap their
 * neighbours exactly as plain shifting and OR-ing produces. */
template<uint64_t basevalue_lo, uint64_t basevalue_hi>
struct Bits8const
{
    /* One lo|hi pair occupying the low 16 bits. */
    static const uint64_t pair16 = basevalue_lo | (basevalue_hi << 8);
    /* The pair repeated at bit offsets 0, 16, 32 and 48. */
    static const uint64_t value =
        pair16 | (pair16 << 16) | (pair16 << 32) | (pair16 << 48);
};
template<int lowbitcount, int highbitcount, int leftshift>
struct MaskBconst
{
static const uint64_t basevalue_lo = (1 << lowbitcount) - 1;
static const uint64_t basevalue_hi = (1 << highbitcount) - 1;
static const uint64_t value = Bits8const<basevalue_lo,basevalue_hi>::value << leftshift;
};
/* Masks used when widening a `bits`-wide colour field to 8 bits.
 * The members are defined outside the class so that each one has an address;
 * Convert_2byte_helper binds references to them. */
template<int bits>
struct Convert_2byte_consts
{
/* `bits` one-bits in the low byte of every 16-bit lane */
static const uint64_t mask_lo;// = MaskBconst<bits,0, 0>::value;
/* the same pattern moved up 8 bits: the high byte of every 16-bit lane */
static const uint64_t mask_hi;// = MaskBconst<bits,0, 8>::value;
/* (8-bits) one-bits in every byte: the low bits left empty after scaling */
static const uint64_t mask_frac;// = MaskBconst<8-bits,8-bits, 0>::value;
};
template<int bits>
const uint64_t Convert_2byte_consts<bits>::mask_lo = MaskBconst<bits, 0, 0>::value;
template<int bits>
const uint64_t Convert_2byte_consts<bits>::mask_hi = MaskBconst<bits, 0, 8>::value;
template<int bits>
const uint64_t Convert_2byte_consts<bits>::mask_frac = MaskBconst<8-bits, 8-bits, 0>::value;
/* Extracts one colour component from eight 2-byte pixels and scales it to
 * the full 8-bit range.
 *
 * offs = bit position of the component inside each 16-bit pixel
 * bits = width of the component in bits
 *
 * p4a and p4b each carry four pixels. After construction, `lo` holds the
 * component of p4a's pixels and `hi` that of p4b's pixels, one value in
 * the low byte of every 16-bit lane.
 */
template<int offs, int bits>
struct Convert_2byte_helper
{
    c64 lo, hi;

    Convert_2byte_helper(c64 p4a, c64 p4b)
    {
        typedef Convert_2byte_consts<bits> masks;

        /* STEP 1: isolate the component. p4a's pixels go to the low byte
         * and p4b's pixels to the high byte of each 16-bit lane.
         * (8-offs may be negative; c64's shift operators handle that.) */
        c64 from_a = (p4a >> offs) & masks::mask_lo;
        c64 from_b = (p4b << (8-offs)) & masks::mask_hi;
        c64 field  = from_a | from_b;

        /* STEP 2: scale to the 256 range. Move the field to the top of its
         * byte and refill the emptied low bits with the field's own top bits. */
        c64 upper  = field << (8-bits);
        c64 refill = (field >> (bits-(8-bits))) & masks::mask_frac;
        c64 full8  = upper | refill;

        /* STEP 3: separate the two interleaved pixel groups again. */
        lo =  full8       & mask64l;
        hi = (full8 >> 8) & mask64l;
    }
};
/*
template<int roffs,int rbits, int goffs,int gbits, int boffs,int bbits>
static void Convert_2byte_to_24Common(const unsigned char* src, unsigned char* dest)
__attribute((noinline));
*/
/* Converts eight 2-byte pixels (16 bytes at src) to eight 4-byte pixels
 * (rgb24 == false, 32 bytes written) or eight 3-byte pixels
 * (rgb24 == true, 24 bytes written).
 *
 * Xoffs/Xbits give the bit position and width of each component in the
 * 16-bit source pixel. In the output, byte 0 of a pixel comes from the
 * "b" field, byte 1 from the "g" field and byte 2 from the "r" field;
 * byte 3 (32-bit output only) is zero.
 */
template<int roffs,int rbits, int goffs,int gbits, int boffs,int bbits, bool rgb24>
static void Convert_2byte_to_24or32Common(const unsigned char* src, unsigned char* dest)
{
c64 p4a; p4a.Get(src+0); // four pixels
c64 p4b; p4b.Get(src+8); // another four pixels
/* in: In both registers: */
/* Each helper yields .lo (component of p4a's pixels) and
 * .hi (component of p4b's pixels), widened to 8 bits per 16-bit lane. */
Convert_2byte_helper<roffs,rbits> r(p4a,p4b);
Convert_2byte_helper<boffs,bbits> b(p4a,p4b);
Convert_2byte_helper<goffs,gbits> g(p4a,p4b);
/* STEP 4: CONVERT PIXELS INTO RGB32 */
/* Now we have:
* b.lo = 0j0g0d0a
* g.lo = 0k0h0e0b
* r.lo = 0l0i0f0c
* b.hi = 0J0G0D0A
* g.hi = 0K0H0E0B
* r.hi = 0L0I0F0C
* We want:
* w1 = 0fed0cba
* w2 = 0lkj0ihg
* w3 = 0FED0CBA
* w4 = 0LKJ0IHG
*/
/* The unpack-based variant below is compiled out by the "0 &&". */
#if 0 && defined(__MMX__) /* FIXME why is this 0&&? */
// punpcklbw 0k0h0e0b, 0j0g0d0a -> 00ed00ba
// punpcklwd 0l0i0f0c, ________ -> 0f__0c__
c64 w1 = r.lo.unpacklwd(zero64) | g.lo.unpacklbw(b.lo); // pix 0,1
// punpckhbw 0k0h0e0b, 0j0g0d0a -> 00kj00hg
// punpckhwd 0l0i0f0c, ________ -> 0l__0i__
c64 w2 = r.lo.unpackhwd(zero64) | g.lo.unpackhbw(b.lo); // pix 2,3
c64 w3 = r.hi.unpacklwd(zero64) | g.hi.unpacklbw(b.hi); // pix 4,5
c64 w4 = r.hi.unpackhwd(zero64) | g.hi.unpackhbw(b.hi); // pix 6,7
#ifndef USE_MMX
MMX_clear();
#endif
#else
/* With 64-bit registers, this code is greatly simpler than
* the emulation of unpack opcodes. However, when the
* unpack opcodes is available, using them is shorter.
* Which way is faster? FIXME: Find out
*/
// mask64lw: 00**00**
// mask64hw: **00**00
// b.lo & mask64lw: 000g000a
// g.lo & mask64lw: 000h000b
// r.lo & mask64lw: 000i000c
// b.lo & mask64hw: 0j000d00
// g.lo & mask64hw: 0k000e00
// r.lo & mask64hw: 0l000f00
/* t??1 collects the pixels in the even 16-bit lanes, t??2 those in the
 * odd lanes; each 32-bit half then holds one complete output pixel. */
c64 tlo1 = ((b.lo & mask64lw) ) | ((g.lo & mask64lw) << 8) | ((r.lo & mask64lw) << 16);
c64 tlo2 = ((b.lo & mask64hw) >>16) | ((g.lo & mask64hw) >> 8) | ((r.lo & mask64hw) );
c64 thi1 = ((b.hi & mask64lw) ) | ((g.hi & mask64lw) << 8) | ((r.hi & mask64lw) << 16);
c64 thi2 = ((b.hi & mask64hw) >>16) | ((g.hi & mask64hw) >> 8) | ((r.hi & mask64hw) );
/*
* tlo1 = 0ihg0cba
* tlo2 = 0lkj0fed
* thi1 = 0IHG0CBA
* thi2 = 0LKJ0FED
* mask64ld = 0000****
* mask64hd = ****0000
*/
/* Re-pair the halves so that each word holds two consecutive pixels. */
c64 w1 = (tlo1 & mask64ld) | ((tlo2 & mask64ld) << 32); // 00000cba | 00000fed = 0fed0cba
c64 w2 = (tlo2 & mask64hd) | ((tlo1 & mask64hd) >> 32); // 0lkj0000 | 0ihg0000 = 0lkj0ihg
c64 w3 = (thi1 & mask64ld) | ((thi2 & mask64ld) << 32);
c64 w4 = (thi2 & mask64hd) | ((thi1 & mask64hd) >> 32);
#endif
if(rgb24)
{
/* STEP 5A: CONVERT PIXELS INTO RGB24 */
Convert32To24_32bytes(w1,w2,w3,w4, dest);
}
else
{
/* STEP 5B: STORE RGB32 */
w1.Put(dest+0);
w2.Put(dest+8);
w3.Put(dest+16);
w4.Put(dest+24);
}
/*
punpcklbw ____ABCD, ____abcd = AaBbCcDd
punpcklwd ____ABCD, ____abcd = ABabCDcd
punpckldq ____ABCD, ____abcd = ABCDabcd
punpckhbw ABCD____, abcd____ = AaBbCcDd
punpckhwd ABCD____, abcd____ = ABabCDcd
punpckhdq ABCD____, abcd____ = ABCDabcd
*/
}
/* Converts npixels 15-bit (5-5-5) pixels at data into 3-byte pixels at dest.
 *
 * swap_red_blue = true : output byte 0 <- bits 10..14, byte 2 <- bits 0..4
 * swap_red_blue = false: output byte 0 <- bits 0..4,  byte 2 <- bits 10..14
 * Byte 1 always comes from bits 5..9.
 *
 * Pixels are processed eight at a time; the remaining npixels % 8 pixels
 * are handled one by one and produce exactly the same bytes as the
 * eight-pixel path would.
 */
void Convert15To24Frame(const void* data, unsigned char* dest, unsigned npixels, bool swap_red_blue)
{
    const unsigned char* src = (const unsigned char*)data;
    if(swap_red_blue)
        for(; npixels >= 8; src += 8*2, dest += 8*3, npixels -= 8)
            Convert_2byte_to_24or32Common<0,5, 5,5, 10,5, true> (src, dest);
    else
        for(; npixels >= 8; src += 8*2, dest += 8*3, npixels -= 8)
            Convert_2byte_to_24or32Common<10,5, 5,5, 0,5, true> (src, dest);
#ifdef USE_MMX
    MMX_clear();
#endif
    for(unsigned a=0; a<npixels; ++a)
    {
        unsigned short v = ((const unsigned short*)src)[a];
        unsigned char* px = &dest[a*3];
        Unbuild15(px, v);
        /* Unbuild15 leaves the low 3 bits zero; refill them with the
         * component's top bits, as the eight-pixel path does. */
        px[0] |= px[0] >> 5;
        px[1] |= px[1] >> 5;
        px[2] |= px[2] >> 5;
        /* Unbuild15 writes the swap_red_blue == true layout. */
        if(!swap_red_blue)
        {
            unsigned char tmp = px[0];
            px[0] = px[2];
            px[2] = tmp;
        }
    }
}
/* Converts npixels 16-bit (5-6-5) pixels at data into 3-byte pixels at dest.
 *
 * swap_red_blue = true : output byte 0 <- bits 11..15, byte 2 <- bits 0..4
 * swap_red_blue = false: output byte 0 <- bits 0..4,  byte 2 <- bits 11..15
 * Byte 1 always comes from bits 5..10.
 *
 * Pixels are processed eight at a time; the remaining npixels % 8 pixels
 * are handled one by one and produce exactly the same bytes as the
 * eight-pixel path would.
 */
void Convert16To24Frame(const void* data, unsigned char* dest, unsigned npixels, bool swap_red_blue)
{
    const unsigned char* src = (const unsigned char*)data;
    if(swap_red_blue)
        for(; npixels >= 8; src += 8*2, dest += 8*3, npixels -= 8)
            Convert_2byte_to_24or32Common<0,5, 5,6, 11,5, true> (src, dest);
    else
        for(; npixels >= 8; src += 8*2, dest += 8*3, npixels -= 8)
            Convert_2byte_to_24or32Common<11,5, 5,6, 0,5, true> (src, dest);
#ifdef USE_MMX
    MMX_clear();
#endif
    for(unsigned a=0; a<npixels; ++a)
    {
        unsigned short v = ((const unsigned short*)src)[a];
        unsigned char* px = &dest[a*3];
        Unbuild16(px, v);
        /* Unbuild16 leaves the low bits zero; refill them with the
         * component's top bits, as the eight-pixel path does. */
        px[0] |= px[0] >> 5;
        px[1] |= px[1] >> 6;
        px[2] |= px[2] >> 5;
        /* Unbuild16 writes the swap_red_blue == true layout. */
        if(!swap_red_blue)
        {
            unsigned char tmp = px[0];
            px[0] = px[2];
            px[2] = tmp;
        }
    }
}
/* Converts npixels 15-bit (5-5-5) pixels at data into 4-byte pixels at dest.
 *
 * swap_red_blue = true : output byte 0 <- bits 10..14, byte 2 <- bits 0..4
 * swap_red_blue = false: output byte 0 <- bits 0..4,  byte 2 <- bits 10..14
 * Byte 1 always comes from bits 5..9; byte 3 is always written as zero.
 *
 * Pixels are processed eight at a time; the remaining npixels % 8 pixels
 * are handled one by one and produce exactly the same bytes as the
 * eight-pixel path would.
 */
void Convert15To32Frame(const void* data, unsigned char* dest, unsigned npixels, bool swap_red_blue)
{
    const unsigned char* src = (const unsigned char*)data;
    if(swap_red_blue)
        for(; npixels >= 8; src += 8*2, dest += 8*4, npixels -= 8)
            Convert_2byte_to_24or32Common<0,5, 5,5, 10,5, false> (src, dest);
    else
        for(; npixels >= 8; src += 8*2, dest += 8*4, npixels -= 8)
            Convert_2byte_to_24or32Common<10,5, 5,5, 0,5, false> (src, dest);
#ifdef USE_MMX
    MMX_clear();
#endif
    for(unsigned a=0; a<npixels; ++a)
    {
        unsigned short v = ((const unsigned short*)src)[a];
        unsigned char* px = &dest[a*4];
        Unbuild15(px, v);
        /* Unbuild15 leaves the low 3 bits zero; refill them with the
         * component's top bits, as the eight-pixel path does. */
        px[0] |= px[0] >> 5;
        px[1] |= px[1] >> 5;
        px[2] |= px[2] >> 5;
        /* Unbuild15 only writes three bytes; the eight-pixel path stores
         * zero in the fourth, so do the same here. */
        px[3] = 0;
        /* Unbuild15 writes the swap_red_blue == true layout. */
        if(!swap_red_blue)
        {
            unsigned char tmp = px[0];
            px[0] = px[2];
            px[2] = tmp;
        }
    }
}
/* Converts npixels 16-bit (5-6-5) pixels at data into 4-byte pixels at dest.
 *
 * swap_red_blue = true : output byte 0 <- bits 11..15, byte 2 <- bits 0..4
 * swap_red_blue = false: output byte 0 <- bits 0..4,  byte 2 <- bits 11..15
 * Byte 1 always comes from bits 5..10; byte 3 is always written as zero.
 *
 * Pixels are processed eight at a time; the remaining npixels % 8 pixels
 * are handled one by one and produce exactly the same bytes as the
 * eight-pixel path would.
 */
void Convert16To32Frame(const void* data, unsigned char* dest, unsigned npixels, bool swap_red_blue)
{
    const unsigned char* src = (const unsigned char*)data;
    if(swap_red_blue)
        for(; npixels >= 8; src += 8*2, dest += 8*4, npixels -= 8)
            Convert_2byte_to_24or32Common<0,5, 5,6, 11,5, false> (src, dest);
    else
        for(; npixels >= 8; src += 8*2, dest += 8*4, npixels -= 8)
            Convert_2byte_to_24or32Common<11,5, 5,6, 0,5, false> (src, dest);
#ifdef USE_MMX
    MMX_clear();
#endif
    for(unsigned a=0; a<npixels; ++a)
    {
        unsigned short v = ((const unsigned short*)src)[a];
        unsigned char* px = &dest[a*4];
        Unbuild16(px, v);
        /* Unbuild16 leaves the low bits zero; refill them with the
         * component's top bits, as the eight-pixel path does. */
        px[0] |= px[0] >> 5;
        px[1] |= px[1] >> 6;
        px[2] |= px[2] >> 5;
        /* Unbuild16 only writes three bytes; the eight-pixel path stores
         * zero in the fourth, so do the same here. */
        px[3] = 0;
        /* Unbuild16 writes the swap_red_blue == true layout. */
        if(!swap_red_blue)
        {
            unsigned char tmp = px[0];
            px[0] = px[2];
            px[2] = tmp;
        }
    }
}
/* Packs one 3-byte pixel into a 5-6-5 value.
 * rgbdata[0] goes to bits 11..15, rgbdata[1] to bits 5..10 and rgbdata[2]
 * to bits 0..4. x and y select one of the 16 cells of the 4x4 pattern
 * that is handed to Quantize4x4. */
static inline unsigned Build16(unsigned x,unsigned y, const unsigned char* rgbdata)
{
    const unsigned cell = (x + 4*y) & 15u;
    return  Quantize4x4<31>(cell, rgbdata[2])
         | (Quantize4x4<63>(cell, rgbdata[1]) << 5)
         | (Quantize4x4<31>(cell, rgbdata[0]) << 11);
}
/* Packs one 3-byte pixel into a 5-5-5 value.
 * rgbdata[0] goes to bits 10..14, rgbdata[1] to bits 5..9 and rgbdata[2]
 * to bits 0..4. x and y select one of the 16 cells of the 4x4 pattern
 * that is handed to Quantize4x4. */
static inline unsigned Build15(unsigned x,unsigned y, const unsigned char* rgbdata)
{
    const unsigned cell = (x + 4*y) & 15u;
    return  Quantize4x4<31>(cell, rgbdata[2])
         | (Quantize4x4<31>(cell, rgbdata[1]) << 5)
         | (Quantize4x4<31>(cell, rgbdata[0]) << 10);
}
/* Converts npixels 3-byte pixels at data into 16-bit (5-6-5) pixels at dest.
 * width is the scanline length in pixels; it is only used to derive the
 * x,y position passed to Build16 for each pixel. */
void Convert24To16Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
{
    const unsigned char* rgb = (const unsigned char*) data;
    unsigned short* out = (unsigned short*) dest;
    unsigned col = 0, row = 0;
    for(unsigned left = npixels; left > 0; --left, rgb += 3, ++out)
    {
        *out = Build16(col, row, rgb);
        /* advance the position, wrapping at the end of each scanline */
        if(++col >= width) { col = 0; ++row; }
    }
}
/* Converts npixels 3-byte pixels at data into 15-bit (5-5-5) pixels at dest.
 * width is the scanline length in pixels; it is only used to derive the
 * x,y position passed to Build15 for each pixel. */
void Convert24To15Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
{
    const unsigned char* rgb = (const unsigned char*) data;
    unsigned short* out = (unsigned short*) dest;
    unsigned col = 0, row = 0;
    for(unsigned left = npixels; left > 0; --left, rgb += 3, ++out)
    {
        *out = Build15(col, row, rgb);
        /* advance the position, wrapping at the end of each scanline */
        if(++col >= width) { col = 0; ++row; }
    }
}
#ifdef __MMX__
/* Converts a 2x2 block of 4-byte pixels to I420 samples using MMX.
 *
 * p0_1    = two horizontally adjacent pixels of the upper row
 * p2_3    = the two pixels directly below them
 * dest_y0 = receives the two luma bytes of the upper row
 * dest_y1 = receives the two luma bytes of the lower row
 * dest_u  = receives one U byte computed from the sum of all four pixels
 * dest_v  = receives one V byte computed from the sum of all four pixels
 *
 * Bytes 0,1,2 of each pixel are weighted with the R*, G*, B* coefficients;
 * byte 3 is multiplied by zero.
 */
static inline void Convert_I420_MMX_Common
(c64_MMX p0_1, c64_MMX p2_3,
unsigned char* dest_y0,
unsigned char* dest_y1,
unsigned char* dest_u,
unsigned char* dest_v)
{
/* Widen each pixel's four bytes to four 16-bit lanes (one register per pixel). */
c64_MMX p0 = c64_MMX(zero64).unpacklbw(p0_1); // expand to 64-bit (4*16)
c64_MMX p1 = c64_MMX(zero64).unpackhbw(p0_1);
c64_MMX p2 = c64_MMX(zero64).unpacklbw(p2_3);
c64_MMX p3 = c64_MMX(zero64).unpackhbw(p2_3);
c64_MMX ry_gy_by; ry_gy_by.Init16(RY,GY,BY, 0);
c64_MMX rgb_u; rgb_u.Init16(RU,GU,BU, 0);
c64_MMX rgb_v; rgb_v.Init16(RV,GV,BV, 0);
/* Component-wise sum of the four pixels, used for the shared chroma sample. */
c64_MMX ctotal = p0.add16(
p2.add16(
p1.add16(
p3)));
/* Multiply-add: each register becomes two 32-bit partial sums of the luma dot product. */
p0 = _mm_madd_pi16(ry_gy_by.value, p0.value);
p1 = _mm_madd_pi16(ry_gy_by.value, p1.value);
p2 = _mm_madd_pi16(ry_gy_by.value, p2.value);
p3 = _mm_madd_pi16(ry_gy_by.value, p3.value);
/* Add the two partial sums, drop the fixed-point fraction, one luma per lane. */
c64_MMX yy;
yy.Init16( ((p0.Extract32<0>() + p0.Extract32<1>()) >> (RGB2YUV_SHIFT)),
((p1.Extract32<0>() + p1.Extract32<1>()) >> (RGB2YUV_SHIFT)),
((p2.Extract32<0>() + p2.Extract32<1>()) >> (RGB2YUV_SHIFT)),
((p3.Extract32<0>() + p3.Extract32<1>()) >> (RGB2YUV_SHIFT)) );
yy = yy.add16( Bits16const<Y_ADD,Y_ADD>::value );
// Because we're writing to adjacent pixels, we optimize this by
// writing two 8-bit values at once in both cases.
*(short*)dest_y0 = yy.Extract88_from_1616lo();
*(short*)dest_y1 = yy.Extract88_from_1616hi();
c64_MMX u_total32 = _mm_madd_pi16(rgb_u.value, ctotal.value);
c64_MMX v_total32 = _mm_madd_pi16(rgb_v.value, ctotal.value);
/* The extra +2 in the shift divides by 4: ctotal is the sum of four pixels. */
*dest_u = U_ADD + ((u_total32.Extract32<0>() + u_total32.Extract32<1>()) >> (RGB2YUV_SHIFT+2));
*dest_v = V_ADD + ((v_total32.Extract32<0>() + v_total32.Extract32<1>()) >> (RGB2YUV_SHIFT+2));
}
/* Converts four horizontally consecutive 4-byte pixels to 8 output bytes
 * using MMX.
 *
 * p0_1      = pixels 0 and 1
 * p2_3      = pixels 2 and 3
 * dest_yvyu = receives 8 bytes: Y0 V01 Y1 U01 Y2 V23 Y3 U23, where each
 *             V/U value is computed from the sum of its pixel pair
 *
 * Bytes 0,1,2 of each pixel are weighted with the R*, G*, B* coefficients;
 * byte 3 is multiplied by zero.
 */
static inline void Convert_YUY2_MMX_Common
(c64_MMX p0_1, c64_MMX p2_3,
unsigned char* dest_yvyu)
{
/* Widen each pixel's four bytes to four 16-bit lanes (one register per pixel). */
c64_MMX p0 = c64_MMX(zero64).unpacklbw(p0_1); // expand to 64-bit (4*16)
c64_MMX p1 = c64_MMX(zero64).unpackhbw(p0_1);
c64_MMX p2 = c64_MMX(zero64).unpacklbw(p2_3); // expand to 64-bit (4*16)
c64_MMX p3 = c64_MMX(zero64).unpackhbw(p2_3);
c64_MMX ry_gy_by; ry_gy_by.Init16(RY,GY,BY, 0);
c64_MMX rgb_u; rgb_u.Init16(RU,GU,BU, 0);
c64_MMX rgb_v; rgb_v.Init16(RV,GV,BV, 0);
/* Component-wise sums of each horizontal pixel pair, used for chroma. */
c64_MMX ctotal0 = p0.add16(p1);
c64_MMX ctotal2 = p2.add16(p3);
/* Multiply-add: each register becomes two 32-bit partial sums of the luma dot product. */
p0 = _mm_madd_pi16(ry_gy_by.value, p0.value);
p1 = _mm_madd_pi16(ry_gy_by.value, p1.value);
p2 = _mm_madd_pi16(ry_gy_by.value, p2.value);
p3 = _mm_madd_pi16(ry_gy_by.value, p3.value);
c64_MMX yy;
yy.Init16( ((p0.Extract32<0>() + p0.Extract32<1>()) >> (RGB2YUV_SHIFT)),
((p1.Extract32<0>() + p1.Extract32<1>()) >> (RGB2YUV_SHIFT)),
((p2.Extract32<0>() + p2.Extract32<1>()) >> (RGB2YUV_SHIFT)),
((p3.Extract32<0>() + p3.Extract32<1>()) >> (RGB2YUV_SHIFT)) );
yy = yy.add16( Bits16const<Y_ADD,Y_ADD>::value );
c64_MMX u_total32_0 = _mm_madd_pi16(rgb_u.value, ctotal0.value);
c64_MMX v_total32_0 = _mm_madd_pi16(rgb_v.value, ctotal0.value);
c64_MMX u_total32_2 = _mm_madd_pi16(rgb_u.value, ctotal2.value);
c64_MMX v_total32_2 = _mm_madd_pi16(rgb_v.value, ctotal2.value);
c64_MMX quadword = yy; // four y values: at 0, 2, 4 and 6
/* The extra +1 in the shift divides by 2: each ctotal is the sum of two pixels. */
c64_MMX uv; uv.Init16(
((v_total32_0.Extract32<0>() + v_total32_0.Extract32<1>()) >> (RGB2YUV_SHIFT+1)),
((u_total32_0.Extract32<0>() + u_total32_0.Extract32<1>()) >> (RGB2YUV_SHIFT+1)),
((v_total32_2.Extract32<0>() + v_total32_2.Extract32<1>()) >> (RGB2YUV_SHIFT+1)),
((u_total32_2.Extract32<0>() + u_total32_2.Extract32<1>()) >> (RGB2YUV_SHIFT+1)) );
c64_MMX uv_adds; uv_adds.Init16(V_ADD, U_ADD, V_ADD, U_ADD);
uv = uv.add16(uv_adds);
quadword |= uv << 8; // two u and v values: at 1, 3, 5 and 7.
quadword.Put(dest_yvyu); // store all 8 bytes: y at 0,2,4,6 and v/u at 1,3,5,7
}
#endif
/*template<int PixStride>
void Convert_4byte_To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
__attribute__((noinline));*/
template<int PixStride>
void Convert_4byte_To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
{
const unsigned char* src = (const unsigned char*) data;
unsigned height = npixels / width;
unsigned pos = 0;
unsigned ypos = 0;
unsigned vpos = npixels;
unsigned upos = vpos + npixels / 4;
unsigned stride = width*PixStride;
/* This function is based on code from x264 svn version 711 */
/* TODO: Apply MMX optimization for 24-bit pixels */
for(unsigned y=0; y<height; y += 2)
{
for(unsigned x=0; x<width; x += 2)
{
#ifdef __MMX__
if(PixStride == 4)
{
c64_MMX p0_1; p0_1.Get(&src[pos]); // two 32-bit pixels (4*8)
c64_MMX p2_3; p2_3.Get(&src[pos+stride]); // two 32-bit pixels
pos += PixStride*2;
Convert_I420_MMX_Common(p0_1, p2_3,
dest+ypos,
dest+ypos+width,
dest+upos++,
dest+vpos++);
}
else
#endif
{
int c[3], rgb[3][4];
/* luma */
for(int n=0; n<3; ++n) c[n] = rgb[n][0] = src[pos + n];
for(int n=0; n<3; ++n) c[n] += rgb[n][1] = src[pos + n + stride];
pos += PixStride;
for(int n=0; n<3; ++n) c[n] += rgb[n][2] = src[pos + n];
for(int n=0; n<3; ++n) c[n] += rgb[n][3] = src[pos + n + stride];
pos += PixStride;
unsigned destpos[4] = { ypos, ypos+width, ypos+1, ypos+width+1 };
for(int n=0; n<4; ++n)
{
dest[destpos[n]]
= Y_ADD + ((RY * rgb[0][n]
+ GY * rgb[1][n]
+ BY * rgb[2][n]
) >> RGB2YUV_SHIFT); // y
}
dest[upos++] = (U_ADD + ((RU * c[0] + GU * c[1] + BU * c[2]) >> (RGB2YUV_SHIFT+2)) );
dest[vpos++] = (V_ADD + ((RV * c[0] + GV * c[1] + BV * c[2]) >> (RGB2YUV_SHIFT+2)) );
}
ypos += 2;
}
pos += stride;
ypos += width;
}
#ifdef __MMX__
MMX_clear();
#endif
}
/* Converts a frame of PixStride-byte pixels (bytes 0,1,2 of each pixel are
 * used) into packed output at dest, 4 bytes per horizontal pixel pair:
 *   byte 0 = luma of the first pixel
 *   byte 1 = V computed from the sum of both pixels
 *   byte 2 = luma of the second pixel
 *   byte 3 = U computed from the sum of both pixels
 *
 * Pixels are consumed in pairs, so width is expected to be even.
 * With __MMX__ and PixStride == 4, four pixels at a time are handled by
 * Convert_YUY2_MMX_Common as long as at least four pixels remain in the
 * current row; a trailing pair (width % 4 == 2) uses the integer code, so
 * the MMX path never reads or writes beyond the end of a row.
 */
template<int PixStride>
void Convert_4byte_To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
{
    const unsigned char* src = (const unsigned char*) data;
    unsigned height = npixels / width;
    unsigned pos = 0;   /* read offset into src */
    unsigned ypos = 0;  /* write offset into dest */
    /* This function is based on code from x264 svn version 711 */
    /* TODO: Apply MMX optimization for 24-bit pixels */
    for(unsigned y=0; y<height; ++y)
    {
        for(unsigned x=0; x<width; x += 2)
        {
#ifdef __MMX__
            /* x < width holds here, so width - x cannot wrap around. */
            if(PixStride == 4 && width - x >= 4)
            {
                c64_MMX p0_1; p0_1.Get(&src[pos]); // two 32-bit pixels (4*8)
                pos += PixStride*2;
                c64_MMX p2_3; p2_3.Get(&src[pos]); // two 32-bit pixels (4*8)
                pos += PixStride*2;
                x += 2; /* this iteration covers four pixels, not two */
                Convert_YUY2_MMX_Common(p0_1, p2_3,
                                        dest+ypos);
                ypos += 4; /* together with the += 4 below: 8 bytes written */
            }
            else
#endif
            {
                int c[3], rgb[3][2];
                /* luma */
                for(int n=0; n<3; ++n) c[n] = rgb[n][0] = src[pos + n];
                pos += PixStride;
                for(int n=0; n<3; ++n) c[n] += rgb[n][1] = src[pos + n];
                pos += PixStride;
                for(int n=0; n<2; ++n)
                {
                    dest[ypos + n*2]
                        = Y_ADD + ((RY * rgb[0][n]
                                  + GY * rgb[1][n]
                                  + BY * rgb[2][n]
                                   ) >> RGB2YUV_SHIFT); // y
                }
                /* +1 in the shift: c[] holds the sum of two pixels. */
                dest[ypos+3] = (U_ADD + ((RU * c[0] + GU * c[1] + BU * c[2]) >> (RGB2YUV_SHIFT+1)) );
                dest[ypos+1] = (V_ADD + ((RV * c[0] + GV * c[1] + BV * c[2]) >> (RGB2YUV_SHIFT+1)) );
            }
            ypos += 4;
        }
    }
#ifdef __MMX__
    MMX_clear();
#endif
}
/*template<int roffs,int rbits, int goffs,int gbits, int boffs,int bbits>
void Convert_2byte_To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
__attribute__((noinline));*/
template<int roffs,int rbits, int goffs,int gbits, int boffs,int bbits>
void Convert_2byte_To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
{
const unsigned PixStride = 2;
const unsigned char* src = (const unsigned char*) data;
unsigned height = npixels / width;
unsigned pos = 0;
unsigned ypos = 0;
unsigned vpos = npixels;
unsigned upos = vpos + npixels / 4;
unsigned stride = width*PixStride;
/* This function is based on code from x264 svn version 711 */
for(unsigned y=0; y<height; y += 2)
{
for(unsigned x=0; x<width; x += 8)
{
unsigned char Rgb2byteBuf[2][8][4];
/* Convert 8 pixels from two scanlines (16 in total)
* from RGB15 / RGB16 to RGB32
* (Not RGB32, because RGB32 conversion is faster)
*/
Convert_2byte_to_24or32Common
<roffs,rbits, goffs,gbits, boffs,bbits, false>
(src+pos, Rgb2byteBuf[0][0]);
Convert_2byte_to_24or32Common
<roffs,rbits, goffs,gbits, boffs,bbits, false>
(src+pos+stride, Rgb2byteBuf[1][0]);
pos += 16;
for(int x8 = 0; x8 < 8; x8 += 2)
{
#ifdef __MMX__
c64_MMX p0_1; p0_1.Get(&Rgb2byteBuf[0][x8][0]); // two 32-bit pixels (4*8)
c64_MMX p2_3; p2_3.Get(&Rgb2byteBuf[1][x8][0]); // two 32-bit pixels
Convert_I420_MMX_Common(p0_1, p2_3,
dest+ypos,
dest+ypos+width,
dest+upos++,
dest+vpos++);
#else
int c[3];
/* TODO: Some faster means than using pointers */
unsigned char* rgb[4] =
{
Rgb2byteBuf[0][x8+0],
Rgb2byteBuf[0][x8+1],
Rgb2byteBuf[1][x8+0],
Rgb2byteBuf[1][x8+1]
};
for(int m=0; m<3; ++m) c[m] = 0;
for(int n=0; n<4; ++n)
for(int m=0; m<3; ++m)
c[m] += rgb[n][m];
unsigned destpos[4] = { ypos, ypos+1, ypos+width, ypos+width+1 };
for(int n=0; n<4; ++n)
{
dest[destpos[n]]
= Y_ADD + ((RY * rgb[n][0]
+ GY * rgb[n][1]
+ BY * rgb[n][2]
) >> RGB2YUV_SHIFT); // y
}
/*c[0] /= 4; c[1] /= 4; c[2] /= 4;*/
// Note: +2 is because c[] contains 4 values
dest[upos++] = U_ADD + ((RU * c[0] + GU * c[1] + BU * c[2]) >> (RGB2YUV_SHIFT+2));
dest[vpos++] = V_ADD + ((RV * c[0] + GV * c[1] + BV * c[2]) >> (RGB2YUV_SHIFT+2));
#endif
ypos += 2;
}
}
pos += stride;
ypos += width;
}
#ifdef __MMX__
MMX_clear();
#endif
}
template<int roffs,int rbits, int goffs,int gbits, int boffs,int bbits>
void Convert_2byte_To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
{
const unsigned PixStride = 2;
const unsigned char* src = (const unsigned char*) data;
unsigned height = npixels / width;
unsigned pos = 0;
unsigned ypos = 0;
unsigned stride = width*PixStride;
for(unsigned y=0; y<height; ++y)
{
for(unsigned x=0; x<width; x += 8)
{
unsigned char Rgb2byteBuf[8][4];
/* Convert 8 pixels from a scanline
* from RGB15 / RGB16 to RGB32
* (Not RGB32, because RGB32 conversion is faster)
*/
Convert_2byte_to_24or32Common
<roffs,rbits, goffs,gbits, boffs,bbits, false>
(src+pos, Rgb2byteBuf[0]);
pos += 16;
for(int x8 = 0; x8 < 8; )
{
#ifdef __MMX__
c64_MMX p0_1; p0_1.Get(&Rgb2byteBuf[x8 ][0]); // two 32-bit pixels (4*8)
c64_MMX p2_3; p2_3.Get(&Rgb2byteBuf[x8+2][0]); // two 32-bit pixels (4*8)
Convert_YUY2_MMX_Common(p0_1, p2_3, dest+ypos);
x8 += 4;
ypos += 8;
#else
int c[3];
/* TODO: Some faster means than using pointers */
unsigned char* rgb[2] =
{
Rgb2byteBuf[x8+0],
Rgb2byteBuf[x8+1],
};
for(int m=0; m<3; ++m) c[m] = 0;
for(int n=0; n<2; ++n)
for(int m=0; m<3; ++m)
c[m] += rgb[n][m];
for(int n=0; n<2; ++n)
{
dest[ypos + n*2]
= Y_ADD + ((RY * rgb[n][0]
+ GY * rgb[n][1]
+ BY * rgb[n][2]
) >> RGB2YUV_SHIFT); // y
}
/*c[0] /= 4; c[1] /= 4; c[2] /= 4;*/
// Note: +2 is because c[] contains 4 values
dest[ypos+3] = U_ADD + ((RU * c[0] + GU * c[1] + BU * c[2]) >> (RGB2YUV_SHIFT+1));
dest[ypos+1] = V_ADD + ((RV * c[0] + GV * c[1] + BV * c[2]) >> (RGB2YUV_SHIFT+1));
x8 += 2;
ypos += 4;
#endif
}
}
}
#ifdef __MMX__
MMX_clear();
#endif
}
/***/
/* Converts a planar 4:2:0 frame into packed 3-byte-per-pixel output.
 *
 *   data          : source frame; luma plane at offset 0, the plane read as
 *                   "v" at offset npixels and the plane read as "u" at
 *                   offset npixels + npixels/4
 *   dest          : output buffer, 3 bytes per pixel
 *   npixels       : total pixel count; npixels/width rows are converted
 *   width         : pixels per row
 *   swap_red_blue : accepted for interface compatibility; this function
 *                   does not use it
 *
 * Rows are processed two at a time. The MMX path handles eight columns per
 * step and the scalar path two, so width is expected to be a multiple of 8
 * (MMX builds) or of 2 (other builds), and the row count even.
 *
 * All buffer offsets are derived from the row index inside the row loop and
 * the MMX scratch registers are local to each iteration, so the iterations
 * are independent of each other, as "#pragma omp parallel for" requires.
 * NOTE(review): MMX_clear() is still only called once, after the loop, by
 * the calling thread; if OpenMP and MMX are both enabled, confirm whether
 * worker threads need their own call.
 */
void Convert_I420To24Frame(const void* data, unsigned char* dest,
                           unsigned npixels, unsigned width, bool swap_red_blue)
{
    (void)swap_red_blue; // unused, see above

    if(width == 0) return; // nothing to convert; also avoids dividing by zero

    const unsigned char* src = (const unsigned char*) data;
    unsigned height = npixels / width;

    const unsigned vbase = npixels;               // start of the "v" plane
    const unsigned ubase = vbase + npixels / 4;   // start of the "u" plane

    /*
        Y input: 16..235
        U input: 16..240
        V input: 16..240
    */

    #pragma omp parallel for
    for(unsigned y=0; y<height; y += 2)
    {
        /* Offsets for this pair of rows. Each pair covers 2*width luma
         * bytes, 2*width*3 output bytes and width/2 bytes per chroma plane. */
        unsigned pos  = y * width * 3;
        unsigned ypos = y * width;
        unsigned vpos = vbase + (y/2) * (width/2);
        unsigned upos = ubase + (y/2) * (width/2);

    #ifdef __MMX__
        c64_MMX rgb[4], yy[4]; // per-iteration scratch
    #endif

        for(unsigned x=0; x<width; )
        {
        #ifdef __MMX__
            /* Load 4 U and V values and subtract U_ADD and V_ADD from them. */
            uint64_t tmp_u = *(uint32_t*)&src[upos];
            uint64_t tmp_v = *(uint32_t*)&src[vpos];
            c64_MMX uuq = c64_MMX(zero64)
                            .unpacklbw(tmp_u) // 8-bit to 16-bit
                            .sub16(Bits16const<U_ADD,U_ADD>::value)
                            .shl16(16 - YUV2RGB_SHIFT); // shift them so that *13bitconst results in upper 16 bits having the actual value
            c64_MMX vvq = c64_MMX(zero64)
                            .unpacklbw(tmp_v)
                            .sub16(Bits16const<V_ADD,V_ADD>::value)
                            .shl16(16 - YUV2RGB_SHIFT); // shift them so that *13bitconst results in upper 16 bits having the actual value

            const short* uu = (const short*)&uuq;
            const short* vv = (const short*)&vvq;

          #if 1
            c64_MMX vmul; vmul.Init16(VR, VG, 0, 0); // R,G,B,0 * vmul = V
            c64_MMX umul; umul.Init16(0, UG, UB, 0); // R,G,B,0 * umul = U
          #else
            // pmaddw does: A,B,C,D and E,F,G,H, A*E + B*F, C*G + D*H
            // we do: R= VR*v + 0*u, G= VG*v + UG*u
            // B= 0*v + UB*u, 0*0 + 0*0
            c64_MMX vumul1; vumul1.Init16(VR, 0, VG, UG);
            c64_MMX vumul2; vumul2.Init16(0, UB, 0, 0);
          #endif

            /* rgb[4]: four sets of 4*int16, each representing 1 rgb value */
            for(int n=0; n<4; ++n)
            {
              #if 1
                /* vv is shifted by 3 bits, vmul is shifted by 13 bits
                 * 16 bits in total, so mul16hi gets the 16-bit downscaled part */
                c64_MMX v; v.Init16(vv[n]);
                c64_MMX u; u.Init16(uu[n]);
                rgb[n] = v.mul16hi(vmul).add16(
                         u.mul16hi(umul) );
              #else
                c64_MMX vuvu; vuvu.Init16(vv[n], uu[n], vv[n], uu[n]);
                c64_MMX madd1 = _mm_madd_pi16(vumul1.value, vuvu.value);
                c64_MMX madd2 = _mm_madd_pi16(vumul2.value, vuvu.value);
                rgb[n] = madd1.sar32(YUV2RGB_SHIFT)
                         .conv_s32_s16(
                         madd2.sar32(YUV2RGB_SHIFT));
              #endif
            }

            /* rgb[0] : U,V increment of RGB32 for x0,y0 - x1,y1
             * rgb[1] : U,V increment of RGB32 for x2,y0 - x3,y1
             * rgb[2] : U,V increment of RGB32 for x4,y0 - x5,y1
             * rgb[3] : U,V increment of RGB32 for x6,y0 - x7,y1
             */

            unsigned yyoffs[4] = { ypos, ypos+1, ypos+width, ypos+width+1 };
            /* yy[4]: four sets of 4*int16, each representing four Y values */
            for(int n=0; n<4; ++n)
            {
                c64_MMX luma; luma.Init16(
                    src[yyoffs[0]+n*2],
                    src[yyoffs[1]+n*2],
                    src[yyoffs[2]+n*2],
                    src[yyoffs[3]+n*2]
                );
                luma = luma.sub16(Bits16const<Y_ADD,Y_ADD>::value);
                luma = luma.shl16(16 - YUV2RGB_SHIFT);
                yy[n] = luma.mul16hi(Bits16const<Y_REV,Y_REV>::value);
            }
            const short* const yyval = (const short*) &yy[0].value;
            /*
                values in order:
                   x0y0 x1y0 x0y1 x1y1
                   x2y0 x3y0 x2y1 x3y1
                   x4y0 x5y0 x4y1 x5y1
                   x6y0 x7y0 x6y1 x7y1
            */
            int tmppos = pos;
            for(int ny = 0; ny < 4; ny += 2)
            {
                /* Note: We must use 16-bit pixels here instead of 8-bit,
                 * because the rgb+Y addition can overflow. conv_s16_u8()
                 * does the necessary clamping, which would not be done
                 * if the values were 8-bit.
                 */
                // 8 pixels for one scanline, repeated twice
                /* Note: C++ has no named constructors, so we
                 * use statement blocks here as substitutes.
                 */
                c64_MMX r0
                    = rgb[0].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+0]); tmp; }) )
                      .conv_s16_u8(
                      rgb[0].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+1]); tmp; }) ));
                c64_MMX r1
                    = rgb[1].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+4]); tmp; }) )
                      .conv_s16_u8(
                      rgb[1].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+5]); tmp; }) ));
                c64_MMX r2
                    = rgb[2].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+8]); tmp; }) )
                      .conv_s16_u8(
                      rgb[2].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+9]); tmp; }) ));
                c64_MMX r3
                    = rgb[3].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+12]); tmp; }) )
                      .conv_s16_u8(
                      rgb[3].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+13]); tmp; }) ));

                Convert32To24_32bytes(r0,r1,r2,r3, &dest[tmppos]);
                tmppos += width*3; // next line
            }
            upos += 4;
            vpos += 4;
            ypos += 8; // eight bytes for this line (and eight from next too)
            pos  += 8*3; // eight triplets generated on this line
            x    += 8; // eight yy values used on this line
        #else /* non-MMX */
            int u = src[upos] - U_ADD;
            int v = src[vpos] - V_ADD;

            /* Chroma contribution shared by the whole 2x2 square. */
            int rgb[3] =
            {
                (VR * v         ) >> (YUV2RGB_SHIFT),
                (VG * v + UG * u) >> (YUV2RGB_SHIFT),
                (       + UB * u) >> (YUV2RGB_SHIFT)
            };

            /* Pixel offsets of the square: this row and the next one. */
            unsigned incr[4] = {0,1,width,width+1};

            for(unsigned r=0; r<4; ++r)
                for(unsigned doffs=pos + incr[r]*3, yoffs=ypos + incr[r],
                             yy = (Y_REV * (src[yoffs] - Y_ADD)) >> YUV2RGB_SHIFT,
                             n=0; n<3; ++n)
                    dest[doffs+n] = c64::clamp_u8(rgb[n] + (int)yy);

            upos += 1;
            vpos += 1;
            ypos += 2; // two bytes for this line (two from next line)
            pos  += 2*3; // two triplets generated on this line
            x    += 2; // two yy values used on this line
        #endif
        }
    }

#ifdef __MMX__
    MMX_clear();
#endif
}
/* Converts a packed 4:2:2 frame into packed 3-byte-per-pixel output.
 *
 *   data          : source frame, four bytes per pixel pair; bytes 0 and 2
 *                   are read as luma, byte 1 as "u" and byte 3 as "v"
 *   dest          : output buffer, 3 bytes per pixel
 *   npixels       : total pixel count; npixels/width rows are converted
 *   width         : pixels per row; expected to be even, because pixels
 *                   are consumed in pairs
 *   swap_red_blue : accepted for interface compatibility; this function
 *                   does not use it
 *
 * The column counter advances exactly once per pixel pair, so every pair of
 * every row is converted. Buffer offsets are derived from the row index
 * inside the row loop, which keeps the iterations independent of each other
 * as "#pragma omp parallel for" requires.
 *
 * NOTE(review): the ..._To_YUY2Frame converters in this file store their
 * RV/GV/BV-derived value at byte 1 and their RU/GU/BU-derived value at
 * byte 3, while this function reads "u" from byte 1 and "v" from byte 3.
 * Confirm which channel order is intended.
 *
 * TODO: MMX optimization
 */
void Convert_YUY2To24Frame(const void* data, unsigned char* dest,
                           unsigned npixels, unsigned width, bool swap_red_blue)
{
    (void)swap_red_blue; // unused, see above

    if(width == 0) return; // nothing to convert; also avoids dividing by zero

    const unsigned char* src = (const unsigned char*) data;
    unsigned height = npixels / width;

    /*
        Y input: 16..235
        U input: 16..240
        V input: 16..240
    */

    #pragma omp parallel for
    for(unsigned y=0; y<height; ++y)
    {
        unsigned ypos = y * width * 2; // source: two bytes per pixel
        unsigned pos  = y * width * 3; // destination: three bytes per pixel

        for(unsigned x=0; x<width; x += 2) // two pixels per step
        {
            int u = src[ypos+1] - U_ADD;
            int v = src[ypos+3] - V_ADD;

            /* Chroma contribution shared by both pixels of the pair. */
            int rgb[3] =
            {
                (VR * v         ) >> (YUV2RGB_SHIFT),
                (VG * v + UG * u) >> (YUV2RGB_SHIFT),
                (       + UB * u) >> (YUV2RGB_SHIFT)
            };

            for(unsigned r=0; r<2; ++r)
            {
                const unsigned doffs = pos + r*3;
                const unsigned yoffs = ypos + r*2;
                const unsigned yy = (Y_REV * (src[yoffs] - Y_ADD)) >> YUV2RGB_SHIFT;

                for(unsigned n=0; n<3; ++n)
                    dest[doffs+n] = c64::clamp_u8(rgb[n] + (int)yy);
            }

            ypos += 4; // four bytes for this pair (y,u,y,v)
            pos  += 2*3; // two triplets generated for this pair
        }
    }
}
/***/
/* 3-bytes-per-pixel entry point: forwards all arguments to
 * Convert_4byte_To_I420Frame instantiated with a pixel stride of 3. */
void Convert24To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
{
    const int BytesPerPixel = 3;
    Convert_4byte_To_I420Frame<BytesPerPixel>(data, dest, npixels, width);
}
/* 4-bytes-per-pixel entry point: forwards all arguments to
 * Convert_4byte_To_I420Frame instantiated with a pixel stride of 4. */
void Convert32To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
{
    const int BytesPerPixel = 4;
    Convert_4byte_To_I420Frame<BytesPerPixel>(data, dest, npixels, width);
}
/* 15-bit entry point: forwards all arguments to Convert_2byte_To_I420Frame
 * instantiated for three 5-bit channels at bit offsets 10, 5 and 0. */
void Convert15To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
{
    Convert_2byte_To_I420Frame
        < 10, 5,    /* roffs, rbits */
           5, 5,    /* goffs, gbits */
           0, 5 >   /* boffs, bbits */
        (data, dest, npixels, width);
}
/* 16-bit entry point: forwards all arguments to Convert_2byte_To_I420Frame
 * instantiated for 5/6/5-bit channels at bit offsets 11, 5 and 0. */
void Convert16To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
{
    Convert_2byte_To_I420Frame
        < 11, 5,    /* roffs, rbits */
           5, 6,    /* goffs, gbits */
           0, 5 >   /* boffs, bbits */
        (data, dest, npixels, width);
}
/***/
/* 3-bytes-per-pixel entry point: forwards all arguments to
 * Convert_4byte_To_YUY2Frame instantiated with a pixel stride of 3. */
void Convert24To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
{
    const int BytesPerPixel = 3;
    Convert_4byte_To_YUY2Frame<BytesPerPixel>(data, dest, npixels, width);
}
/* 4-bytes-per-pixel entry point: forwards all arguments to
 * Convert_4byte_To_YUY2Frame instantiated with a pixel stride of 4. */
void Convert32To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
{
    const int BytesPerPixel = 4;
    Convert_4byte_To_YUY2Frame<BytesPerPixel>(data, dest, npixels, width);
}
/* 15-bit entry point: forwards all arguments to Convert_2byte_To_YUY2Frame
 * instantiated for three 5-bit channels at bit offsets 10, 5 and 0. */
void Convert15To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
{
    Convert_2byte_To_YUY2Frame
        < 10, 5,    /* roffs, rbits */
           5, 5,    /* goffs, gbits */
           0, 5 >   /* boffs, bbits */
        (data, dest, npixels, width);
}
/* 16-bit entry point: forwards all arguments to Convert_2byte_To_YUY2Frame
 * instantiated for 5/6/5-bit channels at bit offsets 11, 5 and 0. */
void Convert16To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width)
{
    Convert_2byte_To_YUY2Frame
        < 11, 5,    /* roffs, rbits */
           5, 6,    /* goffs, gbits */
           0, 5 >   /* boffs, bbits */
        (data, dest, npixels, width);
}