diff --git a/firmware/baseband/dsp_decimate.cpp b/firmware/baseband/dsp_decimate.cpp index bb06d33c0..eef3f857a 100644 --- a/firmware/baseband/dsp_decimate.cpp +++ b/firmware/baseband/dsp_decimate.cpp @@ -385,76 +385,6 @@ buffer_s16_t DecimateBy2CIC4Real::execute( return { dst.p, src.count / 2, src.sampling_rate / 2 }; } -#if 0 -buffer_c16_t DecimateBy2HBF5Complex::execute( - buffer_c16_t const src, - buffer_c16_t const dst -) { - auto src_p = src.p; - auto dst_p = dst.p; - int32_t n = src.count; - for(; n>0; n-=2) { - /* TODO: Probably a lot of room to optimize... */ - z[0] = z[2]; - //z[1] = z[3]; - z[2] = z[4]; - //z[3] = z[5]; - z[4] = z[6]; - z[5] = z[7]; - z[6] = z[8]; - z[7] = z[9]; - z[8] = z[10]; - z[9] = *(src_p++); - z[10] = *(src_p++); - int32_t t_real { z[5].real * 256 }; - int32_t t_imag { z[5].imag * 256 }; - t_real += (z[ 0].real + z[10].real) * 3; - t_imag += (z[ 0].imag + z[10].imag) * 3; - t_real -= (z[ 2].real + z[ 8].real) * 25; - t_imag -= (z[ 2].imag + z[ 8].imag) * 25; - t_real += (z[ 4].real + z[ 6].real) * 150; - t_imag += (z[ 4].imag + z[ 6].imag) * 150; - *(dst_p++) = { t_real / 256, t_imag / 256 }; - } - - return { dst.p, src.count / 2, src.sampling_rate / 2 }; -} - -buffer_c16_t DecimateBy2HBF7Complex::execute( - buffer_c16_t const src, - buffer_c16_t const dst -) { - auto src_p = src.p; - auto dst_p = dst.p; - int32_t n = src.count; - for(; n>0; n-=2) { - /* TODO: Probably a lot of room to optimize... */ - z[0] = z[2]; - //z[1] = z[3]; - z[2] = z[4]; - //z[3] = z[5]; - z[4] = z[6]; - z[5] = z[7]; - z[6] = z[8]; - z[7] = z[9]; - z[8] = z[10]; - z[9] = *(src_p++); - z[10] = *(src_p++); - - int32_t t_real { z[5].real * 512 }; - int32_t t_imag { z[5].imag * 512 }; - t_real += (z[ 0].real + z[10].real) * 7; - t_imag += (z[ 0].imag + z[10].imag) * 7; - t_real -= (z[ 2].real + z[ 8].real) * 53; - t_imag -= (z[ 2].imag + z[ 8].imag) * 53; - t_real += (z[ 4].real + z[ 6].real) * 302; - t_imag += (z[ 4].imag + z[ 6].imag) * 302; - *(dst_p++) = { t_real / 512, t_imag / 512 }; - } - - return { dst.p, src.count / 2, src.sampling_rate / 2 }; -} -#endif } /* namespace decimate */ } /* namespace dsp */ diff --git a/firmware/baseband/dsp_decimate.hpp b/firmware/baseband/dsp_decimate.hpp index f57c5e2fe..4b392cada 100644 --- a/firmware/baseband/dsp_decimate.hpp +++ b/firmware/baseband/dsp_decimate.hpp @@ -142,112 +142,7 @@ public: private: int16_t z[5]; }; -#if 0 -class DecimateBy2HBF5Complex { -public: - buffer_c16_t execute( - buffer_c16_t const src, - buffer_c16_t const dst - ); -private: - complex16_t z[11]; -}; - -class DecimateBy2HBF7Complex { -public: - buffer_c16_t execute( - buffer_c16_t const src, - buffer_c16_t const dst - ); - -private: - complex16_t z[11]; -}; -#endif -/* From http://www.dspguru.com/book/export/html/3 - -Here are several basic techniques to fake circular buffers: - -Split the calculation: You can split any FIR calculation into its "pre-wrap" -and "post-wrap" parts. By splitting the calculation into these two parts, you -essentially can do the circular logic only once, rather than once per tap. -(See fir_double_z in FirAlgs.c above.) - -Duplicate the delay line: For a FIR with N taps, use a delay line of size 2N. -Copy each sample to its proper location, as well as at location-plus-N. -Therefore, the FIR calculation's MAC loop can be done on a flat buffer of N -points, starting anywhere within the first set of N points. The second set of -N delayed samples provides the "wrap around" comparable to a true circular -buffer. (See fir_double_z in FirAlgs.c above.) - -Duplicate the coefficients: This is similar to the above, except that the -duplication occurs in terms of the coefficients, not the delay line. -Compared to the previous method, this has a calculation advantage of not -having to store each incoming sample twice, and it also has a memory -advantage when the same coefficient set will be used on multiple delay lines. -(See fir_double_h in FirAlgs.c above.) - -Use block processing: In block processing, you use a delay line which is a -multiple of the number of taps. You therefore only have to move the data -once per block to implement the delay-line mechanism. When the block size -becomes "large", the overhead of a moving the delay line once per block -becomes negligible. -*/ - -#if 0 -template -class FIRAndDecimateBy2Complex { -public: - FIR64AndDecimateBy2Complex( - const std::array& taps - ) : taps { taps } - { - } - - buffer_c16_t execute( - buffer_c16_t const src, - buffer_c16_t const dst - ) { - /* int16_t input (sample count "n" must be multiple of 4) - * -> int16_t output, decimated by 2. - * taps are normalized to 1 << 16 == 1.0. - */ - - return { dst.p, src.count / 2 }; - } - -private: - std::array z; - const std::array& taps; - - complex process_one(const size_t start_offset) { - const auto split = &z[start_offset]; - const auto end = &z[z.size()]; - auto tap = &taps[0]; - - complex t { 0, 0 }; - - auto p = split; - while(p < end) { - const auto t = *(tap++); - const auto c = *(p++); - t.real += c.real * t; - t.imag += c.imag * t; - } - - p = &z[0]; - while(p < split) { - const auto t = *(tap++); - const auto c = *(p++); - t.real += c.real * t; - t.imag += c.imag * t; - } - - return { t.real / 65536, t.imag / 65536 }; - } -}; -#endif } /* namespace decimate */ } /* namespace dsp */