1 | ///////////////////////////////////////////////////////////////////////////////
|
---|
2 | //
|
---|
3 | /// \file lzma_encoder_optimum_fast.c
|
---|
4 | //
|
---|
5 | // Author: Igor Pavlov
|
---|
6 | //
|
---|
7 | // This file has been put into the public domain.
|
---|
8 | // You can do whatever you want with this file.
|
---|
9 | //
|
---|
10 | ///////////////////////////////////////////////////////////////////////////////
|
---|
11 |
|
---|
12 | #include "lzma_encoder_private.h"
|
---|
13 | #include "memcmplen.h"
|
---|
14 |
|
---|
15 |
|
---|
16 | #define change_pair(small_dist, big_dist) \
|
---|
17 | (((big_dist) >> 7) > (small_dist))
|
---|
18 |
|
---|
19 |
|
---|
20 | extern void
|
---|
21 | lzma_lzma_optimum_fast(lzma_lzma1_encoder *restrict coder,
|
---|
22 | lzma_mf *restrict mf,
|
---|
23 | uint32_t *restrict back_res, uint32_t *restrict len_res)
|
---|
24 | {
|
---|
25 | const uint32_t nice_len = mf->nice_len;
|
---|
26 |
|
---|
27 | uint32_t len_main;
|
---|
28 | uint32_t matches_count;
|
---|
29 | if (mf->read_ahead == 0) {
|
---|
30 | len_main = mf_find(mf, &matches_count, coder->matches);
|
---|
31 | } else {
|
---|
32 | assert(mf->read_ahead == 1);
|
---|
33 | len_main = coder->longest_match_length;
|
---|
34 | matches_count = coder->matches_count;
|
---|
35 | }
|
---|
36 |
|
---|
37 | const uint8_t *buf = mf_ptr(mf) - 1;
|
---|
38 | const uint32_t buf_avail = my_min(mf_avail(mf) + 1, MATCH_LEN_MAX);
|
---|
39 |
|
---|
40 | if (buf_avail < 2) {
|
---|
41 | // There's not enough input left to encode a match.
|
---|
42 | *back_res = UINT32_MAX;
|
---|
43 | *len_res = 1;
|
---|
44 | return;
|
---|
45 | }
|
---|
46 |
|
---|
47 | // Look for repeated matches; scan the previous four match distances
|
---|
48 | uint32_t rep_len = 0;
|
---|
49 | uint32_t rep_index = 0;
|
---|
50 |
|
---|
51 | for (uint32_t i = 0; i < REPS; ++i) {
|
---|
52 | // Pointer to the beginning of the match candidate
|
---|
53 | const uint8_t *const buf_back = buf - coder->reps[i] - 1;
|
---|
54 |
|
---|
55 | // If the first two bytes (2 == MATCH_LEN_MIN) do not match,
|
---|
56 | // this rep is not useful.
|
---|
57 | if (not_equal_16(buf, buf_back))
|
---|
58 | continue;
|
---|
59 |
|
---|
60 | // The first two bytes matched.
|
---|
61 | // Calculate the length of the match.
|
---|
62 | const uint32_t len = lzma_memcmplen(
|
---|
63 | buf, buf_back, 2, buf_avail);
|
---|
64 |
|
---|
65 | // If we have found a repeated match that is at least
|
---|
66 | // nice_len long, return it immediately.
|
---|
67 | if (len >= nice_len) {
|
---|
68 | *back_res = i;
|
---|
69 | *len_res = len;
|
---|
70 | mf_skip(mf, len - 1);
|
---|
71 | return;
|
---|
72 | }
|
---|
73 |
|
---|
74 | if (len > rep_len) {
|
---|
75 | rep_index = i;
|
---|
76 | rep_len = len;
|
---|
77 | }
|
---|
78 | }
|
---|
79 |
|
---|
80 | // We didn't find a long enough repeated match. Encode it as a normal
|
---|
81 | // match if the match length is at least nice_len.
|
---|
82 | if (len_main >= nice_len) {
|
---|
83 | *back_res = coder->matches[matches_count - 1].dist + REPS;
|
---|
84 | *len_res = len_main;
|
---|
85 | mf_skip(mf, len_main - 1);
|
---|
86 | return;
|
---|
87 | }
|
---|
88 |
|
---|
89 | uint32_t back_main = 0;
|
---|
90 | if (len_main >= 2) {
|
---|
91 | back_main = coder->matches[matches_count - 1].dist;
|
---|
92 |
|
---|
93 | while (matches_count > 1 && len_main ==
|
---|
94 | coder->matches[matches_count - 2].len + 1) {
|
---|
95 | if (!change_pair(coder->matches[
|
---|
96 | matches_count - 2].dist,
|
---|
97 | back_main))
|
---|
98 | break;
|
---|
99 |
|
---|
100 | --matches_count;
|
---|
101 | len_main = coder->matches[matches_count - 1].len;
|
---|
102 | back_main = coder->matches[matches_count - 1].dist;
|
---|
103 | }
|
---|
104 |
|
---|
105 | if (len_main == 2 && back_main >= 0x80)
|
---|
106 | len_main = 1;
|
---|
107 | }
|
---|
108 |
|
---|
109 | if (rep_len >= 2) {
|
---|
110 | if (rep_len + 1 >= len_main
|
---|
111 | || (rep_len + 2 >= len_main
|
---|
112 | && back_main > (UINT32_C(1) << 9))
|
---|
113 | || (rep_len + 3 >= len_main
|
---|
114 | && back_main > (UINT32_C(1) << 15))) {
|
---|
115 | *back_res = rep_index;
|
---|
116 | *len_res = rep_len;
|
---|
117 | mf_skip(mf, rep_len - 1);
|
---|
118 | return;
|
---|
119 | }
|
---|
120 | }
|
---|
121 |
|
---|
122 | if (len_main < 2 || buf_avail <= 2) {
|
---|
123 | *back_res = UINT32_MAX;
|
---|
124 | *len_res = 1;
|
---|
125 | return;
|
---|
126 | }
|
---|
127 |
|
---|
128 | // Get the matches for the next byte. If we find a better match,
|
---|
129 | // the current byte is encoded as a literal.
|
---|
130 | coder->longest_match_length = mf_find(mf,
|
---|
131 | &coder->matches_count, coder->matches);
|
---|
132 |
|
---|
133 | if (coder->longest_match_length >= 2) {
|
---|
134 | const uint32_t new_dist = coder->matches[
|
---|
135 | coder->matches_count - 1].dist;
|
---|
136 |
|
---|
137 | if ((coder->longest_match_length >= len_main
|
---|
138 | && new_dist < back_main)
|
---|
139 | || (coder->longest_match_length == len_main + 1
|
---|
140 | && !change_pair(back_main, new_dist))
|
---|
141 | || (coder->longest_match_length > len_main + 1)
|
---|
142 | || (coder->longest_match_length + 1 >= len_main
|
---|
143 | && len_main >= 3
|
---|
144 | && change_pair(new_dist, back_main))) {
|
---|
145 | *back_res = UINT32_MAX;
|
---|
146 | *len_res = 1;
|
---|
147 | return;
|
---|
148 | }
|
---|
149 | }
|
---|
150 |
|
---|
151 | // In contrast to LZMA SDK, dictionary could not have been moved
|
---|
152 | // between mf_find() calls, thus it is safe to just increment
|
---|
153 | // the old buf pointer instead of recalculating it with mf_ptr().
|
---|
154 | ++buf;
|
---|
155 |
|
---|
156 | const uint32_t limit = my_max(2, len_main - 1);
|
---|
157 |
|
---|
158 | for (uint32_t i = 0; i < REPS; ++i) {
|
---|
159 | if (memcmp(buf, buf - coder->reps[i] - 1, limit) == 0) {
|
---|
160 | *back_res = UINT32_MAX;
|
---|
161 | *len_res = 1;
|
---|
162 | return;
|
---|
163 | }
|
---|
164 | }
|
---|
165 |
|
---|
166 | *back_res = back_main + REPS;
|
---|
167 | *len_res = len_main;
|
---|
168 | mf_skip(mf, len_main - 2);
|
---|
169 | return;
|
---|
170 | }
|
---|