1 <?php
2
3 /*
4 * This file is part of the ICanBoogie package.
5 *
6 * (c) Olivier Laviale <olivier.laviale@gmail.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12 namespace ICanBoogie\HTTP\Headers;
13
14 use ICanBoogie\Accessor\AccessorTrait;
15
/**
 * Representation of a header parameter.
 *
 * A parameter is an `attribute=value` pair, optionally carrying a language, such as the
 * `filename` parameter of a `Content-Disposition` header. Instances can be parsed from a string
 * with {@link from()} and serialized back with {@link render()}.
 *
 * @property-read string $attribute The attribute of the parameter.
 * @property-read string $charset The charset of the parameter's value.
 *
 * @see http://tools.ietf.org/html/rfc2231
 * @see http://tools.ietf.org/html/rfc5987
 * @see http://greenbytes.de/tech/tc2231/#attwithfn2231utf8
 */
class HeaderParameter
{
    use AccessorTrait;

    /**
     * Token of the parameter.
     *
     * @var string
     */
    protected $attribute;

    /**
     * Getter for the `attribute` property.
     *
     * NOTE(review): presumably invoked by AccessorTrait when `$attribute` is read from outside
     * the class; the trait is not visible from this file.
     *
     * @return string
     */
    protected function get_attribute()
    {
        return $this->attribute;
    }

    /**
     * Value of the parameter.
     *
     * @var string
     */
    public $value;

    /**
     * Getter for the `charset` property.
     *
     * @return string The encoding detected by `mb_detect_encoding()` for the value, or
     * `ISO-8859-1` when detection fails.
     */
    protected function get_charset()
    {
        // The cast avoids passing `null` to mb_detect_encoding() when no value is set.
        return mb_detect_encoding((string) $this->value) ?: 'ISO-8859-1';
    }

    /**
     * Language of the value.
     *
     * @var string
     */
    public $language;

    /**
     * Creates a {@link HeaderParameter} instance from the provided source.
     *
     * Supported forms:
     *
     * - `attribute=token` and `attribute="quoted string"`: the surrounding double quotes, if
     * any, are removed.
     * - `attribute*=charset'language'percent-encoded`: the value is percent-decoded and, when
     * the charset is ISO-8859-1 (compared case-insensitively), converted to UTF-8. A malformed
     * extended value yields an empty string.
     * - `attribute` (no equal sign): an instance with a `null` value is returned.
     *
     * @param mixed $source A {@link HeaderParameter} instance, which is returned as is, or a
     * value that is cast to a string and parsed.
     *
     * @return HeaderParameter
     */
    static public function from($source)
    {
        if ($source instanceof self)
        {
            return $source;
        }

        $source = (string) $source;
        $equal_pos = strpos($source, '=');

        // Without an equal sign there is no value to extract; using `false` as an offset
        // would yield a garbage attribute and value.
        if ($equal_pos === false)
        {
            return new static($source);
        }

        $language = null;
        $raw_value = (string) substr($source, $equal_pos + 1);

        if ($equal_pos > 0 && $source[$equal_pos - 1] === '*')
        {
            #
            # extended value: [charset] "'" [language] "'" value-chars
            #

            $attribute = (string) substr($source, 0, $equal_pos - 1);
            $value = '';

            // 1: charset, 2: language, 3: percent-encoded value. Language tags may contain
            // uppercase letters and digits (e.g. "en-US", "de-CH-1996").
            if (preg_match('#^(?:([a-zA-Z0-9\-]*)\'([a-zA-Z0-9\-]*)\')?"?([^"]+)"?$#', $raw_value, $matches))
            {
                if ($matches[2] !== '')
                {
                    $language = $matches[2];
                }

                // Extended values are percent-encoded: a literal "+" must stay a "+",
                // which urldecode() would turn into a space.
                $value = rawurldecode($matches[3]);

                // Charset names are case-insensitive.
                if (strcasecmp($matches[1], 'iso-8859-1') === 0)
                {
                    $value = mb_convert_encoding($value, 'UTF-8', 'ISO-8859-1');
                }
            }
        }
        else
        {
            #
            # token or quoted string
            #

            $attribute = (string) substr($source, 0, $equal_pos);
            $value = $raw_value;

            if ($value !== '' && $value[0] === '"')
            {
                $value = (string) substr($value, 1);

                // Only drop the last character when it really is the closing quote.
                if (substr($value, -1) === '"')
                {
                    $value = (string) substr($value, 0, -1);
                }
            }
        }

        $value = mb_convert_encoding($value, 'UTF-8');

        return new static($attribute, $value, $language);
    }

    /**
     * Checks if the provided string is a token.
     *
     * <pre>
     * token = 1*<any CHAR except CTLs or separators>
     * separators = "(" | ")" | "<" | ">" | "@"
     * | "," | ";" | ":" | "\" | <">
     * | "/" | "[" | "]" | "?" | "="
     * | "{" | "}" | SP | HT
     * CHAR = <any US-ASCII character (octets 0 - 127)>
     * CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
     * SP = <US-ASCII SP, space (32)>
     * HT = <US-ASCII HT, horizontal-tab (9)>
     * </pre>
     *
     * @param string $str
     *
     * @return boolean `true` if the provided string is a token, `false` otherwise. An empty
     * string is not a token, since a token requires at least one character.
     */
    static public function is_token($str)
    {
        $str = (string) $str;

        if ($str === '')
        {
            return false;
        }

        // \x21 = CHAR except 0 - 31 (\x1f) and SP (\x20)
        // \x7e = CHAR except DEL

        return !preg_match('#[^\x21-\x7e]#', $str) && !preg_match('#[\(\)\<\>\@\,\;\:\\\\"\/\[\]\?\=\{\}\x9]#', $str);
    }

    /**
     * Converts a string to the ASCII charset.
     *
     * Accents are converted using {@link \ICanBoogie\remove_accents()}. Characters outside the
     * `\x20-\x7F` range are then discarded.
     *
     * @param string $str The string to convert.
     *
     * @return string
     */
    static public function to_ascii($str)
    {
        $str = \ICanBoogie\remove_accents($str);
        $str = preg_replace('/[^\x20-\x7F]+/', '', $str);

        return $str;
    }

    /**
     * Initializes the {@link $attribute}, {@link $value} and {@link $language} properties.
     *
     * @param string $attribute
     * @param string $value
     * @param string|null $language
     */
    public function __construct($attribute, $value=null, $language=null)
    {
        $this->attribute = $attribute;
        $this->value = $value;
        $this->language = $language;
    }

    /**
     * Renders the attribute and value into a string.
     *
     * Three forms are produced, depending on the value:
     *
     * - `attribute=value` when the value is a token.
     * - `attribute="value"` when the value is detected as ASCII or ISO-8859-1 and contains no
     * double quote, backslash or control character (horizontal tab excepted).
     * - `attribute="ascii fallback"; attribute*=UTF-8'language'percent-encoded` otherwise.
     *
     * An empty string is returned when the value is `null` or an empty string. The value `"0"`
     * is rendered.
     *
     * <pre>
     * A string of text is parsed as a single word if it is quoted using
     * double-quote marks.
     *
     * quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
     * qdtext = <any TEXT except <">>
     *
     * The backslash character ("\") MAY be used as a single-character
     * quoting mechanism only within quoted-string and comment constructs.
     *
     * quoted-pair = "\" CHAR
     * </pre>
     *
     * @return string
     */
    public function render()
    {
        // A loose `!$value` check would also discard the legitimate value "0".
        $value = (string) $this->value;

        if ($value === '')
        {
            return '';
        }

        $attribute = $this->attribute;

        #
        # token
        #

        if (self::is_token($value))
        {
            return "{$attribute}={$value}";
        }

        #
        # quoted string
        #

        // Same fallback as the `charset` getter when the encoding cannot be detected.
        $encoding = mb_detect_encoding($value) ?: 'ISO-8859-1';

        // A double quote or a backslash would need quoted-pair escaping, and raw control
        // characters (CR/LF in particular) must never reach a header line, so such values
        // are left to the escaped form below.
        $is_quotable = !preg_match('#[\x00-\x08\x0A-\x1F\x7F"\\\\]#', $value);

        if (($encoding === 'ASCII' || $encoding === 'ISO-8859-1') && $is_quotable)
        {
            return "{$attribute}=\"{$value}\"";
        }

        #
        # escaped, with fallback
        #
        # @see http://greenbytes.de/tech/tc2231/#encoding-2231-fb
        #

        if ($encoding !== 'UTF-8')
        {
            $value = mb_convert_encoding($value, 'UTF-8', $encoding);
        }

        // Characters that would break the quoted fallback are dropped from it; the exact
        // value is carried by the extended parameter.
        $normalized_value = self::to_ascii($value);
        $normalized_value = str_replace([ '"', ';', '\\' ], '', $normalized_value);

        // The value is UTF-8 at this point, so it is always labelled as such.
        return "{$attribute}=\"{$normalized_value}\"; {$attribute}*=UTF-8'{$this->language}'" . rawurlencode($value);
    }

    /**
     * Returns the value of the parameter.
     *
     * Note: Use {@link render()} to render the attribute and value of the parameter.
     *
     * @return string
     */
    public function __toString()
    {
        return (string) $this->value;
    }
}
251